diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 18141129d9f..e30d118ef4e 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ contact_links: - - name: "Join MindsDB Community" - url: https://mindsdb.com/joincommunity - about: Join our community on Slack for other questions and general chat \ No newline at end of file + - name: "Join the MindsDB Discord" + url: https://mindshub.ai/discord + about: Join our Discord for questions and general chat diff --git a/.github/ISSUE_TEMPLATE/integrations_contest.yaml b/.github/ISSUE_TEMPLATE/integrations_contest.yaml deleted file mode 100644 index 8318873d296..00000000000 --- a/.github/ISSUE_TEMPLATE/integrations_contest.yaml +++ /dev/null @@ -1,47 +0,0 @@ -name: πŸ§‘β€πŸ”§ Propose a new integration -description: Share an idea for a new datasource or machine learning integration -title: "[Integration]: " -labels: [roadmap, integration] -assignees: -- -body: -- type: markdown - attributes: - value: | - Thanks for taking the time to share the new integration! Please fill out the form in English! -- type: checkboxes - attributes: - label: Is there an existing integration? - description: Please search to see if MindsDB already supports this integration.A list with supported integrations can be found [here](https://github.com/mindsdb/mindsdb#database-integrations). - options: - - label: I have searched the existing integrations. - required: true -- type: textarea - attributes: - label: Use Case - description: Which use-cases does this solve? - placeholder: | - Why this integration will be usefull to users? What is the value of having this integration? - validations: - required: true -- type: textarea - attributes: - label: Motivation - description: How will we know that this has succeeded? - placeholder: | - Explain the proposed integration as though it was already implemented and you were explaining it to a user. - validations: - required: true -- type: textarea - attributes: - label: Implementation - description: Describe how this integration will work, with code, pseudo-code, mock-ups, text, or add diagrams - validations: - required: false -- type: textarea - attributes: - label: Anything else? - description: | - Links? References? Anything that will give more context about this integration! - validations: - required: false diff --git a/.github/workflows/build_deploy_dev.yml b/.github/workflows/build_deploy_dev.yml index 78701c7b6da..cc9d25f7edd 100644 --- a/.github/workflows/build_deploy_dev.yml +++ b/.github/workflows/build_deploy_dev.yml @@ -77,6 +77,18 @@ jobs: platforms: linux/amd64 push-cache: false + scan-keycloak: + runs-on: mdb-dev + needs: [ build ] + name: Scan cloud-cpu image + steps: + - uses: actions/checkout@v4 + - uses: mindsdb/github-actions/snyk-docker-scan@main + with: + image: 168681354662.dkr.ecr.us-east-1.amazonaws.com/mindsdb:${{ github.event.pull_request.head.sha }}-cloud-cpu + snyk-token: ${{ secrets.SNYK_TOKEN }} + dockerfile: docker/mindsdb.Dockerfile + # Push cache layers to docker registry # This is separate to the build step so we can do other stuff in parallel build-cache: diff --git a/.github/workflows/build_deploy_staging.yml b/.github/workflows/build_deploy_staging.yml index d0f3b0c27b9..f580f8baa5c 100644 --- a/.github/workflows/build_deploy_staging.yml +++ b/.github/workflows/build_deploy_staging.yml @@ -2,6 +2,9 @@ name: Build and deploy to staging permissions: contents: read + pull-requests: write + pages: write + id-token: write on: # Using pull_request instead of push on main because we want access to the pull request's details via 'github.event' @@ -10,7 +13,7 @@ on: types: - closed branches: - - 'develop' + - 'main' - 'releases/*' concurrency: diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000000..2308aa164aa --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,38 @@ +name: Deploy SQL Docs to GitHub Pages + +on: + push: + branches: [main] + paths: + - 'docs/index.html' + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: github-pages + cancel-in-progress: true + +jobs: + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Configure Pages + uses: actions/configure-pages@v5 + + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v3 + with: + path: docs/ + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/tests_unit.yml b/.github/workflows/tests_unit.yml index 3a9455a7f76..876972852f7 100644 --- a/.github/workflows/tests_unit.yml +++ b/.github/workflows/tests_unit.yml @@ -23,17 +23,12 @@ env: timescaledb mssql oracle - slack redshift bigquery - clickhouse web databricks - github - ms_teams - statsforecast - chromadb - confluence + duckdb_faiss + openai # We measure 80% on this handlers, as they are the verified HANDLERS_TO_VERIFY: | mysql @@ -43,11 +38,9 @@ env: timescaledb mssql oracle - slack file redshift bigquery - confluence COVERAGE_FAIL_UNDER: "80" jobs: @@ -163,8 +156,6 @@ jobs: uv pip install ".[agents,kb]" \ -r requirements/requirements-test.txt \ "${HANDLER_EXTRAS[@]}" - # Onuxruntime is required for ChromaDB, once we have default pgvector we can remove it - uv pip install --force-reinstall onnxruntime==1.20.1 git clone --branch v$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2) https://github.com/mindsdb/mindsdb_sql_parser.git parser_tests - name: Run unit tests diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a22cc2c206b..bc20563dfb7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ Being part of the core MindsDB team is accessible to anyone who is motivated and wants to be part of that journey! -Please see below how to contribute to the project, also refer to the contributing documentation. +Please see below how to contribute to the project. ## How can you help us? @@ -27,7 +27,7 @@ In general, we follow the "fork-and-pull" Git workflow. > NOTE: Be sure to merge the latest from "upstream" before making a pull request! Also, make the PR to the `main` branch. ## Feature and Bug reports -We use GitHub issues to track bugs and features. Report them by opening a [new issue](https://github.com/mindsdb/mindsdb/issues/new/choose) and fill out all of the required inputs. +We use GitHub issues to track bugs and features. Report them by opening a [new issue](https://github.com/mindsdb/engine/issues) and fill out all of the required inputs. ## Code review process @@ -35,12 +35,10 @@ The Pull Request reviews are done on a regular basis. Please, make sure you resp ## Community -If you have additional questions or you want to chat with the MindsDB core team, please join our [Slack community](https://mindsdb.com/joincommunity) or post at [Github Discussions](https://github.com/mindsdb/mindsdb/discussions). - -To get updates on MindsDB’s latest announcements, releases, and events, sign up for our [Monthly Community Newsletter](https://mindsdb.com/newsletter/?utm_medium=community&utm_source=github&utm_campaign=mindsdb%20repo). +If you have additional questions or you want to chat with the MindsDB core team, please join our [Discord](https://mindshub.ai/discord) or open a [GitHub issue](https://github.com/mindsdb/engine/issues). -Join our mission of democratizing machine learning! +Join our mission of making semantic search accessible to everyone who knows SQL! ## Contributor Code of Conduct -Please note that this project is released with a [Contributor Code of Conduct](https://github.com/mindsdb/mindsdb/blob/main/CODE_OF_CONDUCT.md). By participating in this project, you agree to abide by its terms. +Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project, you agree to abide by its terms. diff --git a/Makefile b/Makefile index 05cea89b906..75c05b7c647 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -PYTEST_ARGS = -v -rs --disable-warnings -n auto --dist loadfile +PYTEST_ARGS = -v -xrs --disable-warnings -n 1 --dist loadfile PYTEST_ARGS_DEBUG = --runslow -vs -rs DSI_PYTEST_ARGS = --run-dsi-tests DSI_REPORT_ARGS = --json-report --json-report-file=reports/report.json @@ -87,4 +87,8 @@ unit_tests_debug: env PYTHONPATH=./ pytest $(PYTEST_ARGS_DEBUG) tests/unit/executor/ pytest $(PYTEST_ARGS_DEBUG) --ignore=tests/unit/executor tests/unit/ +.PHONY: tests-artifacts +tests-artifacts: + ./scripts/test-artifacts.sh + .PHONY: install_mindsdb install_handler precommit format run_mindsdb check build_docker run_docker integration_tests integration_tests_slow integration_tests_debug datasource_integration_tests datasource_integration_tests_debug unit_tests unit_tests_slow unit_tests_debug diff --git a/README.md b/README.md index 3ce7bd5bd15..6b455275e73 100644 --- a/README.md +++ b/README.md @@ -1,238 +1,189 @@ - +
-

- - Query engine for AI analytics, powering agents to answer questions across all your live data - -

+ # MindsDB Query Engine + +**Semantic search over all your data β€” entirely in SQL.** -
- MindsDB Release + PyPI version - Python supported + Supported Python versions - Docker pulls + Docker pulls -

- Website - Β· - Docs - Β· - Contact us for a demo - Β· - Community Slack -

+[**Docs**](https://mindsdb.github.io/engine) Β· [**Website**](https://mindshub.ai) Β· [**Discord**](https://mindshub.ai/discord) Β· [**Contact**](https://mindshub.ai/contact)
--- -MindsDB is a popular open-source query engine for AI analytics, powering AI agents that need to answer questions directly from databases, data warehouses, and applications, with no ETL required. +MindsDB Query Engine connects to 200+ data sources β€” databases, warehouses, applications, files β€” and lets you query them live in one SQL dialect, with no ETL. Index unstructured content into [knowledge bases](https://mindsdb.github.io/engine#kb-overview), then search it by meaning, by keyword, or both at once, with plain SQL filters on top. Everything is reachable from any MySQL- or PostgreSQL-compatible client. -
- - MindsDB demo - answer questions in plain English from live enterprise data - -
+> **Where this fits:** MindsDB now builds [MindsHub](https://mindshub.ai) β€” a hub for open AI agents. The Query Engine remains a standalone open-source project, and it pairs well with MindsHub agents: connect it to give an agent live, SQL-queryable access to your data and semantic search. The full story: [MindsHub vs MindsDB](https://mindshub.ai/mindshub-vs-mindsdb). -## What you can build with MindsDB Query Engine +## How it works + +``` + MySQL clients Β· PostgreSQL clients Β· BI tools Β· ORMs Β· HTTP API + β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ MindsDB Query Engine β”‚ + β”‚ one SQL dialect over β”‚ + β”‚ a federated query planner β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ β”‚ β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ Databases β”‚ β”‚ Apps & files β”‚ β”‚ Knowledge bases β”‚ + β”‚ Postgres, MySQL, β”‚ β”‚ Slack, web crawlerβ”‚ β”‚ embeddings + β”‚ + β”‚ MongoDB, Snowflakeβ”‚ β”‚ docs, sheets, β”‚ β”‚ vector store + β”‚ + β”‚ BigQuery, S3, … β”‚ β”‚ email, calendars… β”‚ β”‚ BM25 index β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + queried live, in place β€” data is never copied +``` + +- **One server, three interfaces.** The engine ships a built-in SQL editor on HTTP (`:47334`) and speaks the MySQL (`:47335`) and PostgreSQL (`:47336`) wire protocols β€” so `mysql`, `psql`, DBeaver, SQLAlchemy, or any BI tool [connects directly](https://mindsdb.github.io/engine#setup-clients). +- **Federated queries, no pipelines.** [`CREATE DATABASE`](https://mindsdb.github.io/engine#db-create) attaches a live data source through an integration handler. The planner translates each query, pushes work down to the source, and streams results back β€” your data stays where it is. Source-specific syntax is still available via [native queries](https://mindsdb.github.io/engine#native-queries). +- **Knowledge bases are the semantic layer.** A [knowledge base](https://mindsdb.github.io/engine#kb-overview) combines an embedding model, an optional reranking model, and a vector store (e.g. pgvector). `INSERT INTO` it to chunk, embed, and index content; `SELECT` from it to retrieve by meaning, filtered by metadata columns like any other table. +- **Hybrid retrieval.** [Hybrid search](https://mindsdb.github.io/engine#kb-hybrid) runs vector similarity and BM25 keyword matching in parallel and merges the results β€” for queries that mix natural language with exact identifiers, codes, or acronyms. +- **Organize and automate.** [Projects](https://mindsdb.github.io/engine#proj-create) namespace your work, [views](https://mindsdb.github.io/engine#view-create) save cross-source transformations, and [jobs](https://mindsdb.github.io/engine#job-create) schedule any SQL to run on an interval β€” e.g. to keep knowledge bases fresh. + +## Quick start + +Run with [Docker](https://mindsdb.github.io/engine#setup-docker): -| CONVERSATIONAL ANALYTICS AGENTS | SEMANTIC SEARCH AGENTS | -| --- | --- | -| Get precise, data-driven answers using natural language.

Unify and query data across sources (MySQL, Salesforce, Shopify, etc.), without ETL.

Watch video | Ground LLM responses in your most relevant internal knowledge.

Search across unstructured sources like documents, support tickets, Google Drive, and more.

Watch video | - -## How MindsDB works - -MindsDB follows a simple workflow: **Connect β†’ Unify β†’ Respond**. At the center is an SQL-compatible data language with additional constructs for searching unstructured data, managing workflows (jobs/triggers), and building agents. - - - - - - - - - - - - - - -
- Connect - - Universal data access: Give your agents federated access to 200+ live data sources (Postgres, MongoDB, Slack, files, and more). -
- Unify - - Dynamic context engine: Fuse structured tables with vectorized data (text, PDFs, HTML) inside a Knowledge Base. -
- Respond - - Autonomous reasoning: Deploy agents that blend and retrieve data points across your stack to produce grounded answers. -
- -## Setup - -Users can install MindsDB via Docker, Docker Extension, or PyPI. - -Here is how to pull and run MindsDB via Docker: ```bash docker run --name mindsdb_container \ --e MINDSDB_APIS=http,mysql \ --p 47334:47334 -p 47335:47335 \ -mindsdb/mindsdb:latest + -e MINDSDB_APIS=http,mysql \ + -p 47334:47334 -p 47335:47335 \ + mindsdb/mindsdb ``` -## Usage +Or install from [PyPI](https://mindsdb.github.io/engine#setup-pip): + +```bash +pip install mindsdb # add extras as needed, e.g. mindsdb[pgvector,openai,postgres] +python -m mindsdb +``` -**Follow the quickstart guide to get started with MindsDB using our demo data.** +Then open the editor at `http://127.0.0.1:47334`, or connect any MySQL client to port `47335`. The [quickstart](https://mindsdb.github.io/engine#quickstart) walks through the rest. + +## From zero to semantic search + +Six SQL statements, start to finish. Full syntax for every statement is in the [SQL reference](https://mindsdb.github.io/engine). + +**1. Attach your data sources** ([docs](https://mindsdb.github.io/engine#db-create)) β€” they are queried live, nothing is imported: -Retrieve and analyze data from over 200 data sources in one SQL dialect. For AI agents, this means faster response time, better accuracy, and lower token consumption. ```sql ---use SQL to aggregate pipeline data from Salesforce -SELECT SUM(ExpectedRevenue) AS open_pipeline -FROM salesforce.opportunities -WHERE close_date >= CURDATE() - ---use the same dialect to retrieve even from a non-SQL database, like MondoDB -SELECT COUNT(*) AS negative_emails_last_30_days -FROM mongodb.support_tickets -WHERE sentiment = 'negative' - AND created_at >= CURRENT_DATE - INTERVAL '30 days'; +CREATE DATABASE my_pg +WITH ENGINE = 'postgres', +PARAMETERS = { + "host": "localhost", "port": 5432, + "user": "user", "password": "pass", + "database": "mydb" +}; + +CREATE DATABASE my_mongo +WITH ENGINE = 'mongodb', +PARAMETERS = { + "host": "mongodb+srv://user:pass@cluster.example.net", + "database": "support" +}; ``` -Create views and join data even from different types of data systems. +**2. Query across sources in one dialect** ([docs](https://mindsdb.github.io/engine#sql-join)) β€” even non-SQL stores like MongoDB, and save the result as a [view](https://mindsdb.github.io/engine#view-create): + ```sql ---join MongoDB and Salesforce data -CREATE VIEW risky_renewals AS ( -SELECT * -FROM mongodb.support_tickets AS reviews -JOIN salesforce.opportunities AS deals - ON reviews.customer_domain = deals.customer_domain -WHERE deals.type = "renewal" - AND reviews.sentiment = "negative" +CREATE VIEW open_tickets_by_product AS ( + SELECT p.name, COUNT(t.ticket_id) AS open_tickets + FROM my_mongo.support_tickets AS t + JOIN my_pg.products AS p + ON t.product_id = p.id + WHERE t.status = 'open' + GROUP BY p.name ); ``` -Join vectorized and structured data inside a knowledge base. Combine semantic search with precise metadata criteria in a single SQL query. +**3. Create a knowledge base** ([docs](https://mindsdb.github.io/engine#kb-create)) β€” an embedding model plus a vector store, addressable as a table: + ```sql ---create a knowledge base for customer issues -CREATE KNOWLEDGE_BASE customers_issues +CREATE KNOWLEDGE_BASE support_kb USING - storage = my_vector.db, - content_columns = ['ticket_description']; - metadata_columns = ['customer_name', 'segment', 'revenue', 'is_pending_renewal']; - ---find large customers who submitted ticket related to data security topics -SELECT * FROM customers_issues -WHERE content = 'data security' -AND - is_pending_renewal = 'true'. - revenue > 1000000; + embedding_model = { + "provider": "openai", + "model_name": "text-embedding-3-large", + "api_key": "sk-..." + }, + storage = my_pgvector.support_kb_store, -- a pgvector connection + content_columns = ['subject', 'body'], + metadata_columns = ['product_name', 'priority', 'created_at'], + id_column = 'ticket_id'; ``` -Use MindsDB pre-packaged data agents and connect them with your own. See how to use MindsDB via API or MCP. +**4. Index your content** ([docs](https://mindsdb.github.io/engine#kb-insert)) β€” rows are chunked, embedded, and upserted: + ```sql -CREATE AGENT my_agent -USING - model = { - "provider": "openai", - "model_name" : "gpt-xx", - "api_key": "sk-..." - }, - data = { - "knowledge_bases": ["mindsdb.customer_issues"], - "tables": ["salesforce.opportunities", "postgres.sales", "mongodb.support_tickets"] - }, - prompt_template = 'my prompt template and agent guidance'; +INSERT INTO support_kb + SELECT ticket_id, subject, body, product_name, priority, created_at + FROM my_mongo.support_tickets; ``` -See MindsDB’s recommended usage of agents here and how to automate workflows with jobs. - -## πŸ“ƒ Tutorials -- Enterprise Knowledge Search (example) -- Advanced Semantic Search (example) -- Customer Support Automation (example1, example2) -- Intelligent Content Discovery (example) -- Financial Analysis Agents (example) -- Real-time AI-powered analytics (example) -- Conversational Data Assistants (example) -- CRM Intelligence (example) -- Compliance & Customer Intelligence (example) -- Conversation Intelligence (example) -Subscribe to our (blog) for more - -## 🫴 Help and support - -Stuck on a query? Found a bug? We’re here to help. - - - - - - - - - - - - - -
- Ask a question - - Join our Slack Community. -
- Report a bug - - Open a GitHub Issue. Please include reproduction steps! -
- Get commercial support - - Contact the MindsDB Team for enterprise SLAs and custom solutions. -
- -**Security Note:** If you find a security vulnerability, please do not open a public issue. Refer to our security policy for reporting instructions. - -## 🀝 Contribute to MindsDB - -MindsDB is open source and contributions are welcome! You can submit code changes through pull requests or by opening issues to report bugs, suggest new features, or enhancements. - -**Ways you can help:** -- Develop a database integration -- Develop an app integration -- Identify and fix bugs - -**How to contribute** - -- Read the contribution guide to get set up. -- Browse open issues. -- Join the #contributors channel in Slack. -- Explore community rewards and programs. -
+**5. Search by meaning, filter by metadata** ([docs](https://mindsdb.github.io/engine#kb-query)): -Our top 100 contributors +```sql +SELECT chunk_content, product_name, relevance +FROM support_kb +WHERE content = 'cannot connect after the latest update' + AND priority <= 2 + AND relevance >= 0.5 +LIMIT 10; + +-- hybrid search: blend vector similarity with BM25 keyword matching +SELECT * +FROM support_kb +WHERE content = 'error ERR-4421' + AND hybrid_search = true; +``` - - - - -Made with [contrib.rocks](https://contrib.rocks) -
+β–Ά [How to use semantic search with metadata filters](https://www.youtube.com/watch?v=HN4fHtS4mvo) β€” a good explainer of this feature. + +**6. Keep the index fresh with a job** ([docs](https://mindsdb.github.io/engine#job-create)): + +```sql +CREATE JOB refresh_support_kb ( + INSERT INTO support_kb + SELECT ticket_id, subject, body, product_name, priority, created_at + FROM my_mongo.support_tickets + WHERE created_at > LAST +) +EVERY hour; +``` + +## Help and support + +| You need | Go to | +| --- | --- | +| Ask a question | [Discord](https://mindshub.ai/discord) | +| Report a bug | [GitHub Issues](https://github.com/mindsdb/engine/issues) β€” please include reproduction steps | +| Commercial support | [Contact the team](https://mindshub.ai/contact) | + +**Security note:** if you find a vulnerability, please do not open a public issue β€” follow our [security policy](https://github.com/mindsdb/engine/security) instead. + +## Contributing + +Contributions are welcome β€” code, integrations, docs, and bug reports alike. We follow the fork-and-pull workflow: see the [contribution guide](CONTRIBUTING.md) to get set up, and browse the [open issues](https://github.com/mindsdb/engine/issues) for somewhere to start. Good first areas are new integration handlers, bug fixes, and documentation improvements. + +## Resources + +- [Documentation](https://mindsdb.github.io/engine) +- [MindsHub β€” open AI agents, from the same team](https://mindshub.ai) +- [MindsHub vs MindsDB β€” how the product evolved](https://mindshub.ai/mindshub-vs-mindsdb) +- [Discord](https://mindshub.ai/discord) +- [Contact](https://mindshub.ai/contact) + +## License -## πŸ“š Resources -- Documentation -- Blog -- Events -- Community Slack -- Brand guidelines -- Contact form +MindsDB Core is licensed under the [Elastic License 2.0](LICENSE); some directories carry their own license β€” see the [LICENSE](LICENSE) file for the full structure. diff --git a/assets/contributions-agreement/signatures/cla.json b/assets/contributions-agreement/signatures/cla.json index dc0e2328551..b3dabe85607 100644 --- a/assets/contributions-agreement/signatures/cla.json +++ b/assets/contributions-agreement/signatures/cla.json @@ -5831,6 +5831,1054 @@ "created_at": "2023-10-30T12:46:04Z", "repoId": 143328315, "pullRequestNo": 8163 + }, + { + "name": "minakshisharma197", + "id": 184736207, + "comment_id": 2413433683, + "created_at": "2024-10-15T09:55:40Z", + "repoId": 143328315, + "pullRequestNo": 9865 + }, + { + "name": "divyakhatiyan", + "id": 141419850, + "comment_id": 2417330560, + "created_at": "2024-10-16T16:28:34Z", + "repoId": 143328315, + "pullRequestNo": 9899 + }, + { + "name": "Sekhar-Kumar-Dash", + "id": 119131588, + "comment_id": 2419495274, + "created_at": "2024-10-17T13:05:15Z", + "repoId": 143328315, + "pullRequestNo": 9914 + }, + { + "name": "kom-senapati", + "id": 92045934, + "comment_id": 2423485137, + "created_at": "2024-10-19T02:28:49Z", + "repoId": 143328315, + "pullRequestNo": 9807 + }, + { + "name": "RiyanaD", + "id": 117534139, + "comment_id": 2420766574, + "created_at": "2024-10-17T22:54:53Z", + "repoId": 143328315, + "pullRequestNo": 9427 + }, + { + "name": "narengogi", + "id": 47327611, + "comment_id": 2296396377, + "created_at": "2024-08-19T11:55:54Z", + "repoId": 143328315, + "pullRequestNo": 9641 + }, + { + "name": "PatLittle", + "id": 31454591, + "comment_id": 2425743649, + "created_at": "2024-10-21T06:49:47Z", + "repoId": 143328315, + "pullRequestNo": 9962 + }, + { + "name": "panoskyriakis", + "id": 134383572, + "comment_id": 2317914456, + "created_at": "2024-08-29T14:39:56Z", + "repoId": 143328315, + "pullRequestNo": 9654 + }, + { + "name": "lucas-koontz", + "id": 7515210, + "comment_id": 2428585608, + "created_at": "2024-10-22T08:19:54Z", + "repoId": 143328315, + "pullRequestNo": 9976 + }, + { + "name": "Tryxns", + "id": 10586708, + "comment_id": 2433530462, + "created_at": "2024-10-23T21:51:00Z", + "repoId": 143328315, + "pullRequestNo": 9975 + }, + { + "name": "DhanushNehru", + "id": 22955675, + "comment_id": 2438155935, + "created_at": "2024-10-25T15:40:09Z", + "repoId": 143328315, + "pullRequestNo": 10047 + }, + { + "name": "TalaatHasanin", + "id": 105648065, + "comment_id": 2439488990, + "created_at": "2024-10-26T10:54:04Z", + "repoId": 143328315, + "pullRequestNo": 9726 + }, + { + "name": "AkashJana18", + "id": 103350981, + "comment_id": 2442254462, + "created_at": "2024-10-28T17:52:47Z", + "repoId": 143328315, + "pullRequestNo": 10073 + }, + { + "name": "prajwal-pai77", + "id": 108796209, + "comment_id": 2445980761, + "created_at": "2024-10-30T06:33:47Z", + "repoId": 143328315, + "pullRequestNo": 10039 + }, + { + "name": "JanumalaAkhilendra", + "id": 82641474, + "comment_id": 2446791257, + "created_at": "2024-10-30T11:43:16Z", + "repoId": 143328315, + "pullRequestNo": 10051 + }, + { + "name": "herjanice", + "id": 72483795, + "comment_id": 2370891577, + "created_at": "2024-09-24T10:33:26Z", + "repoId": 143328315, + "pullRequestNo": 9727 + }, + { + "name": "mabderrahim", + "id": 20402768, + "comment_id": 2377340466, + "created_at": "2024-09-26T15:48:00Z", + "repoId": 143328315, + "pullRequestNo": 9727 + }, + { + "name": "mohamed-abderrahim3", + "id": 183199390, + "comment_id": 2380593605, + "created_at": "2024-09-28T10:19:38Z", + "repoId": 143328315, + "pullRequestNo": 9727 + }, + { + "name": "chuangyeshuo", + "id": 14370480, + "comment_id": 2449017804, + "created_at": "2024-10-31T05:05:38Z", + "repoId": 143328315, + "pullRequestNo": 10099 + }, + { + "name": "md-abid-hussain", + "id": 101964499, + "comment_id": 2449303679, + "created_at": "2024-10-31T08:26:54Z", + "repoId": 143328315, + "pullRequestNo": 10100 + }, + { + "name": "poisonvine", + "id": 179939949, + "comment_id": 2408223847, + "created_at": "2024-10-11T23:08:39Z", + "repoId": 143328315, + "pullRequestNo": 9833 + }, + { + "name": "code-vine", + "id": 95056519, + "comment_id": 2408235943, + "created_at": "2024-10-11T23:31:03Z", + "repoId": 143328315, + "pullRequestNo": 9833 + }, + { + "name": "poisonvine", + "id": 179939949, + "comment_id": 2463687190, + "created_at": "2024-11-08T03:30:34Z", + "repoId": 143328315, + "pullRequestNo": 9833 + }, + { + "name": "vishwamartur", + "id": 64204611, + "comment_id": 2480506920, + "created_at": "2024-11-16T10:24:05Z", + "repoId": 143328315, + "pullRequestNo": 10176 + }, + { + "name": "UTSAVS26", + "id": 119779889, + "comment_id": 2482548112, + "created_at": "2024-11-18T10:15:35Z", + "repoId": 143328315, + "pullRequestNo": 10182 + }, + { + "name": "fshabashev", + "id": 6548211, + "comment_id": 2482924022, + "created_at": "2024-11-18T12:36:59Z", + "repoId": 143328315, + "pullRequestNo": 10153 + }, + { + "name": "GTgyani206", + "id": 128274569, + "comment_id": 2407637789, + "created_at": "2024-10-11T15:20:07Z", + "repoId": 143328315, + "pullRequestNo": 9832 + }, + { + "name": "QuantumPlumber", + "id": 44450703, + "comment_id": 2521508302, + "created_at": "2024-12-05T21:39:15Z", + "repoId": 143328315, + "pullRequestNo": 10243 + }, + { + "name": "Abdusshh", + "id": 101020733, + "comment_id": 2525127867, + "created_at": "2024-12-07T13:40:48Z", + "repoId": 143328315, + "pullRequestNo": 10253 + }, + { + "name": "cliffordp", + "id": 1812179, + "comment_id": 2540449382, + "created_at": "2024-12-13T03:21:48Z", + "repoId": 143328315, + "pullRequestNo": 10285 + }, + { + "name": "abhirajadhikary06", + "id": 171187625, + "comment_id": 2563775672, + "created_at": "2024-12-27T14:55:52Z", + "repoId": 143328315, + "pullRequestNo": 10331 + }, + { + "name": "jbrass", + "id": 125982, + "comment_id": 2587312474, + "created_at": "2025-01-13T14:50:21Z", + "repoId": 143328315, + "pullRequestNo": 10355 + }, + { + "name": "dj013", + "id": 47425755, + "comment_id": 2593267189, + "created_at": "2025-01-15T15:43:10Z", + "repoId": 143328315, + "pullRequestNo": 10371 + }, + { + "name": "juliette0704", + "id": 91728573, + "comment_id": 2609377887, + "created_at": "2025-01-23T10:01:31Z", + "repoId": 143328315, + "pullRequestNo": 10395 + }, + { + "name": "ivancastanop", + "id": 107499323, + "comment_id": 2598203208, + "created_at": "2025-01-17T11:55:12Z", + "repoId": 143328315, + "pullRequestNo": 10379 + }, + { + "name": "rdonato", + "id": 128521, + "comment_id": 2643683251, + "created_at": "2025-02-07T18:22:51Z", + "repoId": 143328315, + "pullRequestNo": 10444 + }, + { + "name": "SoNiC-HeRE", + "id": 96797205, + "comment_id": 2654003700, + "created_at": "2025-02-12T15:10:05Z", + "repoId": 143328315, + "pullRequestNo": 10460 + }, + { + "name": "guspan-tanadi", + "id": 36249910, + "comment_id": 2675814807, + "created_at": "2025-02-21T23:28:45Z", + "repoId": 143328315, + "pullRequestNo": 10465 + }, + { + "name": "arashaomrani", + "id": 20032520, + "comment_id": 2705110135, + "created_at": "2025-03-06T22:46:52Z", + "repoId": 143328315, + "pullRequestNo": 10544 + }, + { + "name": "kevinrawal", + "id": 84058124, + "comment_id": 2708288010, + "created_at": "2025-03-08T13:33:56Z", + "repoId": 143328315, + "pullRequestNo": 10550 + }, + { + "name": "MR901", + "id": 20877166, + "comment_id": 2788354723, + "created_at": "2025-04-09T05:54:32Z", + "repoId": 143328315, + "pullRequestNo": 10681 + }, + { + "name": "pnewsam", + "id": 22651415, + "comment_id": 2813745881, + "created_at": "2025-04-17T18:35:55Z", + "repoId": 143328315, + "pullRequestNo": 10736 + }, + { + "name": "emmanuel-ferdman", + "id": 35470921, + "comment_id": 2816053850, + "created_at": "2025-04-18T19:17:39Z", + "repoId": 143328315, + "pullRequestNo": 10739 + }, + { + "name": "Konstantinos-10", + "id": 161840728, + "comment_id": 2833463268, + "created_at": "2025-04-27T13:35:09Z", + "repoId": 143328315, + "pullRequestNo": 10761 + }, + { + "name": "NikosLaspias", + "id": 148558723, + "comment_id": 2834255670, + "created_at": "2025-04-28T07:38:11Z", + "repoId": 143328315, + "pullRequestNo": 10760 + }, + { + "name": "jzs1997", + "id": 29564670, + "comment_id": 2840686847, + "created_at": "2025-04-30T03:07:12Z", + "repoId": 143328315, + "pullRequestNo": 10776 + }, + { + "name": "HarshaVardhanMannem", + "id": 144146034, + "comment_id": 2896453670, + "created_at": "2025-05-21T03:28:49Z", + "repoId": 143328315, + "pullRequestNo": 10861 + }, + { + "name": "arun-prasath2005", + "id": 84761066, + "comment_id": 2906488930, + "created_at": "2025-05-24T06:10:22Z", + "repoId": 143328315, + "pullRequestNo": 10882 + }, + { + "name": "vmanikanta07", + "id": 117996904, + "comment_id": 2906811274, + "created_at": "2025-05-24T12:37:42Z", + "repoId": 143328315, + "pullRequestNo": 10885 + }, + { + "name": "omerc7", + "id": 32813109, + "comment_id": 2908711653, + "created_at": "2025-05-26T06:34:46Z", + "repoId": 143328315, + "pullRequestNo": 10895 + }, + { + "name": "trickster026", + "id": 212937700, + "comment_id": 2910591816, + "created_at": "2025-05-26T20:34:08Z", + "repoId": 143328315, + "pullRequestNo": 10903 + }, + { + "name": "ivanvza", + "id": 8543825, + "comment_id": 2911844022, + "created_at": "2025-05-27T09:31:36Z", + "repoId": 143328315, + "pullRequestNo": 10900 + }, + { + "name": "Joystonm", + "id": 116254639, + "comment_id": 2965183033, + "created_at": "2025-06-12T05:37:40Z", + "repoId": 143328315, + "pullRequestNo": 11070 + }, + { + "name": "noname4life", + "id": 77653287, + "comment_id": 2983573198, + "created_at": "2025-06-18T10:07:09Z", + "repoId": 143328315, + "pullRequestNo": 11117 + }, + { + "name": "D1m7asis", + "id": 80602676, + "comment_id": 2985345244, + "created_at": "2025-06-18T18:42:15Z", + "repoId": 143328315, + "pullRequestNo": 11124 + }, + { + "name": "Alex-xd", + "id": 11256006, + "comment_id": 2999207900, + "created_at": "2025-06-24T07:50:23Z", + "repoId": 143328315, + "pullRequestNo": 11160 + }, + { + "name": "PriyanshuPz", + "id": 112266318, + "comment_id": 3000590454, + "created_at": "2025-06-24T13:51:44Z", + "repoId": 143328315, + "pullRequestNo": 11163 + }, + { + "name": "rawathemant246", + "id": 99639231, + "comment_id": 2999067598, + "created_at": "2025-06-24T06:59:35Z", + "repoId": 143328315, + "pullRequestNo": 11159 + }, + { + "name": "aryanmalik-iet", + "id": 187411120, + "comment_id": 3007270696, + "created_at": "2025-06-26T06:24:35Z", + "repoId": 143328315, + "pullRequestNo": 11186 + }, + { + "name": "iabhi4", + "id": 61010675, + "comment_id": 3017197726, + "created_at": "2025-06-29T22:22:14Z", + "repoId": 143328315, + "pullRequestNo": 11212 + }, + { + "name": "dotWee", + "id": 8060356, + "comment_id": 3072932250, + "created_at": "2025-07-15T09:45:05Z", + "repoId": 143328315, + "pullRequestNo": 11300 + }, + { + "name": "buallen", + "id": 54055907, + "comment_id": 3078683990, + "created_at": "2025-07-16T13:40:32Z", + "repoId": 143328315, + "pullRequestNo": 11234 + }, + { + "name": "Raahim-Lone", + "id": 175012415, + "comment_id": 3120439531, + "created_at": "2025-07-25T21:35:44Z", + "repoId": 143328315, + "pullRequestNo": 11365 + }, + { + "name": "kaizenjinco", + "id": 78314961, + "comment_id": 3124537097, + "created_at": "2025-07-27T16:53:30Z", + "repoId": 143328315, + "pullRequestNo": 11367 + }, + { + "name": "huang-x-h", + "id": 381860, + "comment_id": 3132498852, + "created_at": "2025-07-29T13:16:29Z", + "repoId": 143328315, + "pullRequestNo": 11126 + }, + { + "name": "aperepel", + "id": 119367, + "comment_id": 3137657308, + "created_at": "2025-07-30T20:03:35Z", + "repoId": 143328315, + "pullRequestNo": 11385 + }, + { + "name": "abhayasr", + "id": 108477628, + "comment_id": 3164476409, + "created_at": "2025-08-07T14:39:49Z", + "repoId": 143328315, + "pullRequestNo": 11291 + }, + { + "name": "logan-mo", + "id": 63550599, + "comment_id": 3167373652, + "created_at": "2025-08-08T10:27:53Z", + "repoId": 143328315, + "pullRequestNo": 11414 + }, + { + "name": "kylediaz", + "id": 35979917, + "comment_id": 3180690963, + "created_at": "2025-08-12T19:21:02Z", + "repoId": 143328315, + "pullRequestNo": 11427 + }, + { + "name": "Kenxpx", + "id": 155082290, + "comment_id": 3194287003, + "created_at": "2025-08-17T10:15:06Z", + "repoId": 143328315, + "pullRequestNo": 11450 + }, + { + "name": "Nancy9ice", + "id": 103530451, + "comment_id": 3197557060, + "created_at": "2025-08-18T16:11:20Z", + "repoId": 143328315, + "pullRequestNo": 11453 + }, + { + "name": "Matvey-Kuk", + "id": 3284841, + "comment_id": 3197947416, + "created_at": "2025-08-18T18:18:26Z", + "repoId": 143328315, + "pullRequestNo": 11452 + }, + { + "name": "louisneal", + "id": 47094728, + "comment_id": 3222541351, + "created_at": "2025-08-26T04:06:55Z", + "repoId": 143328315, + "pullRequestNo": 11478 + }, + { + "name": "sejubar", + "id": 154475559, + "comment_id": 3240009269, + "created_at": "2025-08-31T09:59:19Z", + "repoId": 143328315, + "pullRequestNo": 11495 + }, + { + "name": "sudsmenon", + "id": 11342520, + "comment_id": 3250743797, + "created_at": "2025-09-03T20:48:18Z", + "repoId": 143328315, + "pullRequestNo": 11510 + }, + { + "name": "TaniyaKatigar", + "id": 214086943, + "comment_id": 3262560837, + "created_at": "2025-09-06T16:30:40Z", + "repoId": 143328315, + "pullRequestNo": 11530 + }, + { + "name": "GeorgeGithiri5", + "id": 46107866, + "comment_id": 3269367783, + "created_at": "2025-09-09T07:49:06Z", + "repoId": 143328315, + "pullRequestNo": 11541 + }, + { + "name": "gauiPPP", + "id": 43440362, + "comment_id": 3284159007, + "created_at": "2025-09-12T07:46:21Z", + "repoId": 143328315, + "pullRequestNo": 11554 + }, + { + "name": "morningman", + "id": 2899462, + "comment_id": 3293544413, + "created_at": "2025-09-15T19:07:52Z", + "repoId": 143328315, + "pullRequestNo": 11574 + }, + { + "name": "sadiqkhzn", + "id": 24961132, + "comment_id": 3312201690, + "created_at": "2025-09-19T13:26:49Z", + "repoId": 143328315, + "pullRequestNo": 11596 + }, + { + "name": "yumosx", + "id": 141902143, + "comment_id": 3322908961, + "created_at": "2025-09-23T08:21:07Z", + "repoId": 143328315, + "pullRequestNo": 11605 + }, + { + "name": "aimurphy", + "id": 36110273, + "comment_id": 3335211124, + "created_at": "2025-09-25T17:38:00Z", + "repoId": 143328315, + "pullRequestNo": 11618 + }, + { + "name": "richardokonicha", + "id": 48168290, + "comment_id": 3346750889, + "created_at": "2025-09-29T12:48:00Z", + "repoId": 143328315, + "pullRequestNo": 11552 + }, + { + "name": "vigbav36", + "id": 90998381, + "comment_id": 3361788337, + "created_at": "2025-10-02T15:24:35Z", + "repoId": 143328315, + "pullRequestNo": 11666 + }, + { + "name": "yashisthebatman", + "id": 149709821, + "comment_id": 3364470461, + "created_at": "2025-10-03T06:48:03Z", + "repoId": 143328315, + "pullRequestNo": 11676 + }, + { + "name": "survivant", + "id": 191879, + "comment_id": 3369115643, + "created_at": "2025-10-05T15:02:15Z", + "repoId": 143328315, + "pullRequestNo": 11684 + }, + { + "name": "Sai-Sravya-Thumati", + "id": 64857617, + "comment_id": 3370705793, + "created_at": "2025-10-06T09:31:16Z", + "repoId": 143328315, + "pullRequestNo": 11686 + }, + { + "name": "cclauss", + "id": 3709715, + "comment_id": 3364277206, + "created_at": "2025-10-03T05:08:38Z", + "repoId": 143328315, + "pullRequestNo": 11673 + }, + { + "name": "ParasNingune", + "id": 153178176, + "comment_id": 3388187853, + "created_at": "2025-10-10T03:48:32Z", + "repoId": 143328315, + "pullRequestNo": 11703 + }, + { + "name": "HarshitR2004", + "id": 159914116, + "comment_id": 3388359328, + "created_at": "2025-10-10T05:37:12Z", + "repoId": 143328315, + "pullRequestNo": 11704 + }, + { + "name": "Nirzak", + "id": 11460645, + "comment_id": 3393522813, + "created_at": "2025-10-11T17:20:41Z", + "repoId": 143328315, + "pullRequestNo": 11726 + }, + { + "name": "faizan842", + "id": 91795555, + "comment_id": 3407632893, + "created_at": "2025-10-15T17:55:57Z", + "repoId": 143328315, + "pullRequestNo": 11748 + }, + { + "name": "AhmadYasser1", + "id": 77586860, + "comment_id": 3419161297, + "created_at": "2025-10-19T02:48:49Z", + "repoId": 143328315, + "pullRequestNo": 11766 + }, + { + "name": "Nikhil172913832", + "id": 140622713, + "comment_id": 3443931056, + "created_at": "2025-10-24T16:13:14Z", + "repoId": 143328315, + "pullRequestNo": 11786 + }, + { + "name": "jiaqicheng1998", + "id": 65794980, + "comment_id": 3459506446, + "created_at": "2025-10-29T03:48:36Z", + "repoId": 143328315, + "pullRequestNo": 11793 + }, + { + "name": "Aashish079", + "id": 106550372, + "comment_id": 3461223031, + "created_at": "2025-10-29T12:19:16Z", + "repoId": 143328315, + "pullRequestNo": 11812 + }, + { + "name": "guddu-debasis", + "id": 167549811, + "comment_id": 3463419567, + "created_at": "2025-10-29T19:15:44Z", + "repoId": 143328315, + "pullRequestNo": 11821 + }, + { + "name": "jeis4wpi", + "id": 42679190, + "comment_id": 3467642515, + "created_at": "2025-10-30T11:55:54Z", + "repoId": 143328315, + "pullRequestNo": 11822 + }, + { + "name": "ak4shravikumar", + "id": 189372043, + "comment_id": 3469119609, + "created_at": "2025-10-30T17:15:30Z", + "repoId": 143328315, + "pullRequestNo": 11828 + }, + { + "name": "rajesh-adk-137", + "id": 89499267, + "comment_id": 3470873094, + "created_at": "2025-10-31T00:51:14Z", + "repoId": 143328315, + "pullRequestNo": 11835 + }, + { + "name": "KrishThakur23", + "id": 214495511, + "comment_id": 3475330781, + "created_at": "2025-11-01T01:05:56Z", + "repoId": 143328315, + "pullRequestNo": 11841 + }, + { + "name": "ritoban23", + "id": 124308320, + "comment_id": 3476917215, + "created_at": "2025-11-01T22:16:42Z", + "repoId": 143328315, + "pullRequestNo": 11843 + }, + { + "name": "bala-ceg", + "id": 70808619, + "comment_id": 3478836423, + "created_at": "2025-11-03T04:05:40Z", + "repoId": 143328315, + "pullRequestNo": 11844 + }, + { + "name": "HamoonDBA", + "id": 3939424, + "comment_id": 3499521731, + "created_at": "2025-11-06T21:49:51Z", + "repoId": 143328315, + "pullRequestNo": 11858 + }, + { + "name": "md-ziauddin", + "id": 29926473, + "comment_id": 3533762471, + "created_at": "2025-11-14T17:15:19Z", + "repoId": 143328315, + "pullRequestNo": 11888 + }, + { + "name": "suman-X", + "id": 137594910, + "comment_id": 3534136586, + "created_at": "2025-11-14T18:54:22Z", + "repoId": 143328315, + "pullRequestNo": 11890 + }, + { + "name": "suman-X", + "id": 137594910, + "comment_id": 3534230691, + "created_at": "2025-11-14T19:21:59Z", + "repoId": 143328315, + "pullRequestNo": 11890 + }, + { + "name": "SyedaAnshrahGillani", + "id": 90501474, + "comment_id": 3616952272, + "created_at": "2025-12-05T13:33:42Z", + "repoId": 143328315, + "pullRequestNo": 11973 + }, + { + "name": "neversettle17-101", + "id": 41864816, + "comment_id": 3620426556, + "created_at": "2025-12-06T13:56:57Z", + "repoId": 143328315, + "pullRequestNo": 11975 + }, + { + "name": "duskobogdanovski", + "id": 21080468, + "comment_id": 3656079267, + "created_at": "2025-12-15T14:55:07Z", + "repoId": 143328315, + "pullRequestNo": 12013 + }, + { + "name": "kelvinvelasquez-SDE", + "id": 112011775, + "comment_id": 3675658408, + "created_at": "2025-12-19T16:19:32Z", + "repoId": 143328315, + "pullRequestNo": 12029 + }, + { + "name": "PPeitsch", + "id": 88450637, + "comment_id": 3704693294, + "created_at": "2026-01-02T07:50:33Z", + "repoId": 143328315, + "pullRequestNo": 12048 + }, + { + "name": "SachinMyadam", + "id": 110909093, + "comment_id": 3716118688, + "created_at": "2026-01-06T20:02:10Z", + "repoId": 143328315, + "pullRequestNo": 12054 + }, + { + "name": "xuwei95", + "id": 18109811, + "comment_id": 3723114411, + "created_at": "2026-01-08T10:01:14Z", + "repoId": 143328315, + "pullRequestNo": 12063 + }, + { + "name": "Nandha-kumar-S", + "id": 85221220, + "comment_id": 3727602927, + "created_at": "2026-01-09T07:52:10Z", + "repoId": 143328315, + "pullRequestNo": 12082 + }, + { + "name": "Sweetdevil144", + "id": 117591942, + "comment_id": 3761427133, + "created_at": "2026-01-16T19:12:39Z", + "repoId": 143328315, + "pullRequestNo": 12110 + }, + { + "name": "Sriram-B-Srivatsa", + "id": 144884365, + "comment_id": 3765374596, + "created_at": "2026-01-18T14:51:54Z", + "repoId": 143328315, + "pullRequestNo": 12113 + }, + { + "name": "zhaojinxin409", + "id": 5874804, + "comment_id": 3771260955, + "created_at": "2026-01-20T06:34:45Z", + "repoId": 143328315, + "pullRequestNo": 12122 + }, + { + "name": "murataslan1", + "id": 78961478, + "comment_id": 3784602307, + "created_at": "2026-01-22T14:06:33Z", + "repoId": 143328315, + "pullRequestNo": 12004 + }, + { + "name": "C1ARKGABLE", + "id": 13039858, + "comment_id": 3792661007, + "created_at": "2026-01-23T21:53:19Z", + "repoId": 143328315, + "pullRequestNo": 11988 + }, + { + "name": "AndrewFarley", + "id": 470163, + "comment_id": 3801391357, + "created_at": "2026-01-26T19:40:00Z", + "repoId": 143328315, + "pullRequestNo": 12123 + }, + { + "name": "007slm", + "id": 1670036, + "comment_id": 3803635367, + "created_at": "2026-01-27T07:48:21Z", + "repoId": 143328315, + "pullRequestNo": 12155 + }, + { + "name": "C0staTin", + "id": 12409467, + "comment_id": 3812795861, + "created_at": "2026-01-28T17:36:00Z", + "repoId": 143328315, + "pullRequestNo": 12151 + }, + { + "name": "Amogh-2404", + "id": 114862749, + "comment_id": 3814926744, + "created_at": "2026-01-29T02:00:24Z", + "repoId": 143328315, + "pullRequestNo": 12167 + }, + { + "name": "themavik", + "id": 179817126, + "comment_id": 3936291923, + "created_at": "2026-02-20T17:50:39Z", + "repoId": 143328315, + "pullRequestNo": 12213 + }, + { + "name": "ianu82", + "id": 86010258, + "comment_id": 3973995110, + "created_at": "2026-02-27T16:55:27Z", + "repoId": 143328315, + "pullRequestNo": 12251 + }, + { + "name": "Mirza-Samad-Ahmed-Baig", + "id": 89132160, + "comment_id": 4054729064, + "created_at": "2026-03-13T12:24:17Z", + "repoId": 143328315, + "pullRequestNo": 12290 + }, + { + "name": "Krishnav1237", + "id": 147693159, + "comment_id": 4061239564, + "created_at": "2026-03-14T19:45:50Z", + "repoId": 143328315, + "pullRequestNo": 12294 + }, + { + "name": "StefanTrsunov", + "id": 91495981, + "comment_id": 4070493719, + "created_at": "2026-03-16T20:45:48Z", + "repoId": 143328315, + "pullRequestNo": 12297 + }, + { + "name": "Tzsapphire", + "id": 209363831, + "comment_id": 4106737895, + "created_at": "2026-03-22T18:27:23Z", + "repoId": 143328315, + "pullRequestNo": 12317 + }, + { + "name": "jnMetaCode", + "id": 12096460, + "comment_id": 4111619407, + "created_at": "2026-03-23T15:43:15Z", + "repoId": 143328315, + "pullRequestNo": 12279 } ] } \ No newline at end of file diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl index d7ad61ed0c8..852a82f7839 100644 --- a/docker/docker-bake.hcl +++ b/docker/docker-bake.hcl @@ -105,23 +105,23 @@ target "images" { item = [ { name = "bare" - extras = ".[agents,kb,mysql,postgresql,snowflake,bigquery,mssql,mssql-odbc,salesforce,duckdb_faiss]" + extras = ".[agents,kb,mysql,postgresql,snowflake,bigquery,mssql,mssql-odbc,salesforce,duckdb_faiss,pgvector]" target = "" }, { name = "devel" - extras = ".[agents,kb,mysql,postgresql,snowflake,bigquery,mssql,mssql-odbc,salesforce,duckdb_faiss]" # Required for running integration tests + extras = ".[agents,kb,mysql,postgresql,snowflake,bigquery,mssql,mssql-odbc,salesforce,duckdb_faiss,pgvector]" # Required for running integration tests target = "dev" }, { # If you make any changes here, make them to cloud-cpu as well name = "cloud" - extras = ".[mysql,statsforecast-extra,neuralforecast-extra,timegpt,mssql,mssql-odbc,gmail,snowflake,clickhouse,bigquery,elasticsearch,s3,databricks,oracle,opentelemetry,langfuse,jira,salesforce,gong,hubspot,netsuite,shopify,agents,kb] darts datasetsforecast transformers" + extras = ".[mysql,mssql,mssql-odbc,snowflake,bigquery,databricks,oracle,opentelemetry,langfuse,salesforce,hubspot,netsuite,shopify,agents,kb,pgvector] darts datasetsforecast transformers" target = "" }, { name = "cloud-cpu" - extras = ".[mysql,statsforecast-extra,neuralforecast-extra,timegpt,mssql,mssql-odbc,gmail,snowflake,clickhouse,bigquery,elasticsearch,s3,databricks,oracle,opentelemetry,langfuse,jira,salesforce,gong,hubspot,netsuite,shopify,agents,kb] darts datasetsforecast transformers" + extras = ".[mysql,mssql,mssql-odbc,snowflake,bigquery,databricks,oracle,opentelemetry,langfuse,salesforce,hubspot,netsuite,shopify,agents,kb,pgvector] darts datasetsforecast transformers" target = "" }, ] diff --git a/docker/mindsdb.Dockerfile b/docker/mindsdb.Dockerfile index ae706ed02df..70f40751f58 100644 --- a/docker/mindsdb.Dockerfile +++ b/docker/mindsdb.Dockerfile @@ -18,8 +18,6 @@ COPY mindsdb/__about__.py mindsdb/ # Which will mean the next stage can be cached, even if the cache for the above stage was invalidated. - - # Use the stage from above to install our deps with as much caching as possible FROM python:3.10-slim AS build WORKDIR /mindsdb @@ -56,7 +54,7 @@ COPY --from=deps /mindsdb . # - and finally declare `/mindsdb` as the target dir. ENV UV_LINK_MODE=copy \ UV_PYTHON_DOWNLOADS=never \ - UV_PYTHON=python3.10 \ + UV_PYTHON=python3.10.20 \ UV_PROJECT_ENVIRONMENT=/mindsdb \ VIRTUAL_ENV=/venv \ PATH=/venv/bin:$PATH @@ -71,6 +69,13 @@ RUN --mount=type=cache,target=/root/.cache \ FROM build AS extras + +# Apply latest security patches so the final image picks up fixes +# even when the build stage layers are cached +RUN --mount=target=/var/lib/apt,type=cache,sharing=locked \ + --mount=target=/var/cache/apt,type=cache,sharing=locked \ + apt-get update -qy && apt-get upgrade -qy + ARG EXTRAS # Install extras on top of the bare mindsdb # The torch index is provided for "-cpu" images which install the cpu-only version of torch @@ -93,8 +98,10 @@ ENV PATH=/venv/bin:$PATH EXPOSE 47334/tcp EXPOSE 47335/tcp -# Pre-load tokenizer from Huggingface, and UI -RUN python -m mindsdb --config=/root/mindsdb_config.json --load-tokenizer --update-gui +HEALTHCHECK --interval=30s --timeout=10s --retries=5 --start-period=60s CMD curl -fsS "http://localhost:47334/api/status" + +# Pre-load web GUI +RUN python -m mindsdb --config=/root/mindsdb_config.json --update-gui # Same as extras image, but with dev dependencies installed. # This image is used in our docker-compose and for local development with volume mounting diff --git a/docs/README.md b/docs 2/README.md similarity index 100% rename from docs/README.md rename to docs 2/README.md diff --git a/docs/assets/SLBot-Hero-Whizfizz.png b/docs 2/assets/SLBot-Hero-Whizfizz.png similarity index 100% rename from docs/assets/SLBot-Hero-Whizfizz.png rename to docs 2/assets/SLBot-Hero-Whizfizz.png diff --git a/docs/assets/SLBot-response1.png b/docs 2/assets/SLBot-response1.png similarity index 100% rename from docs/assets/SLBot-response1.png rename to docs 2/assets/SLBot-response1.png diff --git a/docs/assets/SLBot-response2.png b/docs 2/assets/SLBot-response2.png similarity index 100% rename from docs/assets/SLBot-response2.png rename to docs 2/assets/SLBot-response2.png diff --git a/docs/assets/SLBot-response3.png b/docs 2/assets/SLBot-response3.png similarity index 100% rename from docs/assets/SLBot-response3.png rename to docs 2/assets/SLBot-response3.png diff --git a/docs/assets/SLBot-response4.png b/docs 2/assets/SLBot-response4.png similarity index 100% rename from docs/assets/SLBot-response4.png rename to docs 2/assets/SLBot-response4.png diff --git a/docs/assets/ai-integrations.png b/docs 2/assets/ai-integrations.png similarity index 100% rename from docs/assets/ai-integrations.png rename to docs 2/assets/ai-integrations.png diff --git a/docs/assets/automation.png b/docs 2/assets/automation.png similarity index 100% rename from docs/assets/automation.png rename to docs 2/assets/automation.png diff --git a/docs/assets/byom_diagram.png b/docs 2/assets/byom_diagram.png similarity index 100% rename from docs/assets/byom_diagram.png rename to docs 2/assets/byom_diagram.png diff --git a/docs/assets/byom_empty_form.png b/docs 2/assets/byom_empty_form.png similarity index 100% rename from docs/assets/byom_empty_form.png rename to docs 2/assets/byom_empty_form.png diff --git a/docs/assets/byom_form.png b/docs 2/assets/byom_form.png similarity index 100% rename from docs/assets/byom_form.png rename to docs 2/assets/byom_form.png diff --git a/docs/assets/byom_upload_custom_model.png b/docs 2/assets/byom_upload_custom_model.png similarity index 100% rename from docs/assets/byom_upload_custom_model.png rename to docs 2/assets/byom_upload_custom_model.png diff --git a/docs/assets/chatbot_diagram.png b/docs 2/assets/chatbot_diagram.png similarity index 100% rename from docs/assets/chatbot_diagram.png rename to docs 2/assets/chatbot_diagram.png diff --git a/docs/assets/cloud/gui_query.png b/docs 2/assets/cloud/gui_query.png similarity index 100% rename from docs/assets/cloud/gui_query.png rename to docs 2/assets/cloud/gui_query.png diff --git a/docs/assets/cloud/main_mdb.png b/docs 2/assets/cloud/main_mdb.png similarity index 100% rename from docs/assets/cloud/main_mdb.png rename to docs 2/assets/cloud/main_mdb.png diff --git a/docs/assets/connect_tableau.png b/docs 2/assets/connect_tableau.png similarity index 100% rename from docs/assets/connect_tableau.png rename to docs 2/assets/connect_tableau.png diff --git a/docs/assets/connect_tableau_2.png b/docs 2/assets/connect_tableau_2.png similarity index 100% rename from docs/assets/connect_tableau_2.png rename to docs 2/assets/connect_tableau_2.png diff --git a/docs/assets/connect_tableau_3.png b/docs 2/assets/connect_tableau_3.png similarity index 100% rename from docs/assets/connect_tableau_3.png rename to docs 2/assets/connect_tableau_3.png diff --git a/docs/assets/connect_tableau_4.png b/docs 2/assets/connect_tableau_4.png similarity index 100% rename from docs/assets/connect_tableau_4.png rename to docs 2/assets/connect_tableau_4.png diff --git a/docs/assets/connect_tableau_5.png b/docs 2/assets/connect_tableau_5.png similarity index 100% rename from docs/assets/connect_tableau_5.png rename to docs 2/assets/connect_tableau_5.png diff --git a/docs/assets/connect_tableau_6.png b/docs 2/assets/connect_tableau_6.png similarity index 100% rename from docs/assets/connect_tableau_6.png rename to docs 2/assets/connect_tableau_6.png diff --git a/docs/assets/connect_tableau_7.png b/docs 2/assets/connect_tableau_7.png similarity index 100% rename from docs/assets/connect_tableau_7.png rename to docs 2/assets/connect_tableau_7.png diff --git a/docs/assets/docker/docker_desktop/containers-running-extension.png b/docs 2/assets/docker/docker_desktop/containers-running-extension.png similarity index 100% rename from docs/assets/docker/docker_desktop/containers-running-extension.png rename to docs 2/assets/docker/docker_desktop/containers-running-extension.png diff --git a/docs/assets/docker/docker_desktop/enable-extension-containers.png b/docs 2/assets/docker/docker_desktop/enable-extension-containers.png similarity index 100% rename from docs/assets/docker/docker_desktop/enable-extension-containers.png rename to docs 2/assets/docker/docker_desktop/enable-extension-containers.png diff --git a/docs/assets/docker/docker_desktop/enable-win-dev-mode.png b/docs 2/assets/docker/docker_desktop/enable-win-dev-mode.png similarity index 100% rename from docs/assets/docker/docker_desktop/enable-win-dev-mode.png rename to docs 2/assets/docker/docker_desktop/enable-win-dev-mode.png diff --git a/docs/assets/docker/docker_desktop/mindsdb-container-logs.png b/docs 2/assets/docker/docker_desktop/mindsdb-container-logs.png similarity index 100% rename from docs/assets/docker/docker_desktop/mindsdb-container-logs.png rename to docs 2/assets/docker/docker_desktop/mindsdb-container-logs.png diff --git a/docs/assets/docker/docker_desktop/mindsdb_docker_desktop.png b/docs 2/assets/docker/docker_desktop/mindsdb_docker_desktop.png similarity index 100% rename from docs/assets/docker/docker_desktop/mindsdb_docker_desktop.png rename to docs 2/assets/docker/docker_desktop/mindsdb_docker_desktop.png diff --git a/docs/assets/docker/docker_desktop/pull-latest-image.png b/docs 2/assets/docker/docker_desktop/pull-latest-image.png similarity index 100% rename from docs/assets/docker/docker_desktop/pull-latest-image.png rename to docs 2/assets/docker/docker_desktop/pull-latest-image.png diff --git a/docs/assets/faqs_download.csv.png b/docs 2/assets/faqs_download.csv.png similarity index 100% rename from docs/assets/faqs_download.csv.png rename to docs 2/assets/faqs_download.csv.png diff --git a/docs/assets/files/upload_file.png b/docs 2/assets/files/upload_file.png similarity index 100% rename from docs/assets/files/upload_file.png rename to docs 2/assets/files/upload_file.png diff --git a/docs/assets/files/upload_file_from_computer.png b/docs 2/assets/files/upload_file_from_computer.png similarity index 100% rename from docs/assets/files/upload_file_from_computer.png rename to docs 2/assets/files/upload_file_from_computer.png diff --git a/docs/assets/files/upload_file_from_url.png b/docs 2/assets/files/upload_file_from_url.png similarity index 100% rename from docs/assets/files/upload_file_from_url.png rename to docs 2/assets/files/upload_file_from_url.png diff --git a/docs/assets/install-dependencies-gui.png b/docs 2/assets/install-dependencies-gui.png similarity index 100% rename from docs/assets/install-dependencies-gui.png rename to docs 2/assets/install-dependencies-gui.png diff --git a/docs/assets/integrations/Arjuna.png b/docs 2/assets/integrations/Arjuna.png similarity index 100% rename from docs/assets/integrations/Arjuna.png rename to docs 2/assets/integrations/Arjuna.png diff --git a/docs/assets/jssdk_install_output.png b/docs 2/assets/jssdk_install_output.png similarity index 100% rename from docs/assets/jssdk_install_output.png rename to docs 2/assets/jssdk_install_output.png diff --git a/docs/assets/kb_data_insertion.png b/docs 2/assets/kb_data_insertion.png similarity index 100% rename from docs/assets/kb_data_insertion.png rename to docs 2/assets/kb_data_insertion.png diff --git a/docs/assets/kb_hybrid_search.jpg b/docs 2/assets/kb_hybrid_search.jpg similarity index 100% rename from docs/assets/kb_hybrid_search.jpg rename to docs 2/assets/kb_hybrid_search.jpg diff --git a/docs/assets/mcp.png b/docs 2/assets/mcp.png similarity index 100% rename from docs/assets/mcp.png rename to docs 2/assets/mcp.png diff --git a/docs/assets/mcp_cursor_chat.png b/docs 2/assets/mcp_cursor_chat.png similarity index 100% rename from docs/assets/mcp_cursor_chat.png rename to docs 2/assets/mcp_cursor_chat.png diff --git a/docs/assets/mcp_cursor_chat_mode.png b/docs 2/assets/mcp_cursor_chat_mode.png similarity index 100% rename from docs/assets/mcp_cursor_chat_mode.png rename to docs 2/assets/mcp_cursor_chat_mode.png diff --git a/docs/assets/mcp_cursor_chat_tool.png b/docs 2/assets/mcp_cursor_chat_tool.png similarity index 100% rename from docs/assets/mcp_cursor_chat_tool.png rename to docs 2/assets/mcp_cursor_chat_tool.png diff --git a/docs/assets/mcp_cursor_mcp_server.png b/docs 2/assets/mcp_cursor_mcp_server.png similarity index 100% rename from docs/assets/mcp_cursor_mcp_server.png rename to docs 2/assets/mcp_cursor_mcp_server.png diff --git a/docs/assets/mcp_cursor_settings.png b/docs 2/assets/mcp_cursor_settings.png similarity index 100% rename from docs/assets/mcp_cursor_settings.png rename to docs 2/assets/mcp_cursor_settings.png diff --git a/docs/assets/metabase_add_database.png b/docs 2/assets/metabase_add_database.png similarity index 100% rename from docs/assets/metabase_add_database.png rename to docs 2/assets/metabase_add_database.png diff --git a/docs/assets/metabase_connected.png b/docs 2/assets/metabase_connected.png similarity index 100% rename from docs/assets/metabase_connected.png rename to docs 2/assets/metabase_connected.png diff --git a/docs/assets/metabase_run_query_failure.png b/docs 2/assets/metabase_run_query_failure.png similarity index 100% rename from docs/assets/metabase_run_query_failure.png rename to docs 2/assets/metabase_run_query_failure.png diff --git a/docs/assets/metabase_run_query_home_rentals.png b/docs 2/assets/metabase_run_query_home_rentals.png similarity index 100% rename from docs/assets/metabase_run_query_home_rentals.png rename to docs 2/assets/metabase_run_query_home_rentals.png diff --git a/docs/assets/metabase_run_query_show_tables.png b/docs 2/assets/metabase_run_query_show_tables.png similarity index 100% rename from docs/assets/metabase_run_query_show_tables.png rename to docs 2/assets/metabase_run_query_show_tables.png diff --git a/docs/assets/minds/Dashboard_Minds.png b/docs 2/assets/minds/Dashboard_Minds.png similarity index 100% rename from docs/assets/minds/Dashboard_Minds.png rename to docs 2/assets/minds/Dashboard_Minds.png diff --git a/docs/assets/minds/DatasourcesConn_Minds.png b/docs 2/assets/minds/DatasourcesConn_Minds.png similarity index 100% rename from docs/assets/minds/DatasourcesConn_Minds.png rename to docs 2/assets/minds/DatasourcesConn_Minds.png diff --git a/docs/assets/minds/DatasourcesTab_Minds.png b/docs 2/assets/minds/DatasourcesTab_Minds.png similarity index 100% rename from docs/assets/minds/DatasourcesTab_Minds.png rename to docs 2/assets/minds/DatasourcesTab_Minds.png diff --git a/docs/assets/minds/DatasourcesType_Minds.png b/docs 2/assets/minds/DatasourcesType_Minds.png similarity index 100% rename from docs/assets/minds/DatasourcesType_Minds.png rename to docs 2/assets/minds/DatasourcesType_Minds.png diff --git a/docs/assets/minds/MindChat_Minds.png b/docs 2/assets/minds/MindChat_Minds.png similarity index 100% rename from docs/assets/minds/MindChat_Minds.png rename to docs 2/assets/minds/MindChat_Minds.png diff --git a/docs/assets/minds/MindsTab_Minds.png b/docs 2/assets/minds/MindsTab_Minds.png similarity index 100% rename from docs/assets/minds/MindsTab_Minds.png rename to docs 2/assets/minds/MindsTab_Minds.png diff --git a/docs/assets/minds/MindsWorkflow.png b/docs 2/assets/minds/MindsWorkflow.png similarity index 100% rename from docs/assets/minds/MindsWorkflow.png rename to docs 2/assets/minds/MindsWorkflow.png diff --git a/docs/assets/minds/NewMind_Minds.png b/docs 2/assets/minds/NewMind_Minds.png similarity index 100% rename from docs/assets/minds/NewMind_Minds.png rename to docs 2/assets/minds/NewMind_Minds.png diff --git a/docs/assets/minds/Playground_Mind.png b/docs 2/assets/minds/Playground_Mind.png similarity index 100% rename from docs/assets/minds/Playground_Mind.png rename to docs 2/assets/minds/Playground_Mind.png diff --git a/docs/assets/minds/PreviewData_Minds.png b/docs 2/assets/minds/PreviewData_Minds.png similarity index 100% rename from docs/assets/minds/PreviewData_Minds.png rename to docs 2/assets/minds/PreviewData_Minds.png diff --git a/docs/assets/mindsdb-editor.png b/docs 2/assets/mindsdb-editor.png similarity index 100% rename from docs/assets/mindsdb-editor.png rename to docs 2/assets/mindsdb-editor.png diff --git a/docs/assets/mindsdb-fqe.png b/docs 2/assets/mindsdb-fqe.png similarity index 100% rename from docs/assets/mindsdb-fqe.png rename to docs 2/assets/mindsdb-fqe.png diff --git a/docs/assets/mindsdb_gui_editor/create_model_1.png b/docs 2/assets/mindsdb_gui_editor/create_model_1.png similarity index 100% rename from docs/assets/mindsdb_gui_editor/create_model_1.png rename to docs 2/assets/mindsdb_gui_editor/create_model_1.png diff --git a/docs/assets/mindsdb_gui_editor/create_model_2.png b/docs 2/assets/mindsdb_gui_editor/create_model_2.png similarity index 100% rename from docs/assets/mindsdb_gui_editor/create_model_2.png rename to docs 2/assets/mindsdb_gui_editor/create_model_2.png diff --git a/docs/assets/mindsdb_gui_editor/mindsdb_editor.png b/docs 2/assets/mindsdb_gui_editor/mindsdb_editor.png similarity index 100% rename from docs/assets/mindsdb_gui_editor/mindsdb_editor.png rename to docs 2/assets/mindsdb_gui_editor/mindsdb_editor.png diff --git a/docs/assets/mindsdb_gui_editor/multiple_query_editor.png b/docs 2/assets/mindsdb_gui_editor/multiple_query_editor.png similarity index 100% rename from docs/assets/mindsdb_gui_editor/multiple_query_editor.png rename to docs 2/assets/mindsdb_gui_editor/multiple_query_editor.png diff --git a/docs/assets/mindsdb_gui_editor/object_explorer.png b/docs 2/assets/mindsdb_gui_editor/object_explorer.png similarity index 100% rename from docs/assets/mindsdb_gui_editor/object_explorer.png rename to docs 2/assets/mindsdb_gui_editor/object_explorer.png diff --git a/docs/assets/mindsdb_gui_editor/object_explorer_query.png b/docs 2/assets/mindsdb_gui_editor/object_explorer_query.png similarity index 100% rename from docs/assets/mindsdb_gui_editor/object_explorer_query.png rename to docs 2/assets/mindsdb_gui_editor/object_explorer_query.png diff --git a/docs/assets/mindsdb_gui_editor/query_editor.png b/docs 2/assets/mindsdb_gui_editor/query_editor.png similarity index 100% rename from docs/assets/mindsdb_gui_editor/query_editor.png rename to docs 2/assets/mindsdb_gui_editor/query_editor.png diff --git a/docs/assets/mindsdb_gui_editor/results_viewer.png b/docs 2/assets/mindsdb_gui_editor/results_viewer.png similarity index 100% rename from docs/assets/mindsdb_gui_editor/results_viewer.png rename to docs 2/assets/mindsdb_gui_editor/results_viewer.png diff --git a/docs/assets/mindsdb_gui_respond.png b/docs 2/assets/mindsdb_gui_respond.png similarity index 100% rename from docs/assets/mindsdb_gui_respond.png rename to docs 2/assets/mindsdb_gui_respond.png diff --git a/docs/assets/mindsdb_gui_respond_agents.png b/docs 2/assets/mindsdb_gui_respond_agents.png similarity index 100% rename from docs/assets/mindsdb_gui_respond_agents.png rename to docs 2/assets/mindsdb_gui_respond_agents.png diff --git a/docs/assets/mindsdb_gui_respond_chat.png b/docs 2/assets/mindsdb_gui_respond_chat.png similarity index 100% rename from docs/assets/mindsdb_gui_respond_chat.png rename to docs 2/assets/mindsdb_gui_respond_chat.png diff --git a/docs/assets/model-management.png b/docs 2/assets/model-management.png similarity index 100% rename from docs/assets/model-management.png rename to docs 2/assets/model-management.png diff --git a/docs/assets/pythonsdk_install_output.png b/docs 2/assets/pythonsdk_install_output.png similarity index 100% rename from docs/assets/pythonsdk_install_output.png rename to docs 2/assets/pythonsdk_install_output.png diff --git a/docs/assets/sql/data-insights-1.png b/docs 2/assets/sql/data-insights-1.png similarity index 100% rename from docs/assets/sql/data-insights-1.png rename to docs 2/assets/sql/data-insights-1.png diff --git a/docs/assets/sql/data-insights-2.png b/docs 2/assets/sql/data-insights-2.png similarity index 100% rename from docs/assets/sql/data-insights-2.png rename to docs 2/assets/sql/data-insights-2.png diff --git a/docs/assets/sql/data-insights-3.png b/docs 2/assets/sql/data-insights-3.png similarity index 100% rename from docs/assets/sql/data-insights-3.png rename to docs 2/assets/sql/data-insights-3.png diff --git a/docs/assets/sql/data-insights-4.png b/docs 2/assets/sql/data-insights-4.png similarity index 100% rename from docs/assets/sql/data-insights-4.png rename to docs 2/assets/sql/data-insights-4.png diff --git a/docs/assets/sql/data-insights-5.png b/docs 2/assets/sql/data-insights-5.png similarity index 100% rename from docs/assets/sql/data-insights-5.png rename to docs 2/assets/sql/data-insights-5.png diff --git a/docs/assets/sql/data-insights-6.png b/docs 2/assets/sql/data-insights-6.png similarity index 100% rename from docs/assets/sql/data-insights-6.png rename to docs 2/assets/sql/data-insights-6.png diff --git a/docs/assets/sql/dbeaver_1.png b/docs 2/assets/sql/dbeaver_1.png similarity index 100% rename from docs/assets/sql/dbeaver_1.png rename to docs 2/assets/sql/dbeaver_1.png diff --git a/docs/assets/sql/dbeaver_2.png b/docs 2/assets/sql/dbeaver_2.png similarity index 100% rename from docs/assets/sql/dbeaver_2.png rename to docs 2/assets/sql/dbeaver_2.png diff --git a/docs/assets/sql/dbeaver_3.png b/docs 2/assets/sql/dbeaver_3.png similarity index 100% rename from docs/assets/sql/dbeaver_3.png rename to docs 2/assets/sql/dbeaver_3.png diff --git a/docs/assets/sql/dbeaver_4.png b/docs 2/assets/sql/dbeaver_4.png similarity index 100% rename from docs/assets/sql/dbeaver_4.png rename to docs 2/assets/sql/dbeaver_4.png diff --git a/docs/assets/sql/dbeaver_5.png b/docs 2/assets/sql/dbeaver_5.png similarity index 100% rename from docs/assets/sql/dbeaver_5.png rename to docs 2/assets/sql/dbeaver_5.png diff --git a/docs/assets/sql/grafana_1.png b/docs 2/assets/sql/grafana_1.png similarity index 100% rename from docs/assets/sql/grafana_1.png rename to docs 2/assets/sql/grafana_1.png diff --git a/docs/assets/sql/grafana_2.png b/docs 2/assets/sql/grafana_2.png similarity index 100% rename from docs/assets/sql/grafana_2.png rename to docs 2/assets/sql/grafana_2.png diff --git a/docs/assets/sql/grafana_3.png b/docs 2/assets/sql/grafana_3.png similarity index 100% rename from docs/assets/sql/grafana_3.png rename to docs 2/assets/sql/grafana_3.png diff --git a/docs/assets/sql/grafana_4.png b/docs 2/assets/sql/grafana_4.png similarity index 100% rename from docs/assets/sql/grafana_4.png rename to docs 2/assets/sql/grafana_4.png diff --git a/docs/assets/sql/grafana_5.png b/docs 2/assets/sql/grafana_5.png similarity index 100% rename from docs/assets/sql/grafana_5.png rename to docs 2/assets/sql/grafana_5.png diff --git a/docs/assets/sql/income_vs_debt.png b/docs 2/assets/sql/income_vs_debt.png similarity index 100% rename from docs/assets/sql/income_vs_debt.png rename to docs 2/assets/sql/income_vs_debt.png diff --git a/docs/assets/sql/income_vs_debt_known_value.png b/docs 2/assets/sql/income_vs_debt_known_value.png similarity index 100% rename from docs/assets/sql/income_vs_debt_known_value.png rename to docs 2/assets/sql/income_vs_debt_known_value.png diff --git a/docs/assets/sql/income_vs_debt_prediction.png b/docs 2/assets/sql/income_vs_debt_prediction.png similarity index 100% rename from docs/assets/sql/income_vs_debt_prediction.png rename to docs 2/assets/sql/income_vs_debt_prediction.png diff --git a/docs/assets/sql/income_vs_debt_predictor.png b/docs 2/assets/sql/income_vs_debt_predictor.png similarity index 100% rename from docs/assets/sql/income_vs_debt_predictor.png rename to docs 2/assets/sql/income_vs_debt_predictor.png diff --git a/docs/assets/sql/income_vs_debt_unknown_value.png b/docs 2/assets/sql/income_vs_debt_unknown_value.png similarity index 100% rename from docs/assets/sql/income_vs_debt_unknown_value.png rename to docs 2/assets/sql/income_vs_debt_unknown_value.png diff --git a/docs/assets/sql/kb_retrieval_example1.png b/docs 2/assets/sql/kb_retrieval_example1.png similarity index 100% rename from docs/assets/sql/kb_retrieval_example1.png rename to docs 2/assets/sql/kb_retrieval_example1.png diff --git a/docs/assets/sql/kb_retrieval_example2.png b/docs 2/assets/sql/kb_retrieval_example2.png similarity index 100% rename from docs/assets/sql/kb_retrieval_example2.png rename to docs 2/assets/sql/kb_retrieval_example2.png diff --git a/docs/assets/sql/kb_retrieval_example3.png b/docs 2/assets/sql/kb_retrieval_example3.png similarity index 100% rename from docs/assets/sql/kb_retrieval_example3.png rename to docs 2/assets/sql/kb_retrieval_example3.png diff --git a/docs/assets/sql/tutorials/customer_churn/.gitkeep b/docs 2/assets/sql/tutorials/customer_churn/.gitkeep similarity index 100% rename from docs/assets/sql/tutorials/customer_churn/.gitkeep rename to docs 2/assets/sql/tutorials/customer_churn/.gitkeep diff --git a/docs/assets/sql/tutorials/generating_images_1.png b/docs 2/assets/sql/tutorials/generating_images_1.png similarity index 100% rename from docs/assets/sql/tutorials/generating_images_1.png rename to docs 2/assets/sql/tutorials/generating_images_1.png diff --git a/docs/assets/sql/tutorials/generating_images_2.png b/docs 2/assets/sql/tutorials/generating_images_2.png similarity index 100% rename from docs/assets/sql/tutorials/generating_images_2.png rename to docs 2/assets/sql/tutorials/generating_images_2.png diff --git a/docs/assets/sql/upload_file1.png b/docs 2/assets/sql/upload_file1.png similarity index 100% rename from docs/assets/sql/upload_file1.png rename to docs 2/assets/sql/upload_file1.png diff --git a/docs/assets/sql/upload_file2.png b/docs 2/assets/sql/upload_file2.png similarity index 100% rename from docs/assets/sql/upload_file2.png rename to docs 2/assets/sql/upload_file2.png diff --git a/docs/assets/sql/use.png b/docs 2/assets/sql/use.png similarity index 100% rename from docs/assets/sql/use.png rename to docs 2/assets/sql/use.png diff --git a/docs/assets/supported_integrations.png b/docs 2/assets/supported_integrations.png similarity index 100% rename from docs/assets/supported_integrations.png rename to docs 2/assets/supported_integrations.png diff --git a/docs/assets/tutorials/crops/.gitkeep b/docs 2/assets/tutorials/crops/.gitkeep similarity index 100% rename from docs/assets/tutorials/crops/.gitkeep rename to docs 2/assets/tutorials/crops/.gitkeep diff --git a/docs/assets/tutorials/llm-chatbot-ui/chat.png b/docs 2/assets/tutorials/llm-chatbot-ui/chat.png similarity index 100% rename from docs/assets/tutorials/llm-chatbot-ui/chat.png rename to docs 2/assets/tutorials/llm-chatbot-ui/chat.png diff --git a/docs/assets/tutorials/llm-chatbot-ui/prompt.png b/docs 2/assets/tutorials/llm-chatbot-ui/prompt.png similarity index 100% rename from docs/assets/tutorials/llm-chatbot-ui/prompt.png rename to docs 2/assets/tutorials/llm-chatbot-ui/prompt.png diff --git a/docs/assets/tutorials/llm-chatbot-ui/publish.png b/docs 2/assets/tutorials/llm-chatbot-ui/publish.png similarity index 100% rename from docs/assets/tutorials/llm-chatbot-ui/publish.png rename to docs 2/assets/tutorials/llm-chatbot-ui/publish.png diff --git a/docs/assets/tutorials/llm-chatbot-ui/settings.png b/docs 2/assets/tutorials/llm-chatbot-ui/settings.png similarity index 100% rename from docs/assets/tutorials/llm-chatbot-ui/settings.png rename to docs 2/assets/tutorials/llm-chatbot-ui/settings.png diff --git a/docs/assets/tutorials/llm-chatbot-ui/settings2.png b/docs 2/assets/tutorials/llm-chatbot-ui/settings2.png similarity index 100% rename from docs/assets/tutorials/llm-chatbot-ui/settings2.png rename to docs 2/assets/tutorials/llm-chatbot-ui/settings2.png diff --git a/docs/assets/tutorials/llm-chatbot-ui/slack-chat.png b/docs 2/assets/tutorials/llm-chatbot-ui/slack-chat.png similarity index 100% rename from docs/assets/tutorials/llm-chatbot-ui/slack-chat.png rename to docs 2/assets/tutorials/llm-chatbot-ui/slack-chat.png diff --git a/docs/assets/tutorials/llm-chatbot-ui/slack.png b/docs 2/assets/tutorials/llm-chatbot-ui/slack.png similarity index 100% rename from docs/assets/tutorials/llm-chatbot-ui/slack.png rename to docs 2/assets/tutorials/llm-chatbot-ui/slack.png diff --git a/docs/assets/tutorials/llm-chatbot-ui/welcome.png b/docs 2/assets/tutorials/llm-chatbot-ui/welcome.png similarity index 100% rename from docs/assets/tutorials/llm-chatbot-ui/welcome.png rename to docs 2/assets/tutorials/llm-chatbot-ui/welcome.png diff --git a/docs/assets/tutorials/monkeylearn/.gitkeep b/docs 2/assets/tutorials/monkeylearn/.gitkeep similarity index 100% rename from docs/assets/tutorials/monkeylearn/.gitkeep rename to docs 2/assets/tutorials/monkeylearn/.gitkeep diff --git a/docs/assets/tutorials/monkeylearn/1.create_ml.png b/docs 2/assets/tutorials/monkeylearn/1.create_ml.png similarity index 100% rename from docs/assets/tutorials/monkeylearn/1.create_ml.png rename to docs 2/assets/tutorials/monkeylearn/1.create_ml.png diff --git a/docs/assets/tutorials/monkeylearn/10.select_prediction.png b/docs 2/assets/tutorials/monkeylearn/10.select_prediction.png similarity index 100% rename from docs/assets/tutorials/monkeylearn/10.select_prediction.png rename to docs 2/assets/tutorials/monkeylearn/10.select_prediction.png diff --git a/docs/assets/tutorials/monkeylearn/4.describe.png b/docs 2/assets/tutorials/monkeylearn/4.describe.png similarity index 100% rename from docs/assets/tutorials/monkeylearn/4.describe.png rename to docs 2/assets/tutorials/monkeylearn/4.describe.png diff --git a/docs/assets/tutorials/monkeylearn/5.select_prediction.png b/docs 2/assets/tutorials/monkeylearn/5.select_prediction.png similarity index 100% rename from docs/assets/tutorials/monkeylearn/5.select_prediction.png rename to docs 2/assets/tutorials/monkeylearn/5.select_prediction.png diff --git a/docs/assets/tutorials/monkeylearn/createmodel1.png b/docs 2/assets/tutorials/monkeylearn/createmodel1.png similarity index 100% rename from docs/assets/tutorials/monkeylearn/createmodel1.png rename to docs 2/assets/tutorials/monkeylearn/createmodel1.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png b/docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png rename to docs 2/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png diff --git a/docs/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif b/docs 2/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif rename to docs 2/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif diff --git a/docs/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif b/docs 2/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif similarity index 100% rename from docs/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif rename to docs 2/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif diff --git a/docs/assets/twilio-chatbot-diagram.png b/docs 2/assets/twilio-chatbot-diagram.png similarity index 100% rename from docs/assets/twilio-chatbot-diagram.png rename to docs 2/assets/twilio-chatbot-diagram.png diff --git a/docs/assets/twilio-chatbot-response.png b/docs 2/assets/twilio-chatbot-response.png similarity index 100% rename from docs/assets/twilio-chatbot-response.png rename to docs 2/assets/twilio-chatbot-response.png diff --git a/docs/assets/twilio-image-model-image.png b/docs 2/assets/twilio-image-model-image.png similarity index 100% rename from docs/assets/twilio-image-model-image.png rename to docs 2/assets/twilio-image-model-image.png diff --git a/docs/assets/twilio-image-model-response.png b/docs 2/assets/twilio-image-model-response.png similarity index 100% rename from docs/assets/twilio-image-model-response.png rename to docs 2/assets/twilio-image-model-response.png diff --git a/docs/assets/twilio-text-model-response.png b/docs 2/assets/twilio-text-model-response.png similarity index 100% rename from docs/assets/twilio-text-model-response.png rename to docs 2/assets/twilio-text-model-response.png diff --git a/docs/assets/upload_custom_function.png b/docs 2/assets/upload_custom_function.png similarity index 100% rename from docs/assets/upload_custom_function.png rename to docs 2/assets/upload_custom_function.png diff --git a/docs/assets/upload_custom_function2.png b/docs 2/assets/upload_custom_function2.png similarity index 100% rename from docs/assets/upload_custom_function2.png rename to docs 2/assets/upload_custom_function2.png diff --git a/docs/assets/upload_custom_function_empty_form.png b/docs 2/assets/upload_custom_function_empty_form.png similarity index 100% rename from docs/assets/upload_custom_function_empty_form.png rename to docs 2/assets/upload_custom_function_empty_form.png diff --git a/docs/assets/use_cases/ai_agents.jpg b/docs 2/assets/use_cases/ai_agents.jpg similarity index 100% rename from docs/assets/use_cases/ai_agents.jpg rename to docs 2/assets/use_cases/ai_agents.jpg diff --git a/docs/assets/use_cases/ai_workflow_automation.jpg b/docs 2/assets/use_cases/ai_workflow_automation.jpg similarity index 100% rename from docs/assets/use_cases/ai_workflow_automation.jpg rename to docs 2/assets/use_cases/ai_workflow_automation.jpg diff --git a/docs/assets/use_cases/aipowered_data_retrieval.jpg b/docs 2/assets/use_cases/aipowered_data_retrieval.jpg similarity index 100% rename from docs/assets/use_cases/aipowered_data_retrieval.jpg rename to docs 2/assets/use_cases/aipowered_data_retrieval.jpg diff --git a/docs/assets/use_cases/automated_finetuning.jpg b/docs 2/assets/use_cases/automated_finetuning.jpg similarity index 100% rename from docs/assets/use_cases/automated_finetuning.jpg rename to docs 2/assets/use_cases/automated_finetuning.jpg diff --git a/docs/assets/use_cases/data_enrichment.jpg b/docs 2/assets/use_cases/data_enrichment.jpg similarity index 100% rename from docs/assets/use_cases/data_enrichment.jpg rename to docs 2/assets/use_cases/data_enrichment.jpg diff --git a/docs/assets/use_cases/indatabase_ml.jpg b/docs 2/assets/use_cases/indatabase_ml.jpg similarity index 100% rename from docs/assets/use_cases/indatabase_ml.jpg rename to docs 2/assets/use_cases/indatabase_ml.jpg diff --git a/docs/assets/use_cases/predictive_analytics.jpg b/docs 2/assets/use_cases/predictive_analytics.jpg similarity index 100% rename from docs/assets/use_cases/predictive_analytics.jpg rename to docs 2/assets/use_cases/predictive_analytics.jpg diff --git a/docs/callbacks.mdx b/docs 2/callbacks.mdx similarity index 100% rename from docs/callbacks.mdx rename to docs 2/callbacks.mdx diff --git a/docs/contribute/app-handlers.mdx b/docs 2/contribute/app-handlers.mdx similarity index 98% rename from docs/contribute/app-handlers.mdx rename to docs 2/contribute/app-handlers.mdx index 0c0a24639e1..040d3e2bc37 100644 --- a/docs/contribute/app-handlers.mdx +++ b/docs 2/contribute/app-handlers.mdx @@ -118,13 +118,13 @@ Here is a step-by-step guide: The `native_query()` method runs commands of the native API syntax. ```py - def native_query(self, query: Any) -> HandlerResponse: + def native_query(self, query: Any) -> TableResponse | OkResponse | ErrorResponse: """Receive raw query and act upon it somehow. Args: query (Any): query in native format (str for sql databases, api's json etc) Returns: - HandlerResponse + TableResponse | OkResponse | ErrorResponse """ ``` diff --git a/docs/contribute/community.mdx b/docs 2/contribute/community.mdx similarity index 100% rename from docs/contribute/community.mdx rename to docs 2/contribute/community.mdx diff --git a/docs/contribute/contribute.mdx b/docs 2/contribute/contribute.mdx similarity index 100% rename from docs/contribute/contribute.mdx rename to docs 2/contribute/contribute.mdx diff --git a/docs/contribute/data-handlers.mdx b/docs 2/contribute/data-handlers.mdx similarity index 72% rename from docs/contribute/data-handlers.mdx rename to docs 2/contribute/data-handlers.mdx index ca796627a7c..cb13aa0621d 100644 --- a/docs/contribute/data-handlers.mdx +++ b/docs 2/contribute/data-handlers.mdx @@ -45,7 +45,15 @@ Authors can opt for adding private methods, new files and folders, or any combin Under the `mindsdb.integrations.libs.utils` library, contributors can find various methods that may be useful while implementing new handlers. - Also, there are wrapper classes for the `DatabaseHandler` instances called [HandlerResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py#L7) and [HandlerStatusResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py#L32). You should use them to ensure proper output formatting. + For response formatting, use the following classes from `mindsdb.integrations.libs.response`: + - [TableResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for queries returning data (SELECT, SHOW, etc.) + - [OkResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for successful operations without data (CREATE, DROP, INSERT, etc.) + - [ErrorResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for error cases + - [HandlerStatusResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for connection status checks + + + The legacy `HandlerResponse` class is deprecated. Use `TableResponse`, `OkResponse`, or `ErrorResponse` instead. + ### Implementation @@ -124,13 +132,13 @@ Here is a step-by-step guide: The `native_query()` method runs commands of the native database language. ```py - def native_query(self, query: Any) -> HandlerResponse: + def native_query(self, query: Any) -> TableResponse | OkResponse | ErrorResponse: """Receive raw query and act upon it somehow. Args: query (Any): query in native format (str for sql databases, etc) Returns: - HandlerResponse + TableResponse | OkResponse | ErrorResponse """ ``` @@ -139,13 +147,13 @@ Here is a step-by-step guide: The query method runs parsed SQL commands. ```py - def query(self, query: ASTNode) -> HandlerResponse: + def query(self, query: ASTNode) -> TableResponse | OkResponse | ErrorResponse: """Receive query as AST (abstract syntax tree) and act upon it somehow. Args: query (ASTNode): sql query represented as AST. May be any kind of query: SELECT, INSERT, DELETE, etc Returns: - HandlerResponse + TableResponse | OkResponse | ErrorResponse """ ``` @@ -154,11 +162,11 @@ Here is a step-by-step guide: The `get_tables()` method lists all the available tables. ```py - def get_tables(self) -> HandlerResponse: + def get_tables(self) -> TableResponse | ErrorResponse: """ Return list of entities Return a list of entities that will be accessible as tables. Returns: - HandlerResponse: should have the same columns as information_schema.tables + TableResponse | ErrorResponse: should have the same columns as information_schema.tables (https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html) Column 'TABLE_NAME' is mandatory, other is optional. """ @@ -169,12 +177,12 @@ Here is a step-by-step guide: The `get_columns()` method lists all columns of a specified table. ```py - def get_columns(self, table_name: str) -> HandlerResponse: + def get_columns(self, table_name: str) -> TableResponse | ErrorResponse: """ Returns a list of entity columns Args: table_name (str): name of one of tables returned by self.get_tables() Returns: - HandlerResponse: should have the same columns as information_schema.columns + TableResponse | ErrorResponse: data should have the same columns as information_schema.columns (https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html) Column 'COLUMN_NAME' is mandatory, other is optional. Highly recommended to define also 'DATA_TYPE': it should be one of @@ -182,6 +190,112 @@ Here is a step-by-step guide: """ ``` +### Response Classes + +The data-returning methods (`native_query()`, `query()`, `get_tables()`, `get_columns()`) should return one of the following response classes from `mindsdb.integrations.libs.response`: + +| Response Class | Use Case | Key Attributes | +|---------------|----------|----------------| +| `TableResponse` | Queries that return data (SELECT, SHOW, etc.) | `data`, `data_generator`, `columns`, `affected_rows` | +| `OkResponse` | Successful operations without data (CREATE, DROP, INSERT, UPDATE, DELETE) | `affected_rows` | +| `ErrorResponse` | Error cases | `error_code`, `error_message`, `is_expected_error` | + +#### TableResponse + +`TableResponse` is used when returning data from queries. It supports two modes of data delivery: + +1. **Immediate data**: Pass all data at once via the `data` parameter (pandas DataFrame) +2. **Streaming data**: Pass a generator via the `data_generator` parameter for lazy loading + +```py +from mindsdb.integrations.libs.response import TableResponse, OkResponse, ErrorResponse + +# Immediate data response +def native_query(self, query: str) -> TableResponse: + result = self.execute_query(query) + df = pd.DataFrame(result) + return TableResponse(data=df) + +# Streaming data response (for large datasets) +def native_query(self, query: str) -> TableResponse: + def data_generator(): + cursor = self.connection.cursor() + cursor.execute(query) + while batch := cursor.fetchmany(size=1000): + yield pd.DataFrame(batch) + + return TableResponse(data_generator=data_generator()) +``` + +#### OkResponse + +`OkResponse` is used for operations that don't return data: + +```py +def native_query(self, query: str) -> OkResponse: + cursor = self.connection.cursor() + cursor.execute(query) + self.connection.commit() + return OkResponse(affected_rows=cursor.rowcount) +``` + +#### ErrorResponse + +`ErrorResponse` is used to report errors: + +```py +def native_query(self, query: str) -> ErrorResponse: + try: + # ... execute query + except DatabaseError as e: + return ErrorResponse( + error_code=e.code, + error_message=str(e), + is_expected_error=True # Set to True for user errors (syntax, permissions, etc.) + ) +``` + +### Streaming Support + +For handlers that deal with large datasets, implementing streaming support is recommended. This allows data to be returned in chunks rather than loading everything into memory at once. + +To enable streaming: + +1. Set the `stream_response` class attribute to `True`: + + ```py + class MyDatabaseHandler(DatabaseHandler): + name = "mydatabase" + stream_response = True # Indicates that handler can return data as a generator + ``` + +2. Implement `native_query()` to return a `TableResponse` with a `data_generator`: + + ```py + def native_query(self, query: str, stream: bool = True) -> TableResponse | OkResponse | ErrorResponse: + if stream: + return self._execute_streaming(query) + else: + return self._execute_immediate(query) + + def _execute_streaming(self, query: str) -> TableResponse: + """Execute query and return results as a stream.""" + cursor = self.connection.cursor(name="server_side_cursor") + cursor.execute(query) + + columns = [Column(name=col.name, type=col.type) for col in cursor.description] + + def generate_data(): + while batch := cursor.fetchmany(size=1000): + yield pd.DataFrame(batch, columns=[c.name for c in columns]) + + return TableResponse(columns=columns, data_generator=generate_data()) + ``` + + +For a complete example of streaming implementation, see the [PostgreSQL handler](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py). + + ### Exporting the `connection_args` Dictionary The `connection_args` dictionary contains all of the arguments used to establish the connection along with their descriptions, types, labels, and whether they are required or not. diff --git a/docs/contribute/docs.mdx b/docs 2/contribute/docs.mdx similarity index 100% rename from docs/contribute/docs.mdx rename to docs 2/contribute/docs.mdx diff --git a/docs/contribute/install.mdx b/docs 2/contribute/install.mdx similarity index 100% rename from docs/contribute/install.mdx rename to docs 2/contribute/install.mdx diff --git a/docs/contribute/integrations-readme.mdx b/docs 2/contribute/integrations-readme.mdx similarity index 100% rename from docs/contribute/integrations-readme.mdx rename to docs 2/contribute/integrations-readme.mdx diff --git a/docs/contribute/ml-handlers.mdx b/docs 2/contribute/ml-handlers.mdx similarity index 100% rename from docs/contribute/ml-handlers.mdx rename to docs 2/contribute/ml-handlers.mdx diff --git a/docs/contribute/python-coding-standards.mdx b/docs 2/contribute/python-coding-standards.mdx similarity index 100% rename from docs/contribute/python-coding-standards.mdx rename to docs 2/contribute/python-coding-standards.mdx diff --git a/docs/data_catalog/integrations/overview.mdx b/docs 2/data_catalog/integrations/overview.mdx similarity index 100% rename from docs/data_catalog/integrations/overview.mdx rename to docs 2/data_catalog/integrations/overview.mdx diff --git a/docs/data_catalog/integrations/query.mdx b/docs 2/data_catalog/integrations/query.mdx similarity index 100% rename from docs/data_catalog/integrations/query.mdx rename to docs 2/data_catalog/integrations/query.mdx diff --git a/docs/data_catalog/overview.mdx b/docs 2/data_catalog/overview.mdx similarity index 100% rename from docs/data_catalog/overview.mdx rename to docs 2/data_catalog/overview.mdx diff --git a/docs/docs.json b/docs 2/docs.json similarity index 99% rename from docs/docs.json rename to docs 2/docs.json index f0a1b49cb87..ff5a1352eb2 100644 --- a/docs/docs.json +++ b/docs 2/docs.json @@ -1,6 +1,6 @@ { "$schema": "https://mintlify.com/docs.json", - "theme": "willow", + "theme": "almond", "name": "MindsDB", "colors": { "primary": "#3f8394", @@ -13,6 +13,11 @@ { "tab": "Documentation", "groups": [ + { + "group": "Home", + "hidden": true, + "pages": ["homepage"] + }, { "group": "Get Started", "pages": [ @@ -726,7 +731,8 @@ }, "logo": { "light": "/logo/light.svg", - "dark": "/logo/dark.svg" + "dark": "/logo/dark.svg", + "href": "/homepage" }, "api": { "mdx": { @@ -1079,14 +1085,6 @@ "source": "/custom-model/mlflow", "destination": "/integrations/ai-engines/mlflow" }, - { - "source": "/nixtla/statsforecast", - "destination": "/integrations/ai-engines/statsforecast" - }, - { - "source": "/nixtla/house-sales-statsforecast", - "destination": "/sql/tutorials/house-sales-statsforecast" - }, { "source": "/connect/mindsdb_editor", "destination": "/mindsdb_sql/connect/mindsdb_editor" @@ -1371,10 +1369,6 @@ "source": "/sql/tutorials/house-sales-forecasting", "destination": "/use-cases/predictive_analytics/house-sales-forecasting" }, - { - "source": "/sql/tutorials/expenditures-statsforecast", - "destination": "/use-cases/predictive_analytics/expenditures-statsforecast" - }, { "source": "/sql/tutorials/eeg-forecasting", "destination": "/use-cases/predictive_analytics/eeg-forecasting" @@ -1462,6 +1456,14 @@ { "source": "/mcp/cursor_usage", "destination": "/model-context-protocol/cursor_usage" + }, + { + "source": "/use-cases/overview", + "destination": "/homepage" + }, + { + "source": "/", + "destination": "/homepage" } ] } diff --git a/docs/faqs/benefits.mdx b/docs 2/faqs/benefits.mdx similarity index 100% rename from docs/faqs/benefits.mdx rename to docs 2/faqs/benefits.mdx diff --git a/docs/faqs/disposable-email-doman-and-openai.mdx b/docs 2/faqs/disposable-email-doman-and-openai.mdx similarity index 100% rename from docs/faqs/disposable-email-doman-and-openai.mdx rename to docs 2/faqs/disposable-email-doman-and-openai.mdx diff --git a/docs/faqs/mindsdb-with-php.mdx b/docs 2/faqs/mindsdb-with-php.mdx similarity index 100% rename from docs/faqs/mindsdb-with-php.mdx rename to docs 2/faqs/mindsdb-with-php.mdx diff --git a/docs/faqs/missing-required-cpu-features.mdx b/docs 2/faqs/missing-required-cpu-features.mdx similarity index 100% rename from docs/faqs/missing-required-cpu-features.mdx rename to docs 2/faqs/missing-required-cpu-features.mdx diff --git a/docs/faqs/persist-predictions.mdx b/docs 2/faqs/persist-predictions.mdx similarity index 100% rename from docs/faqs/persist-predictions.mdx rename to docs 2/faqs/persist-predictions.mdx diff --git a/docs/favicon-dark.png b/docs 2/favicon-dark.png similarity index 100% rename from docs/favicon-dark.png rename to docs 2/favicon-dark.png diff --git a/docs/favicon.png b/docs 2/favicon.png similarity index 100% rename from docs/favicon.png rename to docs 2/favicon.png diff --git a/docs/favicon_old.png b/docs 2/favicon_old.png similarity index 100% rename from docs/favicon_old.png rename to docs 2/favicon_old.png diff --git a/docs/features/ai-integrations.mdx b/docs 2/features/ai-integrations.mdx similarity index 100% rename from docs/features/ai-integrations.mdx rename to docs 2/features/ai-integrations.mdx diff --git a/docs/features/automation.mdx b/docs 2/features/automation.mdx similarity index 100% rename from docs/features/automation.mdx rename to docs 2/features/automation.mdx diff --git a/docs/features/data-integrations.mdx b/docs 2/features/data-integrations.mdx similarity index 100% rename from docs/features/data-integrations.mdx rename to docs 2/features/data-integrations.mdx diff --git a/docs/features/model-management.mdx b/docs 2/features/model-management.mdx similarity index 100% rename from docs/features/model-management.mdx rename to docs 2/features/model-management.mdx diff --git a/docs/generative-ai-tables.mdx b/docs 2/generative-ai-tables.mdx similarity index 100% rename from docs/generative-ai-tables.mdx rename to docs 2/generative-ai-tables.mdx diff --git a/docs 2/homepage.mdx b/docs 2/homepage.mdx new file mode 100644 index 00000000000..65a14624dc1 --- /dev/null +++ b/docs 2/homepage.mdx @@ -0,0 +1,29 @@ +--- +mode: "custom" +--- + +
+

+ Documentation +

+

+ Everything you need to get up and running. Choose a path below to get started. +

+
+ +
+
+ + I want to use the Query Engine to build AI features that learn from my data and answer questions intelligently. +



+ Go to MindsDB Documentation. +
+
+
+ + I want to use MindsDB Anton, an AI coworker, to explore my data and uncover insights β€” no code required. +



+ Go to Anton GitHub repository. +
+
+
diff --git a/docs/integrations/ai-engines/amazon-bedrock.mdx b/docs 2/integrations/ai-engines/amazon-bedrock.mdx similarity index 100% rename from docs/integrations/ai-engines/amazon-bedrock.mdx rename to docs 2/integrations/ai-engines/amazon-bedrock.mdx diff --git a/docs/integrations/ai-engines/anomaly.mdx b/docs 2/integrations/ai-engines/anomaly.mdx similarity index 100% rename from docs/integrations/ai-engines/anomaly.mdx rename to docs 2/integrations/ai-engines/anomaly.mdx diff --git a/docs/integrations/ai-engines/anthropic.mdx b/docs 2/integrations/ai-engines/anthropic.mdx similarity index 100% rename from docs/integrations/ai-engines/anthropic.mdx rename to docs 2/integrations/ai-engines/anthropic.mdx diff --git a/docs/integrations/ai-engines/autokeras.mdx b/docs 2/integrations/ai-engines/autokeras.mdx similarity index 100% rename from docs/integrations/ai-engines/autokeras.mdx rename to docs 2/integrations/ai-engines/autokeras.mdx diff --git a/docs/integrations/ai-engines/autosklearn.mdx b/docs 2/integrations/ai-engines/autosklearn.mdx similarity index 100% rename from docs/integrations/ai-engines/autosklearn.mdx rename to docs 2/integrations/ai-engines/autosklearn.mdx diff --git a/docs/integrations/ai-engines/byom.mdx b/docs 2/integrations/ai-engines/byom.mdx similarity index 91% rename from docs/integrations/ai-engines/byom.mdx rename to docs 2/integrations/ai-engines/byom.mdx index 736af426317..ffdc564ff57 100644 --- a/docs/integrations/ai-engines/byom.mdx +++ b/docs 2/integrations/ai-engines/byom.mdx @@ -25,7 +25,7 @@ Let's briefly go over the files that need to be uploaded: ```py class CustomPredictor(): - ​ + def train(self, df, target_col, args=None): return '' @@ -39,38 +39,41 @@ Let's briefly go over the files that need to be uploaded: ```py import os import pandas as pd - ​ + from sklearn.cross_decomposition import PLSRegression from sklearn import preprocessing - ​ + class CustomPredictor(): - ​ + def train(self, df, target_col, args=None): print(args, '1111') - ​ + self.target_col = target_col y = df[self.target_col] x = df.drop(columns=self.target_col) x_cols = list(x.columns) - ​ + x_scaler = preprocessing.StandardScaler().fit(x) y_scaler = preprocessing.StandardScaler().fit(y.values.reshape(-1, 1)) - ​ + xs = x_scaler.transform(x) ys = y_scaler.transform(y.values.reshape(-1, 1)) - ​ + pls = PLSRegression(n_components=1) pls.fit(xs, ys) - ​ + + self.pls = pls + self.y_scaler = y_scaler + T = pls.x_scores_ W = pls.x_weights_ P = pls.x_loadings_ R = pls.x_rotations_ - ​ + self.x_cols = x_cols self.x_scaler = x_scaler self.P = P - ​ + def calc_limit(df): res = None for column in df.columns: @@ -89,32 +92,32 @@ Let's briefly go over the files that need to be uploaded: except: res = tbl return res - ​ + trdf = pd.DataFrame() trdf[self.target_col] = y.values trdf['T1'] = T.squeeze() limit = calc_limit(trdf).reset_index() - ​ + self.limit = limit - ​ + return "Trained predictor ready to be stored" - ​ + def predict(self, df): - ​ - yt = df[self.target_col].values + + xt = df[self.x_cols] - ​ + xt = self.x_scaler.transform(xt) - ​ + excess_cols = list(set(df.columns) - set(self.x_cols)) - ​ + pred_df = df[excess_cols].copy() - ​ - pred_df[self.target_col] = yt + + ys_pred = self.pls.predict(xt) + y_pred = self.y_scaler.inverse_transform(ys_pred).ravel() + pred_df[self.target_col] = y_pred + pred_df['T1'] = (xt @ self.P).squeeze() - ​ - pred_df = pd.merge(pred_df, self.limit[[self.target_col, 'lower', 'upper']], how='left', on=self.target_col) - ​ return pred_df ``` @@ -195,12 +198,14 @@ USING ENGINE = 'custom_model_engine'; ``` -Let's query for predictions by joining the custom model with the data table. +Let's query for predictions by joining the custom model with the data table. Please note that when querying for predictions, do not include the target column in the `input` data selection. ```sql -SELECT input.feature_column, model_target_column -FROM my_integration.my_table as input -JOIN custom_model as model; +SELECT + input.feature_column, + model.target AS predicted_target +FROM my_integration.my_table AS input +JOIN custom_model AS model; ``` diff --git a/docs/integrations/ai-engines/clipdrop.mdx b/docs 2/integrations/ai-engines/clipdrop.mdx similarity index 100% rename from docs/integrations/ai-engines/clipdrop.mdx rename to docs 2/integrations/ai-engines/clipdrop.mdx diff --git a/docs/integrations/ai-engines/cohere.mdx b/docs 2/integrations/ai-engines/cohere.mdx similarity index 100% rename from docs/integrations/ai-engines/cohere.mdx rename to docs 2/integrations/ai-engines/cohere.mdx diff --git a/docs/integrations/ai-engines/google_gemini.mdx b/docs 2/integrations/ai-engines/google_gemini.mdx similarity index 100% rename from docs/integrations/ai-engines/google_gemini.mdx rename to docs 2/integrations/ai-engines/google_gemini.mdx diff --git a/docs/integrations/ai-engines/huggingface.mdx b/docs 2/integrations/ai-engines/huggingface.mdx similarity index 100% rename from docs/integrations/ai-engines/huggingface.mdx rename to docs 2/integrations/ai-engines/huggingface.mdx diff --git a/docs/integrations/ai-engines/huggingface_inference_api.mdx b/docs 2/integrations/ai-engines/huggingface_inference_api.mdx similarity index 100% rename from docs/integrations/ai-engines/huggingface_inference_api.mdx rename to docs 2/integrations/ai-engines/huggingface_inference_api.mdx diff --git a/docs/integrations/ai-engines/langchain.mdx b/docs 2/integrations/ai-engines/langchain.mdx similarity index 100% rename from docs/integrations/ai-engines/langchain.mdx rename to docs 2/integrations/ai-engines/langchain.mdx diff --git a/docs/integrations/ai-engines/langchain_embedding.mdx b/docs 2/integrations/ai-engines/langchain_embedding.mdx similarity index 100% rename from docs/integrations/ai-engines/langchain_embedding.mdx rename to docs 2/integrations/ai-engines/langchain_embedding.mdx diff --git a/docs/integrations/ai-engines/lightfm.mdx b/docs 2/integrations/ai-engines/lightfm.mdx similarity index 100% rename from docs/integrations/ai-engines/lightfm.mdx rename to docs 2/integrations/ai-engines/lightfm.mdx diff --git a/docs/integrations/ai-engines/litellm.mdx b/docs 2/integrations/ai-engines/litellm.mdx similarity index 100% rename from docs/integrations/ai-engines/litellm.mdx rename to docs 2/integrations/ai-engines/litellm.mdx diff --git a/docs/integrations/ai-engines/llamaindex.mdx b/docs 2/integrations/ai-engines/llamaindex.mdx similarity index 100% rename from docs/integrations/ai-engines/llamaindex.mdx rename to docs 2/integrations/ai-engines/llamaindex.mdx diff --git a/docs/integrations/ai-engines/merlion.mdx b/docs 2/integrations/ai-engines/merlion.mdx similarity index 100% rename from docs/integrations/ai-engines/merlion.mdx rename to docs 2/integrations/ai-engines/merlion.mdx diff --git a/docs/integrations/ai-engines/mlflow.mdx b/docs 2/integrations/ai-engines/mlflow.mdx similarity index 100% rename from docs/integrations/ai-engines/mlflow.mdx rename to docs 2/integrations/ai-engines/mlflow.mdx diff --git a/docs/integrations/ai-engines/monkeylearn.mdx b/docs 2/integrations/ai-engines/monkeylearn.mdx similarity index 100% rename from docs/integrations/ai-engines/monkeylearn.mdx rename to docs 2/integrations/ai-engines/monkeylearn.mdx diff --git a/docs/integrations/ai-engines/ollama.mdx b/docs 2/integrations/ai-engines/ollama.mdx similarity index 93% rename from docs/integrations/ai-engines/ollama.mdx rename to docs 2/integrations/ai-engines/ollama.mdx index 287738d54b7..9cca48ce604 100644 --- a/docs/integrations/ai-engines/ollama.mdx +++ b/docs 2/integrations/ai-engines/ollama.mdx @@ -28,17 +28,17 @@ Here are the recommended system specifications: Create an AI engine from the [Ollama handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/ollama_handler). ```sql -CREATE ML_ENGINE ollama_engine +CREATE ML_ENGINE ollama FROM ollama; ``` -Create a model using `ollama_engine` as an engine. +Create a model using `ollama` as an engine. ```sql CREATE MODEL ollama_model PREDICT completion USING - engine = 'ollama_engine', -- engine name as created via CREATE ML_ENGINE + engine = 'ollama', -- engine name as created via CREATE ML_ENGINE model_name = 'model-name', -- model run with 'ollama run model-name' ollama_serve_url = 'http://localhost:11434'; ``` @@ -51,7 +51,7 @@ You can find [available models here](https://github.com/ollama/ollama?tab=readme ## Usage -The following usage examples utilize `ollama_engine` to create a model with the `CREATE MODEL` statement. +The following usage examples utilize `ollama` to create a model with the `CREATE MODEL` statement. Deploy and use the `llama3` model. @@ -63,7 +63,7 @@ Now deploy this model within MindsDB. CREATE MODEL llama3_model PREDICT completion USING - engine = 'ollama_engine', + engine = 'ollama', model_name = 'llama3'; ``` diff --git a/docs/integrations/ai-engines/openai.mdx b/docs 2/integrations/ai-engines/openai.mdx similarity index 100% rename from docs/integrations/ai-engines/openai.mdx rename to docs 2/integrations/ai-engines/openai.mdx diff --git a/docs/integrations/ai-engines/popularity-recommender.mdx b/docs 2/integrations/ai-engines/popularity-recommender.mdx similarity index 100% rename from docs/integrations/ai-engines/popularity-recommender.mdx rename to docs 2/integrations/ai-engines/popularity-recommender.mdx diff --git a/docs/integrations/ai-engines/portkey.mdx b/docs 2/integrations/ai-engines/portkey.mdx similarity index 100% rename from docs/integrations/ai-engines/portkey.mdx rename to docs 2/integrations/ai-engines/portkey.mdx diff --git a/docs/integrations/ai-engines/pycaret.mdx b/docs 2/integrations/ai-engines/pycaret.mdx similarity index 100% rename from docs/integrations/ai-engines/pycaret.mdx rename to docs 2/integrations/ai-engines/pycaret.mdx diff --git a/docs/integrations/ai-engines/ray-serve.mdx b/docs 2/integrations/ai-engines/ray-serve.mdx similarity index 100% rename from docs/integrations/ai-engines/ray-serve.mdx rename to docs 2/integrations/ai-engines/ray-serve.mdx diff --git a/docs/integrations/ai-engines/replicate-audio.mdx b/docs 2/integrations/ai-engines/replicate-audio.mdx similarity index 100% rename from docs/integrations/ai-engines/replicate-audio.mdx rename to docs 2/integrations/ai-engines/replicate-audio.mdx diff --git a/docs/integrations/ai-engines/replicate-img2text.mdx b/docs 2/integrations/ai-engines/replicate-img2text.mdx similarity index 100% rename from docs/integrations/ai-engines/replicate-img2text.mdx rename to docs 2/integrations/ai-engines/replicate-img2text.mdx diff --git a/docs/integrations/ai-engines/replicate-llm.mdx b/docs 2/integrations/ai-engines/replicate-llm.mdx similarity index 100% rename from docs/integrations/ai-engines/replicate-llm.mdx rename to docs 2/integrations/ai-engines/replicate-llm.mdx diff --git a/docs/integrations/ai-engines/replicate-text2img.mdx b/docs 2/integrations/ai-engines/replicate-text2img.mdx similarity index 100% rename from docs/integrations/ai-engines/replicate-text2img.mdx rename to docs 2/integrations/ai-engines/replicate-text2img.mdx diff --git a/docs/integrations/ai-engines/replicate-text2video.mdx b/docs 2/integrations/ai-engines/replicate-text2video.mdx similarity index 100% rename from docs/integrations/ai-engines/replicate-text2video.mdx rename to docs 2/integrations/ai-engines/replicate-text2video.mdx diff --git a/docs/integrations/ai-engines/twelvelabs.mdx b/docs 2/integrations/ai-engines/twelvelabs.mdx similarity index 100% rename from docs/integrations/ai-engines/twelvelabs.mdx rename to docs 2/integrations/ai-engines/twelvelabs.mdx diff --git a/docs/integrations/ai-engines/vertex.mdx b/docs 2/integrations/ai-engines/vertex.mdx similarity index 100% rename from docs/integrations/ai-engines/vertex.mdx rename to docs 2/integrations/ai-engines/vertex.mdx diff --git a/docs/integrations/ai-engines/xgboost.mdx b/docs 2/integrations/ai-engines/xgboost.mdx similarity index 100% rename from docs/integrations/ai-engines/xgboost.mdx rename to docs 2/integrations/ai-engines/xgboost.mdx diff --git a/docs/integrations/ai-overview.mdx b/docs 2/integrations/ai-overview.mdx similarity index 100% rename from docs/integrations/ai-overview.mdx rename to docs 2/integrations/ai-overview.mdx diff --git a/docs/integrations/app-integrations/binance.mdx b/docs 2/integrations/app-integrations/binance.mdx similarity index 77% rename from docs/integrations/app-integrations/binance.mdx rename to docs 2/integrations/app-integrations/binance.mdx index 6e622db5c3e..596e8a32531 100644 --- a/docs/integrations/app-integrations/binance.mdx +++ b/docs 2/integrations/app-integrations/binance.mdx @@ -69,51 +69,3 @@ LIMIT 10000; Supported intervals are [listed here](https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data) - -### Train a Model - -Here is how to create a time series model using 10000 trading intervals in the past with a duration of 1m. - -```sql -CREATE MODEL mindsdb.btc_forecast_model -FROM my_binance -( - SELECT * FROM aggregated_trade_data - WHERE symbol = 'BTCUSDT' - AND close_time < '2023-01-01' - AND interval = '1m' - LIMIT 10000; -) - -PREDICT open_price - -ORDER BY open_time -WINDOW 100 -HORIZON 10; -``` - - -For more accuracy, the limit can be set to a higher value (e.g. 100,000) - - -### Making Predictions - -First, let's create a view for the most recent BTCUSDT aggregate trade data: - -```sql -CREATE VIEW recent_btcusdt_data AS ( - SELECT * FROM my_binance.aggregated_trade_data - WHERE symbol = 'BTCUSDT' -) -``` - -Now let's predict the future price of BTC: - -```sql -SELECT m.* -FROM recent_btcusdt_data AS t -JOIN mindsdb.btc_forecast_model AS m -WHERE m.open_time > LATEST -``` - -This will give the predicted BTC price for the next 10 minutes (as the horizon is set to 10) in terms of USDT. diff --git a/docs/integrations/app-integrations/confluence.mdx b/docs 2/integrations/app-integrations/confluence.mdx similarity index 100% rename from docs/integrations/app-integrations/confluence.mdx rename to docs 2/integrations/app-integrations/confluence.mdx diff --git a/docs/integrations/app-integrations/dockerhub.mdx b/docs 2/integrations/app-integrations/dockerhub.mdx similarity index 100% rename from docs/integrations/app-integrations/dockerhub.mdx rename to docs 2/integrations/app-integrations/dockerhub.mdx diff --git a/docs/integrations/app-integrations/email.mdx b/docs 2/integrations/app-integrations/email.mdx similarity index 100% rename from docs/integrations/app-integrations/email.mdx rename to docs 2/integrations/app-integrations/email.mdx diff --git a/docs/integrations/app-integrations/github.mdx b/docs 2/integrations/app-integrations/github.mdx similarity index 100% rename from docs/integrations/app-integrations/github.mdx rename to docs 2/integrations/app-integrations/github.mdx diff --git a/docs/integrations/app-integrations/gitlab.mdx b/docs 2/integrations/app-integrations/gitlab.mdx similarity index 100% rename from docs/integrations/app-integrations/gitlab.mdx rename to docs 2/integrations/app-integrations/gitlab.mdx diff --git a/docs/integrations/app-integrations/gmail.mdx b/docs 2/integrations/app-integrations/gmail.mdx similarity index 100% rename from docs/integrations/app-integrations/gmail.mdx rename to docs 2/integrations/app-integrations/gmail.mdx diff --git a/docs/integrations/app-integrations/gong.mdx b/docs 2/integrations/app-integrations/gong.mdx similarity index 100% rename from docs/integrations/app-integrations/gong.mdx rename to docs 2/integrations/app-integrations/gong.mdx diff --git a/docs/integrations/app-integrations/google-analytics.mdx b/docs 2/integrations/app-integrations/google-analytics.mdx similarity index 100% rename from docs/integrations/app-integrations/google-analytics.mdx rename to docs 2/integrations/app-integrations/google-analytics.mdx diff --git a/docs/integrations/app-integrations/google-calendar.mdx b/docs 2/integrations/app-integrations/google-calendar.mdx similarity index 100% rename from docs/integrations/app-integrations/google-calendar.mdx rename to docs 2/integrations/app-integrations/google-calendar.mdx diff --git a/docs/integrations/app-integrations/hackernews.mdx b/docs 2/integrations/app-integrations/hackernews.mdx similarity index 100% rename from docs/integrations/app-integrations/hackernews.mdx rename to docs 2/integrations/app-integrations/hackernews.mdx diff --git a/docs/integrations/app-integrations/hubspot.mdx b/docs 2/integrations/app-integrations/hubspot.mdx similarity index 100% rename from docs/integrations/app-integrations/hubspot.mdx rename to docs 2/integrations/app-integrations/hubspot.mdx diff --git a/docs/integrations/app-integrations/instatus.mdx b/docs 2/integrations/app-integrations/instatus.mdx similarity index 90% rename from docs/integrations/app-integrations/instatus.mdx rename to docs 2/integrations/app-integrations/instatus.mdx index 93624974bd7..65f97e68fc1 100644 --- a/docs/integrations/app-integrations/instatus.mdx +++ b/docs 2/integrations/app-integrations/instatus.mdx @@ -41,7 +41,7 @@ To create a new status page, use the `INSERT` statement: ```sql INSERT INTO mindsdb_instatus.status_pages (email, name, subdomain, components, logoUrl, faviconUrl, websiteUrl, language, useLargeHeader, brandColor, okColor, disruptedColor, degradedColor, downColor, noticeColor, unknownColor, googleAnalytics, subscribeBySms, smsService, twilioSid, twilioToken, twilioSender, nexmoKey, nexmoSecret, nexmoSender, htmlInMeta, htmlAboveHeader, htmlBelowHeader, htmlAboveFooter, htmlBelowFooter, htmlBelowSummary, cssGlobal, launchDate, dateFormat, dateFormatShort, timeFormat) -VALUES ('yourname@gmail.com', 'mindsdb', 'mindsdb-instatus', '["Website", "App", "API"]', 'https://instatus.com/sample.png', 'https://instatus.com/favicon-32x32.png', 'https://instatus.com', 'en', true, '#111', '#33B17E', '#FF8C03', '#ECC94B', '#DC123D', '#70808F', '#DFE0E1', 'UA-00000000-1', true, 'twilio', 'YOUR_TWILIO_SID', 'YOUR_TWILIO_TOKEN', 'YOUR_TWILIO_SENDER', null, null, null, null, null, null, null, null, null, null, 'MMMMMM d, yyyy', 'MMM yyyy', 'p'); +VALUES ('yourname@gmail.com', 'mindsdb', 'mindsdb-instatus', '["Website", "App", "API"]', 'https://instatus.com/sample.png', 'https://instatus.com/favicon-32x32.png', 'https://instatus.com', 'en', 'true', '#111', '#33B17E', '#FF8C03', '#ECC94B', '#DC123D', '#70808F', '#DFE0E1', 'UA-00000000-1', 'true', 'twilio', 'YOUR_TWILIO_SID', 'YOUR_TWILIO_TOKEN', 'YOUR_TWILIO_SENDER', null, null, null, null, null, null, null, null, null, null, null, 'MMMMMM d, yyyy', 'MMM yyyy', 'p'); ``` diff --git a/docs/integrations/app-integrations/intercom.mdx b/docs 2/integrations/app-integrations/intercom.mdx similarity index 100% rename from docs/integrations/app-integrations/intercom.mdx rename to docs 2/integrations/app-integrations/intercom.mdx diff --git a/docs 2/integrations/app-integrations/jira.mdx b/docs 2/integrations/app-integrations/jira.mdx new file mode 100644 index 00000000000..3671aa31af4 --- /dev/null +++ b/docs 2/integrations/app-integrations/jira.mdx @@ -0,0 +1,100 @@ +--- +title: Jira +sidebarTitle: Jira +--- + +This documentation describes the integration of MindsDB with [Jira](https://www.atlassian.com/software/jira/guides/getting-started/introduction), the #1 agile project management tool used by teams to plan, track, release and support world-class software with confidence. +The integration allows MindsDB to access data from Jira and enhance it with AI capabilities. + +## Prerequisites + +Before proceeding, ensure the following prerequisites are met: + +1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). +2. To connect Jira to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). + +## Connection + +Establish a connection to Jira from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/jira_handler) as an engine. + +```sql +CREATE DATABASE jira_datasource +WITH + ENGINE = 'jira', + PARAMETERS = { + "jira_url": "https://example.atlassian.net", + "jira_username": "john.doe@example.com", + "jira_api_token": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6", + "cloud": true + }; +``` + +Required connection parameters include the following: + +- `jira_url`: The base URL for your Jira instance/server. +- `cloud` (optional): Set `true` for Jira Cloud or `false` for Jira Server. Defaults to `true`. +- Jira Cloud credentials: + - `jira_username` + - `jira_api_token` +- Jira Server credentials (set `cloud: false`): + - Either `jira_personal_access_token`, **or** + - `jira_username` and `jira_password` + + +Refer this [guide](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/) for instructions on how to create API tokens for your account. + + +## Usage + +Retrieve data from a specified table by providing the integration and table names: + +```sql +SELECT * +FROM jira_datasource.table_name +LIMIT 10; +``` + + +The above example utilize `jira_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. + + +## Available tables + +The handler registers the following tables: + +- `projects`: Basic project metadata. +- `issues`: Normalized issue fields (project, summary, description, priority, status, labels, components, creator/reporter/assignee, timestamps). +- `attachments`: Attachments derived from issues. +- `comments`: Comments derived from issues. +- `users`: Users available to the current Jira context. Column set depends on `cloud`: + - Cloud columns: `accountId, accountType, emailAddress, displayName, active, timeZone, locale, applicationRoles, avatarUrls, groups` + - Server columns: `key, name, emailAddress, displayName, active, timeZone, locale, lastLoginTime, applicationRoles, avatarUrls, groups, deleted, expand` +- `groups`: User groups (`groupId, name, html`). + +Attachments and comments are fetched by first loading issues. Use `LIMIT` whenever possible to reduce API calls. + +## Query examples + +List projects: + +```sql +SELECT id, key, name +FROM jira_datasource.projects; +``` + +Fetch recent issues for a project: + +```sql +SELECT key, summary, status, assignee, created +FROM jira_datasource.issues +WHERE project_key = 'ENG' +LIMIT 50; +``` + +Retrieve comments for a specific issue: + +```sql +SELECT body, author, created +FROM jira_datasource.comments +WHERE issue_key = 'ENG-123'; +``` diff --git a/docs/integrations/app-integrations/mediawiki.mdx b/docs 2/integrations/app-integrations/mediawiki.mdx similarity index 100% rename from docs/integrations/app-integrations/mediawiki.mdx rename to docs 2/integrations/app-integrations/mediawiki.mdx diff --git a/docs/integrations/app-integrations/microsoft-onedrive.mdx b/docs 2/integrations/app-integrations/microsoft-onedrive.mdx similarity index 100% rename from docs/integrations/app-integrations/microsoft-onedrive.mdx rename to docs 2/integrations/app-integrations/microsoft-onedrive.mdx diff --git a/docs/integrations/app-integrations/microsoft-teams.mdx b/docs 2/integrations/app-integrations/microsoft-teams.mdx similarity index 100% rename from docs/integrations/app-integrations/microsoft-teams.mdx rename to docs 2/integrations/app-integrations/microsoft-teams.mdx diff --git a/docs/integrations/app-integrations/netsuite.mdx b/docs 2/integrations/app-integrations/netsuite.mdx similarity index 100% rename from docs/integrations/app-integrations/netsuite.mdx rename to docs 2/integrations/app-integrations/netsuite.mdx diff --git a/docs/integrations/app-integrations/newsapi.mdx b/docs 2/integrations/app-integrations/newsapi.mdx similarity index 100% rename from docs/integrations/app-integrations/newsapi.mdx rename to docs 2/integrations/app-integrations/newsapi.mdx diff --git a/docs/integrations/app-integrations/paypal.mdx b/docs 2/integrations/app-integrations/paypal.mdx similarity index 100% rename from docs/integrations/app-integrations/paypal.mdx rename to docs 2/integrations/app-integrations/paypal.mdx diff --git a/docs/integrations/app-integrations/plaid.mdx b/docs 2/integrations/app-integrations/plaid.mdx similarity index 100% rename from docs/integrations/app-integrations/plaid.mdx rename to docs 2/integrations/app-integrations/plaid.mdx diff --git a/docs/integrations/app-integrations/pypi.mdx b/docs 2/integrations/app-integrations/pypi.mdx similarity index 100% rename from docs/integrations/app-integrations/pypi.mdx rename to docs 2/integrations/app-integrations/pypi.mdx diff --git a/docs/integrations/app-integrations/reddit.mdx b/docs 2/integrations/app-integrations/reddit.mdx similarity index 100% rename from docs/integrations/app-integrations/reddit.mdx rename to docs 2/integrations/app-integrations/reddit.mdx diff --git a/docs 2/integrations/app-integrations/rest-api.mdx b/docs 2/integrations/app-integrations/rest-api.mdx new file mode 100644 index 00000000000..1ead2f519ae --- /dev/null +++ b/docs 2/integrations/app-integrations/rest-api.mdx @@ -0,0 +1,148 @@ +--- +title: REST API +sidebarTitle: REST API +--- + +In this section, we present how to connect any REST API to MindsDB using bearer-token authentication. + +The REST API handler is a generic integration that lets you forward HTTP requests to any API through MindsDB using stored credentials. Unlike named integrations (HubSpot, Shopify, etc.), it requires no handler-specific knowledge β€” just a base URL and a bearer token. + +This is useful for APIs that MindsDB doesn't have a dedicated handler for, or when you only need direct HTTP access without SQL table mapping. + +## Connection + +The required arguments to establish a connection are as follows: + +- `base_url`: the base URL of the REST API (e.g. `https://api.example.com`). All request paths are appended to this URL. +- `bearer_token`: the token used for authentication. Injected as `Authorization: Bearer ` on every request. + +Optional arguments: + +- `default_headers`: a JSON object of static headers added to every request (e.g. `{"Accept": "application/json"}`). +- `allowed_hosts`: a list of allowed hostnames for requests. Defaults to the hostname of `base_url`. Use `["*"]` to disable host containment. +- `test_path`: the path used by the test endpoint to verify connectivity. Defaults to `/`. + +To connect a REST API to MindsDB, create a new database: + +```sql +CREATE DATABASE my_api +WITH ENGINE = 'rest_api', +PARAMETERS = { + "base_url": "https://api.example.com", + "bearer_token": "your_token_here" +}; +``` + +### Example: Connect to HubSpot + +```sql +CREATE DATABASE my_hubspot +WITH ENGINE = 'rest_api', +PARAMETERS = { + "base_url": "https://api.hubapi.com", + "bearer_token": "pat-eu1-..." +}; +``` + +### Example: Connect with default headers and a custom test path + +```sql +CREATE DATABASE my_internal_api +WITH ENGINE = 'rest_api', +PARAMETERS = { + "base_url": "https://internal.example.com/api/v2", + "bearer_token": "sk-...", + "default_headers": {"Accept": "application/json"}, + "test_path": "/health" +}; +``` + +### Example: Multiple allowed hosts + +```sql +CREATE DATABASE my_multi_region_api +WITH ENGINE = 'rest_api', +PARAMETERS = { + "base_url": "https://api.example.com", + "bearer_token": "your_token", + "allowed_hosts": ["api.example.com", "api.eu.example.com"] +}; +``` + +## Usage + +This handler is **passthrough-only** β€” it does not expose SQL tables. All interaction is through the REST passthrough endpoint. + +### Sending requests + +Forward HTTP requests to the upstream API: + +``` +POST /api/integrations/my_api/passthrough +``` + +```json +{ + "method": "GET", + "path": "/v1/users", + "query": {"limit": "10"}, + "headers": {"Accept": "application/json"} +} +``` + +The response wraps the upstream HTTP response: + +```json +{ + "status_code": 200, + "headers": {"content-type": "application/json"}, + "body": {"results": [...]}, + "content_type": "application/json" +} +``` + +Supported HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`. + +### Testing the connection + +Verify that the base URL, token, and host allowlist are configured correctly: + +``` +POST /api/integrations/my_api/passthrough/test +``` + +A successful response: + +```json +{"ok": true, "status_code": 200, "host": "api.example.com", "latency_ms": 140} +``` + +A failed response: + +```json +{"ok": false, "error_code": "auth_failed", "message": "upstream rejected credentials; base URL and allowlist look correct"} +``` + +## Security + +- Credentials are stored in MindsDB and never exposed to the caller. +- Requests are restricted to hostnames in the allowlist. Private and loopback IP addresses are rejected by default. +- Callers cannot override `Authorization`, `Host`, `Cookie`, or `Proxy-*` headers. +- If the upstream API echoes the token in responses, it is replaced with `[REDACTED_API_KEY]`. +- Request bodies are capped at 1 MB, response bodies at 10 MB. + + +**`host 'X' is not in the datasource allowlist`** + +The request path resolved to a different hostname than `base_url`. Add the hostname to `allowed_hosts`, or use `["*"]` to disable host containment (not recommended for production). + + + +**`upstream rejected credentials (401/403)`** + +The token is invalid, expired, or missing required scopes. Verify the token with the upstream API provider. + + + +For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/rest_api_handler/README.md). + diff --git a/docs/integrations/app-integrations/salesforce.mdx b/docs 2/integrations/app-integrations/salesforce.mdx similarity index 100% rename from docs/integrations/app-integrations/salesforce.mdx rename to docs 2/integrations/app-integrations/salesforce.mdx diff --git a/docs/integrations/app-integrations/sendinblue.mdx b/docs 2/integrations/app-integrations/sendinblue.mdx similarity index 100% rename from docs/integrations/app-integrations/sendinblue.mdx rename to docs 2/integrations/app-integrations/sendinblue.mdx diff --git a/docs/integrations/app-integrations/shopify.mdx b/docs 2/integrations/app-integrations/shopify.mdx similarity index 100% rename from docs/integrations/app-integrations/shopify.mdx rename to docs 2/integrations/app-integrations/shopify.mdx diff --git a/docs/integrations/app-integrations/slack.mdx b/docs 2/integrations/app-integrations/slack.mdx similarity index 100% rename from docs/integrations/app-integrations/slack.mdx rename to docs 2/integrations/app-integrations/slack.mdx diff --git a/docs/integrations/app-integrations/strapi.mdx b/docs 2/integrations/app-integrations/strapi.mdx similarity index 90% rename from docs/integrations/app-integrations/strapi.mdx rename to docs 2/integrations/app-integrations/strapi.mdx index cdb66e063e4..e92d560d632 100644 --- a/docs/integrations/app-integrations/strapi.mdx +++ b/docs 2/integrations/app-integrations/strapi.mdx @@ -14,7 +14,7 @@ To use the Strapi Handler, initialize it with the following parameters: - `host`: Strapi server host. - `port`: Strapi server port (typically 1337). - `api_token`: Strapi server API token for authentication. -- `plural_api_ids`: List of plural API IDs for the collections. +- `endpoints`: List of collection endpoints. To get started, create a Strapi engine database with the following SQL command: @@ -25,7 +25,7 @@ PARAMETERS = { "host" : "", --- Host (can be an IP address or URL). "port" : "", --- Common port is 1337. "api_token": "", --- API token of the Strapi server. - "plural_api_ids" : [""] --- Plural API IDs of the collections. + "endpoints" : [""] --- Collection endpoints. }; ``` @@ -43,7 +43,7 @@ Filter data based on specific criteria: ```sql SELECT * FROM myshop. -WHERE id = +WHERE documentId = ''; ``` Insert new data into a collection: @@ -64,7 +64,7 @@ Modify existing data in a collection: ```sql UPDATE myshop. SET = , = , ... -WHERE id = ; +WHERE documentId = ''; ``` diff --git a/docs/integrations/app-integrations/stripe.mdx b/docs 2/integrations/app-integrations/stripe.mdx similarity index 100% rename from docs/integrations/app-integrations/stripe.mdx rename to docs 2/integrations/app-integrations/stripe.mdx diff --git a/docs/integrations/app-integrations/symbl.mdx b/docs 2/integrations/app-integrations/symbl.mdx similarity index 100% rename from docs/integrations/app-integrations/symbl.mdx rename to docs 2/integrations/app-integrations/symbl.mdx diff --git a/docs/integrations/app-integrations/twitter.mdx b/docs 2/integrations/app-integrations/twitter.mdx similarity index 100% rename from docs/integrations/app-integrations/twitter.mdx rename to docs 2/integrations/app-integrations/twitter.mdx diff --git a/docs/integrations/app-integrations/web-crawler.mdx b/docs 2/integrations/app-integrations/web-crawler.mdx similarity index 100% rename from docs/integrations/app-integrations/web-crawler.mdx rename to docs 2/integrations/app-integrations/web-crawler.mdx diff --git a/docs/integrations/app-integrations/youtube.mdx b/docs 2/integrations/app-integrations/youtube.mdx similarity index 100% rename from docs/integrations/app-integrations/youtube.mdx rename to docs 2/integrations/app-integrations/youtube.mdx diff --git a/docs/integrations/data-integrations/airtable.mdx b/docs 2/integrations/data-integrations/airtable.mdx similarity index 100% rename from docs/integrations/data-integrations/airtable.mdx rename to docs 2/integrations/data-integrations/airtable.mdx diff --git a/docs/integrations/data-integrations/all-data-integrations.mdx b/docs 2/integrations/data-integrations/all-data-integrations.mdx similarity index 100% rename from docs/integrations/data-integrations/all-data-integrations.mdx rename to docs 2/integrations/data-integrations/all-data-integrations.mdx diff --git a/docs/integrations/data-integrations/amazon-aurora.mdx b/docs 2/integrations/data-integrations/amazon-aurora.mdx similarity index 100% rename from docs/integrations/data-integrations/amazon-aurora.mdx rename to docs 2/integrations/data-integrations/amazon-aurora.mdx diff --git a/docs/integrations/data-integrations/amazon-dynamodb.mdx b/docs 2/integrations/data-integrations/amazon-dynamodb.mdx similarity index 100% rename from docs/integrations/data-integrations/amazon-dynamodb.mdx rename to docs 2/integrations/data-integrations/amazon-dynamodb.mdx diff --git a/docs/integrations/data-integrations/amazon-redshift.mdx b/docs 2/integrations/data-integrations/amazon-redshift.mdx similarity index 100% rename from docs/integrations/data-integrations/amazon-redshift.mdx rename to docs 2/integrations/data-integrations/amazon-redshift.mdx diff --git a/docs/integrations/data-integrations/amazon-s3.mdx b/docs 2/integrations/data-integrations/amazon-s3.mdx similarity index 100% rename from docs/integrations/data-integrations/amazon-s3.mdx rename to docs 2/integrations/data-integrations/amazon-s3.mdx diff --git a/docs/integrations/data-integrations/apache-cassandra.mdx b/docs 2/integrations/data-integrations/apache-cassandra.mdx similarity index 100% rename from docs/integrations/data-integrations/apache-cassandra.mdx rename to docs 2/integrations/data-integrations/apache-cassandra.mdx diff --git a/docs/integrations/data-integrations/apache-druid.mdx b/docs 2/integrations/data-integrations/apache-druid.mdx similarity index 100% rename from docs/integrations/data-integrations/apache-druid.mdx rename to docs 2/integrations/data-integrations/apache-druid.mdx diff --git a/docs/integrations/data-integrations/apache-hive.mdx b/docs 2/integrations/data-integrations/apache-hive.mdx similarity index 100% rename from docs/integrations/data-integrations/apache-hive.mdx rename to docs 2/integrations/data-integrations/apache-hive.mdx diff --git a/docs/integrations/data-integrations/apache-ignite.mdx b/docs 2/integrations/data-integrations/apache-ignite.mdx similarity index 100% rename from docs/integrations/data-integrations/apache-ignite.mdx rename to docs 2/integrations/data-integrations/apache-ignite.mdx diff --git a/docs/integrations/data-integrations/apache-impala.mdx b/docs 2/integrations/data-integrations/apache-impala.mdx similarity index 100% rename from docs/integrations/data-integrations/apache-impala.mdx rename to docs 2/integrations/data-integrations/apache-impala.mdx diff --git a/docs/integrations/data-integrations/apache-pinot.mdx b/docs 2/integrations/data-integrations/apache-pinot.mdx similarity index 100% rename from docs/integrations/data-integrations/apache-pinot.mdx rename to docs 2/integrations/data-integrations/apache-pinot.mdx diff --git a/docs/integrations/data-integrations/apache-solr.mdx b/docs 2/integrations/data-integrations/apache-solr.mdx similarity index 100% rename from docs/integrations/data-integrations/apache-solr.mdx rename to docs 2/integrations/data-integrations/apache-solr.mdx diff --git a/docs/integrations/data-integrations/ckan.mdx b/docs 2/integrations/data-integrations/ckan.mdx similarity index 100% rename from docs/integrations/data-integrations/ckan.mdx rename to docs 2/integrations/data-integrations/ckan.mdx diff --git a/docs/integrations/data-integrations/clickhouse.mdx b/docs 2/integrations/data-integrations/clickhouse.mdx similarity index 100% rename from docs/integrations/data-integrations/clickhouse.mdx rename to docs 2/integrations/data-integrations/clickhouse.mdx diff --git a/docs/integrations/data-integrations/cloud-spanner.mdx b/docs 2/integrations/data-integrations/cloud-spanner.mdx similarity index 100% rename from docs/integrations/data-integrations/cloud-spanner.mdx rename to docs 2/integrations/data-integrations/cloud-spanner.mdx diff --git a/docs/integrations/data-integrations/cockroachdb.mdx b/docs 2/integrations/data-integrations/cockroachdb.mdx similarity index 100% rename from docs/integrations/data-integrations/cockroachdb.mdx rename to docs 2/integrations/data-integrations/cockroachdb.mdx diff --git a/docs/integrations/data-integrations/couchbase.mdx b/docs 2/integrations/data-integrations/couchbase.mdx similarity index 100% rename from docs/integrations/data-integrations/couchbase.mdx rename to docs 2/integrations/data-integrations/couchbase.mdx diff --git a/docs/integrations/data-integrations/cratedb.mdx b/docs 2/integrations/data-integrations/cratedb.mdx similarity index 100% rename from docs/integrations/data-integrations/cratedb.mdx rename to docs 2/integrations/data-integrations/cratedb.mdx diff --git a/docs/integrations/data-integrations/d0lt.mdx b/docs 2/integrations/data-integrations/d0lt.mdx similarity index 100% rename from docs/integrations/data-integrations/d0lt.mdx rename to docs 2/integrations/data-integrations/d0lt.mdx diff --git a/docs/integrations/data-integrations/databend.mdx b/docs 2/integrations/data-integrations/databend.mdx similarity index 100% rename from docs/integrations/data-integrations/databend.mdx rename to docs 2/integrations/data-integrations/databend.mdx diff --git a/docs/integrations/data-integrations/databricks.mdx b/docs 2/integrations/data-integrations/databricks.mdx similarity index 100% rename from docs/integrations/data-integrations/databricks.mdx rename to docs 2/integrations/data-integrations/databricks.mdx diff --git a/docs/integrations/data-integrations/datastax.mdx b/docs 2/integrations/data-integrations/datastax.mdx similarity index 100% rename from docs/integrations/data-integrations/datastax.mdx rename to docs 2/integrations/data-integrations/datastax.mdx diff --git a/docs/integrations/data-integrations/duckdb.mdx b/docs 2/integrations/data-integrations/duckdb.mdx similarity index 100% rename from docs/integrations/data-integrations/duckdb.mdx rename to docs 2/integrations/data-integrations/duckdb.mdx diff --git a/docs/integrations/data-integrations/edgelessdb.mdx b/docs 2/integrations/data-integrations/edgelessdb.mdx similarity index 100% rename from docs/integrations/data-integrations/edgelessdb.mdx rename to docs 2/integrations/data-integrations/edgelessdb.mdx diff --git a/docs/integrations/data-integrations/elasticsearch.mdx b/docs 2/integrations/data-integrations/elasticsearch.mdx similarity index 100% rename from docs/integrations/data-integrations/elasticsearch.mdx rename to docs 2/integrations/data-integrations/elasticsearch.mdx diff --git a/docs/integrations/data-integrations/firebird.mdx b/docs 2/integrations/data-integrations/firebird.mdx similarity index 100% rename from docs/integrations/data-integrations/firebird.mdx rename to docs 2/integrations/data-integrations/firebird.mdx diff --git a/docs/integrations/data-integrations/google-bigquery.mdx b/docs 2/integrations/data-integrations/google-bigquery.mdx similarity index 100% rename from docs/integrations/data-integrations/google-bigquery.mdx rename to docs 2/integrations/data-integrations/google-bigquery.mdx diff --git a/docs/integrations/data-integrations/google-cloud-sql.mdx b/docs 2/integrations/data-integrations/google-cloud-sql.mdx similarity index 100% rename from docs/integrations/data-integrations/google-cloud-sql.mdx rename to docs 2/integrations/data-integrations/google-cloud-sql.mdx diff --git a/docs/integrations/data-integrations/google-sheets.mdx b/docs 2/integrations/data-integrations/google-sheets.mdx similarity index 100% rename from docs/integrations/data-integrations/google-sheets.mdx rename to docs 2/integrations/data-integrations/google-sheets.mdx diff --git a/docs/integrations/data-integrations/greptimedb.mdx b/docs 2/integrations/data-integrations/greptimedb.mdx similarity index 100% rename from docs/integrations/data-integrations/greptimedb.mdx rename to docs 2/integrations/data-integrations/greptimedb.mdx diff --git a/docs/integrations/data-integrations/ibm-db2.mdx b/docs 2/integrations/data-integrations/ibm-db2.mdx similarity index 100% rename from docs/integrations/data-integrations/ibm-db2.mdx rename to docs 2/integrations/data-integrations/ibm-db2.mdx diff --git a/docs/integrations/data-integrations/ibm-informix.mdx b/docs 2/integrations/data-integrations/ibm-informix.mdx similarity index 100% rename from docs/integrations/data-integrations/ibm-informix.mdx rename to docs 2/integrations/data-integrations/ibm-informix.mdx diff --git a/docs/integrations/data-integrations/influxdb.mdx b/docs 2/integrations/data-integrations/influxdb.mdx similarity index 100% rename from docs/integrations/data-integrations/influxdb.mdx rename to docs 2/integrations/data-integrations/influxdb.mdx diff --git a/docs/integrations/data-integrations/mariadb.mdx b/docs 2/integrations/data-integrations/mariadb.mdx similarity index 100% rename from docs/integrations/data-integrations/mariadb.mdx rename to docs 2/integrations/data-integrations/mariadb.mdx diff --git a/docs/integrations/data-integrations/matrixone.mdx b/docs 2/integrations/data-integrations/matrixone.mdx similarity index 100% rename from docs/integrations/data-integrations/matrixone.mdx rename to docs 2/integrations/data-integrations/matrixone.mdx diff --git a/docs/integrations/data-integrations/microsoft-access.mdx b/docs 2/integrations/data-integrations/microsoft-access.mdx similarity index 100% rename from docs/integrations/data-integrations/microsoft-access.mdx rename to docs 2/integrations/data-integrations/microsoft-access.mdx diff --git a/docs/integrations/data-integrations/microsoft-sql-server.mdx b/docs 2/integrations/data-integrations/microsoft-sql-server.mdx similarity index 100% rename from docs/integrations/data-integrations/microsoft-sql-server.mdx rename to docs 2/integrations/data-integrations/microsoft-sql-server.mdx diff --git a/docs/integrations/data-integrations/monetdb.mdx b/docs 2/integrations/data-integrations/monetdb.mdx similarity index 100% rename from docs/integrations/data-integrations/monetdb.mdx rename to docs 2/integrations/data-integrations/monetdb.mdx diff --git a/docs/integrations/data-integrations/mongodb.mdx b/docs 2/integrations/data-integrations/mongodb.mdx similarity index 100% rename from docs/integrations/data-integrations/mongodb.mdx rename to docs 2/integrations/data-integrations/mongodb.mdx diff --git a/docs/integrations/data-integrations/mysql.mdx b/docs 2/integrations/data-integrations/mysql.mdx similarity index 100% rename from docs/integrations/data-integrations/mysql.mdx rename to docs 2/integrations/data-integrations/mysql.mdx diff --git a/docs/integrations/data-integrations/oceanbase.mdx b/docs 2/integrations/data-integrations/oceanbase.mdx similarity index 100% rename from docs/integrations/data-integrations/oceanbase.mdx rename to docs 2/integrations/data-integrations/oceanbase.mdx diff --git a/docs/integrations/data-integrations/opengauss.mdx b/docs 2/integrations/data-integrations/opengauss.mdx similarity index 100% rename from docs/integrations/data-integrations/opengauss.mdx rename to docs 2/integrations/data-integrations/opengauss.mdx diff --git a/docs/integrations/data-integrations/oracle.mdx b/docs 2/integrations/data-integrations/oracle.mdx similarity index 100% rename from docs/integrations/data-integrations/oracle.mdx rename to docs 2/integrations/data-integrations/oracle.mdx diff --git a/docs/integrations/data-integrations/orioledb.mdx b/docs 2/integrations/data-integrations/orioledb.mdx similarity index 100% rename from docs/integrations/data-integrations/orioledb.mdx rename to docs 2/integrations/data-integrations/orioledb.mdx diff --git a/docs/integrations/data-integrations/planetscale.mdx b/docs 2/integrations/data-integrations/planetscale.mdx similarity index 100% rename from docs/integrations/data-integrations/planetscale.mdx rename to docs 2/integrations/data-integrations/planetscale.mdx diff --git a/docs/integrations/data-integrations/postgresql.mdx b/docs 2/integrations/data-integrations/postgresql.mdx similarity index 100% rename from docs/integrations/data-integrations/postgresql.mdx rename to docs 2/integrations/data-integrations/postgresql.mdx diff --git a/docs/integrations/data-integrations/questdb.mdx b/docs 2/integrations/data-integrations/questdb.mdx similarity index 100% rename from docs/integrations/data-integrations/questdb.mdx rename to docs 2/integrations/data-integrations/questdb.mdx diff --git a/docs/integrations/data-integrations/sap-hana.mdx b/docs 2/integrations/data-integrations/sap-hana.mdx similarity index 100% rename from docs/integrations/data-integrations/sap-hana.mdx rename to docs 2/integrations/data-integrations/sap-hana.mdx diff --git a/docs/integrations/data-integrations/sap-sql-anywhere.mdx b/docs 2/integrations/data-integrations/sap-sql-anywhere.mdx similarity index 100% rename from docs/integrations/data-integrations/sap-sql-anywhere.mdx rename to docs 2/integrations/data-integrations/sap-sql-anywhere.mdx diff --git a/docs/integrations/data-integrations/scylladb.mdx b/docs 2/integrations/data-integrations/scylladb.mdx similarity index 100% rename from docs/integrations/data-integrations/scylladb.mdx rename to docs 2/integrations/data-integrations/scylladb.mdx diff --git a/docs/integrations/data-integrations/singlestore.mdx b/docs 2/integrations/data-integrations/singlestore.mdx similarity index 100% rename from docs/integrations/data-integrations/singlestore.mdx rename to docs 2/integrations/data-integrations/singlestore.mdx diff --git a/docs/integrations/data-integrations/snowflake.mdx b/docs 2/integrations/data-integrations/snowflake.mdx similarity index 100% rename from docs/integrations/data-integrations/snowflake.mdx rename to docs 2/integrations/data-integrations/snowflake.mdx diff --git a/docs/integrations/data-integrations/sqlite.mdx b/docs 2/integrations/data-integrations/sqlite.mdx similarity index 100% rename from docs/integrations/data-integrations/sqlite.mdx rename to docs 2/integrations/data-integrations/sqlite.mdx diff --git a/docs/integrations/data-integrations/starrocks.mdx b/docs 2/integrations/data-integrations/starrocks.mdx similarity index 100% rename from docs/integrations/data-integrations/starrocks.mdx rename to docs 2/integrations/data-integrations/starrocks.mdx diff --git a/docs/integrations/data-integrations/supabase.mdx b/docs 2/integrations/data-integrations/supabase.mdx similarity index 100% rename from docs/integrations/data-integrations/supabase.mdx rename to docs 2/integrations/data-integrations/supabase.mdx diff --git a/docs/integrations/data-integrations/surrealdb.mdx b/docs 2/integrations/data-integrations/surrealdb.mdx similarity index 100% rename from docs/integrations/data-integrations/surrealdb.mdx rename to docs 2/integrations/data-integrations/surrealdb.mdx diff --git a/docs/integrations/data-integrations/tdengine.mdx b/docs 2/integrations/data-integrations/tdengine.mdx similarity index 100% rename from docs/integrations/data-integrations/tdengine.mdx rename to docs 2/integrations/data-integrations/tdengine.mdx diff --git a/docs/integrations/data-integrations/teradata.mdx b/docs 2/integrations/data-integrations/teradata.mdx similarity index 100% rename from docs/integrations/data-integrations/teradata.mdx rename to docs 2/integrations/data-integrations/teradata.mdx diff --git a/docs/integrations/data-integrations/tidb.mdx b/docs 2/integrations/data-integrations/tidb.mdx similarity index 100% rename from docs/integrations/data-integrations/tidb.mdx rename to docs 2/integrations/data-integrations/tidb.mdx diff --git a/docs/integrations/data-integrations/timescaledb.mdx b/docs 2/integrations/data-integrations/timescaledb.mdx similarity index 100% rename from docs/integrations/data-integrations/timescaledb.mdx rename to docs 2/integrations/data-integrations/timescaledb.mdx diff --git a/docs/integrations/data-integrations/trino.mdx b/docs 2/integrations/data-integrations/trino.mdx similarity index 100% rename from docs/integrations/data-integrations/trino.mdx rename to docs 2/integrations/data-integrations/trino.mdx diff --git a/docs/integrations/data-integrations/vertica.mdx b/docs 2/integrations/data-integrations/vertica.mdx similarity index 100% rename from docs/integrations/data-integrations/vertica.mdx rename to docs 2/integrations/data-integrations/vertica.mdx diff --git a/docs/integrations/data-integrations/vitess.mdx b/docs 2/integrations/data-integrations/vitess.mdx similarity index 100% rename from docs/integrations/data-integrations/vitess.mdx rename to docs 2/integrations/data-integrations/vitess.mdx diff --git a/docs/integrations/data-integrations/yugabytedb.mdx b/docs 2/integrations/data-integrations/yugabytedb.mdx similarity index 100% rename from docs/integrations/data-integrations/yugabytedb.mdx rename to docs 2/integrations/data-integrations/yugabytedb.mdx diff --git a/docs/integrations/data-overview.mdx b/docs 2/integrations/data-overview.mdx similarity index 100% rename from docs/integrations/data-overview.mdx rename to docs 2/integrations/data-overview.mdx diff --git a/docs/integrations/files/csv-xlsx-xls.mdx b/docs 2/integrations/files/csv-xlsx-xls.mdx similarity index 100% rename from docs/integrations/files/csv-xlsx-xls.mdx rename to docs 2/integrations/files/csv-xlsx-xls.mdx diff --git a/docs/integrations/files/json.mdx b/docs 2/integrations/files/json.mdx similarity index 100% rename from docs/integrations/files/json.mdx rename to docs 2/integrations/files/json.mdx diff --git a/docs/integrations/files/parquet.mdx b/docs 2/integrations/files/parquet.mdx similarity index 100% rename from docs/integrations/files/parquet.mdx rename to docs 2/integrations/files/parquet.mdx diff --git a/docs/integrations/files/pdf.mdx b/docs 2/integrations/files/pdf.mdx similarity index 100% rename from docs/integrations/files/pdf.mdx rename to docs 2/integrations/files/pdf.mdx diff --git a/docs/integrations/files/txt.mdx b/docs 2/integrations/files/txt.mdx similarity index 100% rename from docs/integrations/files/txt.mdx rename to docs 2/integrations/files/txt.mdx diff --git a/docs/integrations/integrations.mdx b/docs 2/integrations/integrations.mdx similarity index 100% rename from docs/integrations/integrations.mdx rename to docs 2/integrations/integrations.mdx diff --git a/docs/integrations/sample-database.mdx b/docs 2/integrations/sample-database.mdx similarity index 100% rename from docs/integrations/sample-database.mdx rename to docs 2/integrations/sample-database.mdx diff --git a/docs/integrations/support.mdx b/docs 2/integrations/support.mdx similarity index 98% rename from docs/integrations/support.mdx rename to docs 2/integrations/support.mdx index 4f52b81fdf3..d76df9af518 100644 --- a/docs/integrations/support.mdx +++ b/docs 2/integrations/support.mdx @@ -51,7 +51,6 @@ Below is the list of all community integrations. | Llama Index | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/llama_index_handler) | | Anthropic | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/anthropic_handler) | | MariaDB | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mariadb_handler) | -| TimeGPT | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/timegpt_handler) | | X (Twitter) | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/twitter_handler) | | GitHub | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/github_handler) | | Hugging Face Inference API | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/huggingface_api_handler) | @@ -61,7 +60,6 @@ Below is the list of all community integrations. | Confluence | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/confluence_handler) | | Gmail | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/gmail_handler) | | Couchbase | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/couchbase_handler) | -| StatsForecast | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/statsforecast_handler) | | Twelve Labs | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/twelve_labs_handler) | | Anomaly Detection | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/anomaly_detection_handler) | | YouTube | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/youtube_handler) | @@ -147,7 +145,6 @@ Below is the list of all community integrations. | MonetDB | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/monetdb_handler) | | MonkeyLearn | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/monkeylearn_handler) | | Microsoft Teams | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/ms_teams_handler) | -| NeuralForecast | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/neuralforecast_handler) | | NewsAPI | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/newsapi_handler) | | Notion | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/notion_handler) | | npm | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/npm_handler) | diff --git a/docs/integrations/vector-db-integrations/chromadb.mdx b/docs 2/integrations/vector-db-integrations/chromadb.mdx similarity index 100% rename from docs/integrations/vector-db-integrations/chromadb.mdx rename to docs 2/integrations/vector-db-integrations/chromadb.mdx diff --git a/docs/integrations/vector-db-integrations/couchbase.mdx b/docs 2/integrations/vector-db-integrations/couchbase.mdx similarity index 100% rename from docs/integrations/vector-db-integrations/couchbase.mdx rename to docs 2/integrations/vector-db-integrations/couchbase.mdx diff --git a/docs/integrations/vector-db-integrations/milvus.mdx b/docs 2/integrations/vector-db-integrations/milvus.mdx similarity index 100% rename from docs/integrations/vector-db-integrations/milvus.mdx rename to docs 2/integrations/vector-db-integrations/milvus.mdx diff --git a/docs/integrations/vector-db-integrations/pgvector.mdx b/docs 2/integrations/vector-db-integrations/pgvector.mdx similarity index 100% rename from docs/integrations/vector-db-integrations/pgvector.mdx rename to docs 2/integrations/vector-db-integrations/pgvector.mdx diff --git a/docs/integrations/vector-db-integrations/pinecone.mdx b/docs 2/integrations/vector-db-integrations/pinecone.mdx similarity index 100% rename from docs/integrations/vector-db-integrations/pinecone.mdx rename to docs 2/integrations/vector-db-integrations/pinecone.mdx diff --git a/docs/integrations/vector-db-integrations/weaviate.mdx b/docs 2/integrations/vector-db-integrations/weaviate.mdx similarity index 100% rename from docs/integrations/vector-db-integrations/weaviate.mdx rename to docs 2/integrations/vector-db-integrations/weaviate.mdx diff --git a/docs/logo/dark.svg b/docs 2/logo/dark.svg similarity index 100% rename from docs/logo/dark.svg rename to docs 2/logo/dark.svg diff --git a/docs/logo/light.svg b/docs 2/logo/light.svg similarity index 100% rename from docs/logo/light.svg rename to docs 2/logo/light.svg diff --git a/docs/minds.mdx b/docs 2/minds.mdx similarity index 100% rename from docs/minds.mdx rename to docs 2/minds.mdx diff --git a/docs/minds/authentication.mdx b/docs 2/minds/authentication.mdx similarity index 100% rename from docs/minds/authentication.mdx rename to docs 2/minds/authentication.mdx diff --git a/docs/minds/data_sources.mdx b/docs 2/minds/data_sources.mdx similarity index 100% rename from docs/minds/data_sources.mdx rename to docs 2/minds/data_sources.mdx diff --git a/docs/minds/minds_ips.mdx b/docs 2/minds/minds_ips.mdx similarity index 100% rename from docs/minds/minds_ips.mdx rename to docs 2/minds/minds_ips.mdx diff --git a/docs/minds/minds_overview.mdx b/docs 2/minds/minds_overview.mdx similarity index 100% rename from docs/minds/minds_overview.mdx rename to docs 2/minds/minds_overview.mdx diff --git a/docs/minds/python_sdk/assistants_api.mdx b/docs 2/minds/python_sdk/assistants_api.mdx similarity index 100% rename from docs/minds/python_sdk/assistants_api.mdx rename to docs 2/minds/python_sdk/assistants_api.mdx diff --git a/docs/minds/python_sdk/completions_api.mdx b/docs 2/minds/python_sdk/completions_api.mdx similarity index 100% rename from docs/minds/python_sdk/completions_api.mdx rename to docs 2/minds/python_sdk/completions_api.mdx diff --git a/docs/minds/python_sdk/mind_setup.mdx b/docs 2/minds/python_sdk/mind_setup.mdx similarity index 100% rename from docs/minds/python_sdk/mind_setup.mdx rename to docs 2/minds/python_sdk/mind_setup.mdx diff --git a/docs/minds/python_sdk/overview.mdx b/docs 2/minds/python_sdk/overview.mdx similarity index 100% rename from docs/minds/python_sdk/overview.mdx rename to docs 2/minds/python_sdk/overview.mdx diff --git a/docs/minds/quickstart.mdx b/docs 2/minds/quickstart.mdx similarity index 100% rename from docs/minds/quickstart.mdx rename to docs 2/minds/quickstart.mdx diff --git a/docs/minds/quickstart_best_practices.mdx b/docs 2/minds/quickstart_best_practices.mdx similarity index 100% rename from docs/minds/quickstart_best_practices.mdx rename to docs 2/minds/quickstart_best_practices.mdx diff --git a/docs/minds/quickstart_custom_mind.mdx b/docs 2/minds/quickstart_custom_mind.mdx similarity index 100% rename from docs/minds/quickstart_custom_mind.mdx rename to docs 2/minds/quickstart_custom_mind.mdx diff --git a/docs/minds/rest_api/add_data_to_mind.mdx b/docs 2/minds/rest_api/add_data_to_mind.mdx similarity index 100% rename from docs/minds/rest_api/add_data_to_mind.mdx rename to docs 2/minds/rest_api/add_data_to_mind.mdx diff --git a/docs/minds/rest_api/chat.mdx b/docs 2/minds/rest_api/chat.mdx similarity index 100% rename from docs/minds/rest_api/chat.mdx rename to docs 2/minds/rest_api/chat.mdx diff --git a/docs/minds/rest_api/create_datasource.mdx b/docs 2/minds/rest_api/create_datasource.mdx similarity index 100% rename from docs/minds/rest_api/create_datasource.mdx rename to docs 2/minds/rest_api/create_datasource.mdx diff --git a/docs/minds/rest_api/create_message.mdx b/docs 2/minds/rest_api/create_message.mdx similarity index 100% rename from docs/minds/rest_api/create_message.mdx rename to docs 2/minds/rest_api/create_message.mdx diff --git a/docs/minds/rest_api/create_mind.mdx b/docs 2/minds/rest_api/create_mind.mdx similarity index 100% rename from docs/minds/rest_api/create_mind.mdx rename to docs 2/minds/rest_api/create_mind.mdx diff --git a/docs/minds/rest_api/create_run.mdx b/docs 2/minds/rest_api/create_run.mdx similarity index 100% rename from docs/minds/rest_api/create_run.mdx rename to docs 2/minds/rest_api/create_run.mdx diff --git a/docs/minds/rest_api/create_thread.mdx b/docs 2/minds/rest_api/create_thread.mdx similarity index 100% rename from docs/minds/rest_api/create_thread.mdx rename to docs 2/minds/rest_api/create_thread.mdx diff --git a/docs/minds/rest_api/delete_datasource.mdx b/docs 2/minds/rest_api/delete_datasource.mdx similarity index 100% rename from docs/minds/rest_api/delete_datasource.mdx rename to docs 2/minds/rest_api/delete_datasource.mdx diff --git a/docs/minds/rest_api/delete_mind.mdx b/docs 2/minds/rest_api/delete_mind.mdx similarity index 100% rename from docs/minds/rest_api/delete_mind.mdx rename to docs 2/minds/rest_api/delete_mind.mdx diff --git a/docs/minds/rest_api/delete_thread.mdx b/docs 2/minds/rest_api/delete_thread.mdx similarity index 100% rename from docs/minds/rest_api/delete_thread.mdx rename to docs 2/minds/rest_api/delete_thread.mdx diff --git a/docs/minds/rest_api/get_datasource.mdx b/docs 2/minds/rest_api/get_datasource.mdx similarity index 100% rename from docs/minds/rest_api/get_datasource.mdx rename to docs 2/minds/rest_api/get_datasource.mdx diff --git a/docs/minds/rest_api/get_mind.mdx b/docs 2/minds/rest_api/get_mind.mdx similarity index 100% rename from docs/minds/rest_api/get_mind.mdx rename to docs 2/minds/rest_api/get_mind.mdx diff --git a/docs/minds/rest_api/list_datasources.mdx b/docs 2/minds/rest_api/list_datasources.mdx similarity index 100% rename from docs/minds/rest_api/list_datasources.mdx rename to docs 2/minds/rest_api/list_datasources.mdx diff --git a/docs/minds/rest_api/list_messages.mdx b/docs 2/minds/rest_api/list_messages.mdx similarity index 100% rename from docs/minds/rest_api/list_messages.mdx rename to docs 2/minds/rest_api/list_messages.mdx diff --git a/docs/minds/rest_api/list_minds.mdx b/docs 2/minds/rest_api/list_minds.mdx similarity index 100% rename from docs/minds/rest_api/list_minds.mdx rename to docs 2/minds/rest_api/list_minds.mdx diff --git a/docs/minds/rest_api/overview.mdx b/docs 2/minds/rest_api/overview.mdx similarity index 100% rename from docs/minds/rest_api/overview.mdx rename to docs 2/minds/rest_api/overview.mdx diff --git a/docs/minds/rest_api/overview_assistants.mdx b/docs 2/minds/rest_api/overview_assistants.mdx similarity index 100% rename from docs/minds/rest_api/overview_assistants.mdx rename to docs 2/minds/rest_api/overview_assistants.mdx diff --git a/docs/minds/rest_api/retrieve_run.mdx b/docs 2/minds/rest_api/retrieve_run.mdx similarity index 100% rename from docs/minds/rest_api/retrieve_run.mdx rename to docs 2/minds/rest_api/retrieve_run.mdx diff --git a/docs/minds/rest_api/update_datasource.mdx b/docs 2/minds/rest_api/update_datasource.mdx similarity index 100% rename from docs/minds/rest_api/update_datasource.mdx rename to docs 2/minds/rest_api/update_datasource.mdx diff --git a/docs/minds/rest_api/update_mind.mdx b/docs 2/minds/rest_api/update_mind.mdx similarity index 100% rename from docs/minds/rest_api/update_mind.mdx rename to docs 2/minds/rest_api/update_mind.mdx diff --git a/docs/minds/sample_data.mdx b/docs 2/minds/sample_data.mdx similarity index 100% rename from docs/minds/sample_data.mdx rename to docs 2/minds/sample_data.mdx diff --git a/docs/mindsdb-connect.mdx b/docs 2/mindsdb-connect.mdx similarity index 100% rename from docs/mindsdb-connect.mdx rename to docs 2/mindsdb-connect.mdx diff --git a/docs/mindsdb-fqe.mdx b/docs 2/mindsdb-fqe.mdx similarity index 100% rename from docs/mindsdb-fqe.mdx rename to docs 2/mindsdb-fqe.mdx diff --git a/docs/mindsdb-gui.mdx b/docs 2/mindsdb-gui.mdx similarity index 100% rename from docs/mindsdb-gui.mdx rename to docs 2/mindsdb-gui.mdx diff --git a/docs/mindsdb-handlers.mdx b/docs 2/mindsdb-handlers.mdx similarity index 92% rename from docs/mindsdb-handlers.mdx rename to docs 2/mindsdb-handlers.mdx index c69e09cee4c..0d9a1aaac36 100644 --- a/docs/mindsdb-handlers.mdx +++ b/docs 2/mindsdb-handlers.mdx @@ -76,7 +76,7 @@ Whenever you want to parse a string that contains SQL, we strongly recommend usi ### Formatting Output -In the case of data handlers, when it comes to building the response of the public methods, the output should be wrapped by the [mindsdb.integrations.libs.response.HandlerResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py#L7) or [mindsdb.integrations.libs.response.HandlerStatusResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py#L32) class. These classes are used by the MindsDB executioner to orchestrate and coordinate multiple handler instances in parallel. +In the case of data handlers, the data-returning methods (`native_query()`, `query()`, `get_tables()`, `get_columns()`) should return one of the response classes from [mindsdb.integrations.libs.response](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) And in the case of ML handlers, output wrapping is automatically done by an intermediate wrapper, the `BaseMLEngineExec` class, so the contributor wouldn't need to worry about it. diff --git a/docs/mindsdb-objects.mdx b/docs 2/mindsdb-objects.mdx similarity index 100% rename from docs/mindsdb-objects.mdx rename to docs 2/mindsdb-objects.mdx diff --git a/docs/mindsdb-respond.mdx b/docs 2/mindsdb-respond.mdx similarity index 100% rename from docs/mindsdb-respond.mdx rename to docs 2/mindsdb-respond.mdx diff --git a/docs/mindsdb-unify.mdx b/docs 2/mindsdb-unify.mdx similarity index 100% rename from docs/mindsdb-unify.mdx rename to docs 2/mindsdb-unify.mdx diff --git a/docs/mindsdb.mdx b/docs 2/mindsdb.mdx similarity index 100% rename from docs/mindsdb.mdx rename to docs 2/mindsdb.mdx diff --git a/docs/mindsdb_sql/agents/agent.mdx b/docs 2/mindsdb_sql/agents/agent.mdx similarity index 100% rename from docs/mindsdb_sql/agents/agent.mdx rename to docs 2/mindsdb_sql/agents/agent.mdx diff --git a/docs/mindsdb_sql/agents/agent_gui.mdx b/docs 2/mindsdb_sql/agents/agent_gui.mdx similarity index 100% rename from docs/mindsdb_sql/agents/agent_gui.mdx rename to docs 2/mindsdb_sql/agents/agent_gui.mdx diff --git a/docs/mindsdb_sql/agents/agent_syntax.mdx b/docs 2/mindsdb_sql/agents/agent_syntax.mdx similarity index 96% rename from docs/mindsdb_sql/agents/agent_syntax.mdx rename to docs 2/mindsdb_sql/agents/agent_syntax.mdx index 9c1d3c01563..42597689170 100644 --- a/docs/mindsdb_sql/agents/agent_syntax.mdx +++ b/docs 2/mindsdb_sql/agents/agent_syntax.mdx @@ -24,7 +24,8 @@ USING "tables": ["datasource_conn_name.table_name", ...] }, prompt_template='describe data', - timeout=10; + timeout=10, + mode='text'; ``` It creates an agent that uses the defined model and has access to the connected data. @@ -315,6 +316,12 @@ This parameter defines the time the agent can take to come back with an answer. For example, when the `timeout` parameter is set to 10, the agent has 10 seconds to return an answer. If the agent takes longer than 10 seconds, it aborts the process and comes back with an answer indicating its failure to return an answer within the defined time interval. +### `mode` + +This parameter defines the agent's response style, allowing users to partially control the output format. Supported values include `text` and `sql`. + +When set, the agent will tailor its responses to match the specified format. Note that the agent may still adapt its output when necessary to ensure clarity or correctness. + ## `SELECT FROM AGENT` Syntax Query an agent to generate responses to questions. diff --git a/docs/mindsdb_sql/agents/chatbot.mdx b/docs 2/mindsdb_sql/agents/chatbot.mdx similarity index 100% rename from docs/mindsdb_sql/agents/chatbot.mdx rename to docs 2/mindsdb_sql/agents/chatbot.mdx diff --git a/docs/mindsdb_sql/connect/connect-mariadb-skysql.mdx b/docs 2/mindsdb_sql/connect/connect-mariadb-skysql.mdx similarity index 100% rename from docs/mindsdb_sql/connect/connect-mariadb-skysql.mdx rename to docs 2/mindsdb_sql/connect/connect-mariadb-skysql.mdx diff --git a/docs/mindsdb_sql/connect/dbeaver.mdx b/docs 2/mindsdb_sql/connect/dbeaver.mdx similarity index 100% rename from docs/mindsdb_sql/connect/dbeaver.mdx rename to docs 2/mindsdb_sql/connect/dbeaver.mdx diff --git a/docs/mindsdb_sql/connect/deepnote.mdx b/docs 2/mindsdb_sql/connect/deepnote.mdx similarity index 100% rename from docs/mindsdb_sql/connect/deepnote.mdx rename to docs 2/mindsdb_sql/connect/deepnote.mdx diff --git a/docs/mindsdb_sql/connect/grafana.mdx b/docs 2/mindsdb_sql/connect/grafana.mdx similarity index 100% rename from docs/mindsdb_sql/connect/grafana.mdx rename to docs 2/mindsdb_sql/connect/grafana.mdx diff --git a/docs/mindsdb_sql/connect/jupysql.mdx b/docs 2/mindsdb_sql/connect/jupysql.mdx similarity index 100% rename from docs/mindsdb_sql/connect/jupysql.mdx rename to docs 2/mindsdb_sql/connect/jupysql.mdx diff --git a/docs/mindsdb_sql/connect/metabase.mdx b/docs 2/mindsdb_sql/connect/metabase.mdx similarity index 100% rename from docs/mindsdb_sql/connect/metabase.mdx rename to docs 2/mindsdb_sql/connect/metabase.mdx diff --git a/docs/mindsdb_sql/connect/mindsdb_editor.mdx b/docs 2/mindsdb_sql/connect/mindsdb_editor.mdx similarity index 100% rename from docs/mindsdb_sql/connect/mindsdb_editor.mdx rename to docs 2/mindsdb_sql/connect/mindsdb_editor.mdx diff --git a/docs/mindsdb_sql/connect/mysql-client.mdx b/docs 2/mindsdb_sql/connect/mysql-client.mdx similarity index 100% rename from docs/mindsdb_sql/connect/mysql-client.mdx rename to docs 2/mindsdb_sql/connect/mysql-client.mdx diff --git a/docs/mindsdb_sql/connect/sql-alchemy.mdx b/docs 2/mindsdb_sql/connect/sql-alchemy.mdx similarity index 100% rename from docs/mindsdb_sql/connect/sql-alchemy.mdx rename to docs 2/mindsdb_sql/connect/sql-alchemy.mdx diff --git a/docs/mindsdb_sql/connect/tableau.mdx b/docs 2/mindsdb_sql/connect/tableau.mdx similarity index 100% rename from docs/mindsdb_sql/connect/tableau.mdx rename to docs 2/mindsdb_sql/connect/tableau.mdx diff --git a/docs/mindsdb_sql/functions/custom_functions.mdx b/docs 2/mindsdb_sql/functions/custom_functions.mdx similarity index 100% rename from docs/mindsdb_sql/functions/custom_functions.mdx rename to docs 2/mindsdb_sql/functions/custom_functions.mdx diff --git a/docs/mindsdb_sql/functions/from_env.mdx b/docs 2/mindsdb_sql/functions/from_env.mdx similarity index 100% rename from docs/mindsdb_sql/functions/from_env.mdx rename to docs 2/mindsdb_sql/functions/from_env.mdx diff --git a/docs/mindsdb_sql/functions/llm_function.mdx b/docs 2/mindsdb_sql/functions/llm_function.mdx similarity index 100% rename from docs/mindsdb_sql/functions/llm_function.mdx rename to docs 2/mindsdb_sql/functions/llm_function.mdx diff --git a/docs/mindsdb_sql/functions/standard-functions.mdx b/docs 2/mindsdb_sql/functions/standard-functions.mdx similarity index 100% rename from docs/mindsdb_sql/functions/standard-functions.mdx rename to docs 2/mindsdb_sql/functions/standard-functions.mdx diff --git a/docs/mindsdb_sql/functions/to_markdown_function.mdx b/docs 2/mindsdb_sql/functions/to_markdown_function.mdx similarity index 100% rename from docs/mindsdb_sql/functions/to_markdown_function.mdx rename to docs 2/mindsdb_sql/functions/to_markdown_function.mdx diff --git a/docs/mindsdb_sql/functions/variables.mdx b/docs 2/mindsdb_sql/functions/variables.mdx similarity index 100% rename from docs/mindsdb_sql/functions/variables.mdx rename to docs 2/mindsdb_sql/functions/variables.mdx diff --git a/docs/mindsdb_sql/knowledge-bases.mdx b/docs 2/mindsdb_sql/knowledge-bases.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge-bases.mdx rename to docs 2/mindsdb_sql/knowledge-bases.mdx diff --git a/docs/mindsdb_sql/knowledge_bases/alter.mdx b/docs 2/mindsdb_sql/knowledge_bases/alter.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge_bases/alter.mdx rename to docs 2/mindsdb_sql/knowledge_bases/alter.mdx diff --git a/docs/mindsdb_sql/knowledge_bases/create.mdx b/docs 2/mindsdb_sql/knowledge_bases/create.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge_bases/create.mdx rename to docs 2/mindsdb_sql/knowledge_bases/create.mdx diff --git a/docs/mindsdb_sql/knowledge_bases/evaluate.mdx b/docs 2/mindsdb_sql/knowledge_bases/evaluate.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge_bases/evaluate.mdx rename to docs 2/mindsdb_sql/knowledge_bases/evaluate.mdx diff --git a/docs/mindsdb_sql/knowledge_bases/examples.mdx b/docs 2/mindsdb_sql/knowledge_bases/examples.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge_bases/examples.mdx rename to docs 2/mindsdb_sql/knowledge_bases/examples.mdx diff --git a/docs/mindsdb_sql/knowledge_bases/hybrid_search.mdx b/docs 2/mindsdb_sql/knowledge_bases/hybrid_search.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge_bases/hybrid_search.mdx rename to docs 2/mindsdb_sql/knowledge_bases/hybrid_search.mdx diff --git a/docs/mindsdb_sql/knowledge_bases/insert_data.mdx b/docs 2/mindsdb_sql/knowledge_bases/insert_data.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge_bases/insert_data.mdx rename to docs 2/mindsdb_sql/knowledge_bases/insert_data.mdx diff --git a/docs/mindsdb_sql/knowledge_bases/overview.mdx b/docs 2/mindsdb_sql/knowledge_bases/overview.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge_bases/overview.mdx rename to docs 2/mindsdb_sql/knowledge_bases/overview.mdx diff --git a/docs/mindsdb_sql/knowledge_bases/query.mdx b/docs 2/mindsdb_sql/knowledge_bases/query.mdx similarity index 100% rename from docs/mindsdb_sql/knowledge_bases/query.mdx rename to docs 2/mindsdb_sql/knowledge_bases/query.mdx diff --git a/docs/mindsdb_sql/overview.mdx b/docs 2/mindsdb_sql/overview.mdx similarity index 100% rename from docs/mindsdb_sql/overview.mdx rename to docs 2/mindsdb_sql/overview.mdx diff --git a/docs/mindsdb_sql/sql/api/alter-view.mdx b/docs 2/mindsdb_sql/sql/api/alter-view.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/alter-view.mdx rename to docs 2/mindsdb_sql/sql/api/alter-view.mdx diff --git a/docs/mindsdb_sql/sql/api/delete.mdx b/docs 2/mindsdb_sql/sql/api/delete.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/delete.mdx rename to docs 2/mindsdb_sql/sql/api/delete.mdx diff --git a/docs/mindsdb_sql/sql/api/describe.mdx b/docs 2/mindsdb_sql/sql/api/describe.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/describe.mdx rename to docs 2/mindsdb_sql/sql/api/describe.mdx diff --git a/docs/mindsdb_sql/sql/api/evaluate.mdx b/docs 2/mindsdb_sql/sql/api/evaluate.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/evaluate.mdx rename to docs 2/mindsdb_sql/sql/api/evaluate.mdx diff --git a/docs/mindsdb_sql/sql/api/finetune.mdx b/docs 2/mindsdb_sql/sql/api/finetune.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/finetune.mdx rename to docs 2/mindsdb_sql/sql/api/finetune.mdx diff --git a/docs/mindsdb_sql/sql/api/insert.mdx b/docs 2/mindsdb_sql/sql/api/insert.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/insert.mdx rename to docs 2/mindsdb_sql/sql/api/insert.mdx diff --git a/docs/mindsdb_sql/sql/api/join-on.mdx b/docs 2/mindsdb_sql/sql/api/join-on.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/join-on.mdx rename to docs 2/mindsdb_sql/sql/api/join-on.mdx diff --git a/docs/mindsdb_sql/sql/api/join.mdx b/docs 2/mindsdb_sql/sql/api/join.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/join.mdx rename to docs 2/mindsdb_sql/sql/api/join.mdx diff --git a/docs/mindsdb_sql/sql/api/manage-models-versions.mdx b/docs 2/mindsdb_sql/sql/api/manage-models-versions.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/manage-models-versions.mdx rename to docs 2/mindsdb_sql/sql/api/manage-models-versions.mdx diff --git a/docs/mindsdb_sql/sql/api/retrain.mdx b/docs 2/mindsdb_sql/sql/api/retrain.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/retrain.mdx rename to docs 2/mindsdb_sql/sql/api/retrain.mdx diff --git a/docs/mindsdb_sql/sql/api/select-files.mdx b/docs 2/mindsdb_sql/sql/api/select-files.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/select-files.mdx rename to docs 2/mindsdb_sql/sql/api/select-files.mdx diff --git a/docs/mindsdb_sql/sql/api/select-view.mdx b/docs 2/mindsdb_sql/sql/api/select-view.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/select-view.mdx rename to docs 2/mindsdb_sql/sql/api/select-view.mdx diff --git a/docs/mindsdb_sql/sql/api/select.mdx b/docs 2/mindsdb_sql/sql/api/select.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/select.mdx rename to docs 2/mindsdb_sql/sql/api/select.mdx diff --git a/docs/mindsdb_sql/sql/api/update.mdx b/docs 2/mindsdb_sql/sql/api/update.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/update.mdx rename to docs 2/mindsdb_sql/sql/api/update.mdx diff --git a/docs/mindsdb_sql/sql/api/use.mdx b/docs 2/mindsdb_sql/sql/api/use.mdx similarity index 100% rename from docs/mindsdb_sql/sql/api/use.mdx rename to docs 2/mindsdb_sql/sql/api/use.mdx diff --git a/docs/mindsdb_sql/sql/create/database.mdx b/docs 2/mindsdb_sql/sql/create/database.mdx similarity index 100% rename from docs/mindsdb_sql/sql/create/database.mdx rename to docs 2/mindsdb_sql/sql/create/database.mdx diff --git a/docs/mindsdb_sql/sql/create/file.mdx b/docs 2/mindsdb_sql/sql/create/file.mdx similarity index 100% rename from docs/mindsdb_sql/sql/create/file.mdx rename to docs 2/mindsdb_sql/sql/create/file.mdx diff --git a/docs/mindsdb_sql/sql/create/jobs.mdx b/docs 2/mindsdb_sql/sql/create/jobs.mdx similarity index 100% rename from docs/mindsdb_sql/sql/create/jobs.mdx rename to docs 2/mindsdb_sql/sql/create/jobs.mdx diff --git a/docs/mindsdb_sql/sql/create/ml-engine.mdx b/docs 2/mindsdb_sql/sql/create/ml-engine.mdx similarity index 92% rename from docs/mindsdb_sql/sql/create/ml-engine.mdx rename to docs 2/mindsdb_sql/sql/create/ml-engine.mdx index 1e10bba6c63..ece54f7916e 100644 --- a/docs/mindsdb_sql/sql/create/ml-engine.mdx +++ b/docs 2/mindsdb_sql/sql/create/ml-engine.mdx @@ -70,14 +70,12 @@ On execution, we get: | NAME | TITLE | DESCRIPTION | VERSION | CONNECTION_ARGS | IMPORT_SUCCESS | IMPORT_ERROR | +-------------------+--------------------+-------------------------------------------------------+---------+-----------------------------------------------------------------------------------------------------------------------------------------------------+----------------+-----------------------------------------------------------------------------+ | "ray_serve" | "RayServe" | "MindsDB handler for Ray Serve" | "0.0.1" | "[NULL]" | "true" | "[NULL]" | -| "neuralforecast" | "NeuralForecast" | "MindsDB handler for Nixtla's NeuralForecast package" | "0.0.1" | "[NULL]" | "true" | "[NULL]" | | "autosklearn" | "Auto-Sklearn" | "MindsDB handler for Auto-Sklearn" | "0.0.2" | "[NULL]" | "false" | "No module named 'autosklearn'" | | "mlflow" | "MLFlow" | "MindsDB handler for MLflow" | "0.0.2" | "[NULL]" | "false" | "No module named 'mlflow'" | | "openai" | "OpenAI" | "MindsDB handler for OpenAI" | "0.0.1" | "[NULL]" | "true" | "[NULL]" | | "merlion" | "Merlion" | "MindsDB handler for Merlion" | "0.0.1" | "[NULL]" | "false" | "object.__init__() takes exactly one argument (the instance to initialize)" | | "byom" | "BYOM" | "MindsDB handler for BYOM" | "0.0.1" | "{'code': {'type': 'path', 'description': 'The path to model code'}, 'modules': {'type': 'path', 'description': 'The path to model requirements'}}" | "true" | "[NULL]" | | "huggingface_api" | "Hugging Face API" | "MindsDB handler for Auto-Sklearn" | "0.0.2" | "[NULL]" | "false" | "No module named 'hugging_py_face'" | -| "statsforecast" | "StatsForecast" | "MindsDB handler for Nixtla's StatsForecast package" | "0.0.0" | "[NULL]" | "true" | "[NULL]" | | "huggingface" | "Hugging Face" | "MindsDB handler for Higging Face" | "0.0.1" | "[NULL]" | "true" | "[NULL]" | | "TPOT" | "Tpot" | "MindsDB handler for TPOT " | "0.0.2" | "[NULL]" | "false" | "No module named 'tpot'" | | "langchain" | "LangChain" | "MindsDB handler for LangChain" | "0.0.1" | "[NULL]" | "true" | "[NULL]" | diff --git a/docs/mindsdb_sql/sql/create/model.mdx b/docs 2/mindsdb_sql/sql/create/model.mdx similarity index 98% rename from docs/mindsdb_sql/sql/create/model.mdx rename to docs 2/mindsdb_sql/sql/create/model.mdx index 3fa60ccfe3c..a68ed13da10 100644 --- a/docs/mindsdb_sql/sql/create/model.mdx +++ b/docs 2/mindsdb_sql/sql/create/model.mdx @@ -8,8 +8,8 @@ sidebarTitle: Create, Train, and Deploy a Model The `CREATE MODEL` statement creates and trains a machine learning (ML) model. - Please note that the `CREATE MODEL` statement is equivalent to the `CREATE MODEL` statement. - We are transitioning to the `CREATE MODEL` statement, but the `CREATE MODEL` statement still works. + Please note that the `CREATE PREDICTOR` statement is equivalent to the `CREATE MODEL` statement. + We are transitioning to the `CREATE MODEL` statement, but the `CREATE PREDICTOR` statement still works. ## Syntax diff --git a/docs/mindsdb_sql/sql/create/project.mdx b/docs 2/mindsdb_sql/sql/create/project.mdx similarity index 100% rename from docs/mindsdb_sql/sql/create/project.mdx rename to docs 2/mindsdb_sql/sql/create/project.mdx diff --git a/docs/mindsdb_sql/sql/create/table.mdx b/docs 2/mindsdb_sql/sql/create/table.mdx similarity index 100% rename from docs/mindsdb_sql/sql/create/table.mdx rename to docs 2/mindsdb_sql/sql/create/table.mdx diff --git a/docs/mindsdb_sql/sql/create/trigger.mdx b/docs 2/mindsdb_sql/sql/create/trigger.mdx similarity index 100% rename from docs/mindsdb_sql/sql/create/trigger.mdx rename to docs 2/mindsdb_sql/sql/create/trigger.mdx diff --git a/docs/mindsdb_sql/sql/create/view.mdx b/docs 2/mindsdb_sql/sql/create/view.mdx similarity index 100% rename from docs/mindsdb_sql/sql/create/view.mdx rename to docs 2/mindsdb_sql/sql/create/view.mdx diff --git a/docs/mindsdb_sql/sql/drop/database.mdx b/docs 2/mindsdb_sql/sql/drop/database.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/database.mdx rename to docs 2/mindsdb_sql/sql/drop/database.mdx diff --git a/docs/mindsdb_sql/sql/drop/file.mdx b/docs 2/mindsdb_sql/sql/drop/file.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/file.mdx rename to docs 2/mindsdb_sql/sql/drop/file.mdx diff --git a/docs/mindsdb_sql/sql/drop/jobs.mdx b/docs 2/mindsdb_sql/sql/drop/jobs.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/jobs.mdx rename to docs 2/mindsdb_sql/sql/drop/jobs.mdx diff --git a/docs/mindsdb_sql/sql/drop/ml-engine.mdx b/docs 2/mindsdb_sql/sql/drop/ml-engine.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/ml-engine.mdx rename to docs 2/mindsdb_sql/sql/drop/ml-engine.mdx diff --git a/docs/mindsdb_sql/sql/drop/model.mdx b/docs 2/mindsdb_sql/sql/drop/model.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/model.mdx rename to docs 2/mindsdb_sql/sql/drop/model.mdx diff --git a/docs/mindsdb_sql/sql/drop/project.mdx b/docs 2/mindsdb_sql/sql/drop/project.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/project.mdx rename to docs 2/mindsdb_sql/sql/drop/project.mdx diff --git a/docs/mindsdb_sql/sql/drop/table.mdx b/docs 2/mindsdb_sql/sql/drop/table.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/table.mdx rename to docs 2/mindsdb_sql/sql/drop/table.mdx diff --git a/docs/mindsdb_sql/sql/drop/trigger.mdx b/docs 2/mindsdb_sql/sql/drop/trigger.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/trigger.mdx rename to docs 2/mindsdb_sql/sql/drop/trigger.mdx diff --git a/docs/mindsdb_sql/sql/drop/view.mdx b/docs 2/mindsdb_sql/sql/drop/view.mdx similarity index 100% rename from docs/mindsdb_sql/sql/drop/view.mdx rename to docs 2/mindsdb_sql/sql/drop/view.mdx diff --git a/docs/mindsdb_sql/sql/get-batch-predictions.mdx b/docs 2/mindsdb_sql/sql/get-batch-predictions.mdx similarity index 100% rename from docs/mindsdb_sql/sql/get-batch-predictions.mdx rename to docs 2/mindsdb_sql/sql/get-batch-predictions.mdx diff --git a/docs/mindsdb_sql/sql/get-single-prediction.mdx b/docs 2/mindsdb_sql/sql/get-single-prediction.mdx similarity index 100% rename from docs/mindsdb_sql/sql/get-single-prediction.mdx rename to docs 2/mindsdb_sql/sql/get-single-prediction.mdx diff --git a/docs/mindsdb_sql/sql/list-data-handlers.mdx b/docs 2/mindsdb_sql/sql/list-data-handlers.mdx similarity index 100% rename from docs/mindsdb_sql/sql/list-data-handlers.mdx rename to docs 2/mindsdb_sql/sql/list-data-handlers.mdx diff --git a/docs/mindsdb_sql/sql/list-ml-handlers.mdx b/docs 2/mindsdb_sql/sql/list-ml-handlers.mdx similarity index 100% rename from docs/mindsdb_sql/sql/list-ml-handlers.mdx rename to docs 2/mindsdb_sql/sql/list-ml-handlers.mdx diff --git a/docs/mindsdb_sql/sql/list-projects.mdx b/docs 2/mindsdb_sql/sql/list-projects.mdx similarity index 100% rename from docs/mindsdb_sql/sql/list-projects.mdx rename to docs 2/mindsdb_sql/sql/list-projects.mdx diff --git a/docs/mindsdb_sql/sql/native-queries.mdx b/docs 2/mindsdb_sql/sql/native-queries.mdx similarity index 100% rename from docs/mindsdb_sql/sql/native-queries.mdx rename to docs 2/mindsdb_sql/sql/native-queries.mdx diff --git a/docs/mindsdb_sql/sql/query-jobs.mdx b/docs 2/mindsdb_sql/sql/query-jobs.mdx similarity index 100% rename from docs/mindsdb_sql/sql/query-jobs.mdx rename to docs 2/mindsdb_sql/sql/query-jobs.mdx diff --git a/docs/mindsdb_sql/sql/query-triggers.mdx b/docs 2/mindsdb_sql/sql/query-triggers.mdx similarity index 100% rename from docs/mindsdb_sql/sql/query-triggers.mdx rename to docs 2/mindsdb_sql/sql/query-triggers.mdx diff --git a/docs/mindsdb_sql/sql/show-databases.mdx b/docs 2/mindsdb_sql/sql/show-databases.mdx similarity index 100% rename from docs/mindsdb_sql/sql/show-databases.mdx rename to docs 2/mindsdb_sql/sql/show-databases.mdx diff --git a/docs/mindsdb_sql/sql/show-ml-engines.mdx b/docs 2/mindsdb_sql/sql/show-ml-engines.mdx similarity index 100% rename from docs/mindsdb_sql/sql/show-ml-engines.mdx rename to docs 2/mindsdb_sql/sql/show-ml-engines.mdx diff --git a/docs/mindsdb_sql/sql/show-models.mdx b/docs 2/mindsdb_sql/sql/show-models.mdx similarity index 100% rename from docs/mindsdb_sql/sql/show-models.mdx rename to docs 2/mindsdb_sql/sql/show-models.mdx diff --git a/docs/mindsdb_sql/sql/use/project.mdx b/docs 2/mindsdb_sql/sql/use/project.mdx similarity index 100% rename from docs/mindsdb_sql/sql/use/project.mdx rename to docs 2/mindsdb_sql/sql/use/project.mdx diff --git a/docs/mindsdb_sql/sql_support/case-when.mdx b/docs 2/mindsdb_sql/sql_support/case-when.mdx similarity index 100% rename from docs/mindsdb_sql/sql_support/case-when.mdx rename to docs 2/mindsdb_sql/sql_support/case-when.mdx diff --git a/docs/mindsdb_sql/sql_support/cte.mdx b/docs 2/mindsdb_sql/sql_support/cte.mdx similarity index 100% rename from docs/mindsdb_sql/sql_support/cte.mdx rename to docs 2/mindsdb_sql/sql_support/cte.mdx diff --git a/docs/mindsdb_sql/syntax.mdx b/docs 2/mindsdb_sql/syntax.mdx similarity index 100% rename from docs/mindsdb_sql/syntax.mdx rename to docs 2/mindsdb_sql/syntax.mdx diff --git a/docs/model-context-protocol/anthropic.mdx b/docs 2/model-context-protocol/anthropic.mdx similarity index 97% rename from docs/model-context-protocol/anthropic.mdx rename to docs 2/model-context-protocol/anthropic.mdx index ba8609f5b23..0b594db756a 100644 --- a/docs/model-context-protocol/anthropic.mdx +++ b/docs 2/model-context-protocol/anthropic.mdx @@ -35,7 +35,7 @@ response = client.beta.messages.create( mcp_servers = [ { "type": "url", - "url": "https://5a52-88-203-84-191.ngrok-free.app/mcp/sse", + "url": "https:///mcp/sse", "name": "mindsdb-mcp", "authorization_token": "" } diff --git a/docs/model-context-protocol/cursor_usage.mdx b/docs 2/model-context-protocol/cursor_usage.mdx similarity index 100% rename from docs/model-context-protocol/cursor_usage.mdx rename to docs 2/model-context-protocol/cursor_usage.mdx diff --git a/docs/model-context-protocol/openai.mdx b/docs 2/model-context-protocol/openai.mdx similarity index 98% rename from docs/model-context-protocol/openai.mdx rename to docs 2/model-context-protocol/openai.mdx index c3d8ea2df54..3d9736dde9e 100644 --- a/docs/model-context-protocol/openai.mdx +++ b/docs 2/model-context-protocol/openai.mdx @@ -32,7 +32,7 @@ response = client.responses.create( { "type": "mcp", "server_label": "mdb", - "server_url": "https://5a52-88-203-84-191.ngrok-free.app/mcp/sse", + "server_url": "https:///mcp/sse", "headers": { "Authorization": "Bearer " }, "require_approval": "never", } diff --git a/docs/model-context-protocol/overview.mdx b/docs 2/model-context-protocol/overview.mdx similarity index 100% rename from docs/model-context-protocol/overview.mdx rename to docs 2/model-context-protocol/overview.mdx diff --git a/docs 2/model-context-protocol/usage.mdx b/docs 2/model-context-protocol/usage.mdx new file mode 100644 index 00000000000..43ed653b8d8 --- /dev/null +++ b/docs 2/model-context-protocol/usage.mdx @@ -0,0 +1,95 @@ +--- +title: MindsDB's MCP Server Usage and Tools +sidebarTitle: Usage +--- + +**MindsDB** is an MCP server that enables your MCP applications to answer questions over large-scale federated data spanning databases, data warehouses, and SaaS applications. + +## Start MindsDB as an MCP Server + +Follow the steps below to use MindsDB as an MCP server. + +1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). + +2. [Connect your data source](/mindsdb_sql/sql/create/database) and/or [upload files](/mindsdb_sql/sql/create/file) to MindsDB in order to ask questions over your data. + + + You can use our sample dataset that stores the sales manager data. + + ```sql + CREATE DATABASE sales_manager_data + WITH ENGINE = "postgres", + PARAMETERS = { + "user": "demo_user", + "password": "demo_password", + "host": "samples.mindsdb.com", + "port": "5432", + "database": "sales_manager_data" + }; + ``` + + +3. Start MindsDB MCP server. + + * **Without authentication** (suitable for local tools): + + ```bash + docker run --name mindsdb_container -p 47334:47334 mindsdb/mindsdb + ``` + + * **With PAT authentication** (suitable for remote): + + ```bash + docker run --name mindsdb_container -p 47334:47334 -e MINDSDB_USERNAME=admin -e MINDSDB_PASSWORD=password123 mindsdb/mindsdb + ``` + + Get a Bearer token: + ```bash + curl -X POST -d '{"username":"admin","password":"password123"}' -H "Content-Type: application/json" http://localhost:47334/api/login + ``` + Use this token as `Authorization: Bearer ` in your MCP client. + + * **With OAuth 2.0** (for enterprise deployments): configure `MINDSDB_MCP_OAUTH_ENABLED=true` along with `MINDSDB_MCP_OAUTH_ISSUER_URL`, `MINDSDB_MCP_OAUTH_CLIENT_ID`, and `MINDSDB_MCP_OAUTH_CLIENT_SECRET`. + +4. To confirm the MindsDB MCP server is running use `http://127.0.0.1:47334/mcp/status`. A successful response means your MCP environment is ready. + + +## MCP Capabilities + +### Tools + +**`query`** β€” Executes SQL queries against MindsDB using MySQL syntax. + +Parameters: +- `query` (required): SQL query string +- `context` (optional): Dict with default database, e.g. `{"db": "my_postgres"}` + +Returns one of: +- `{"type": "table", "column_names": [...], "data": [...]}` β€” for SELECT results +- `{"type": "ok", "affected_rows": N}` β€” for INSERT/UPDATE/DELETE +- `{"type": "error", "error_code": N, "error_message": "..."}` β€” on failure + +### Resources + +MCP resources expose schema information for discovery: + +| Resource URI | Description | +|---|---| +| `schema://databases` | Lists all connected data sources | +| `schema://databases/{db}/tables` | Lists tables in a database | +| `schema://databases/{db}/tables/{table}/columns` | Lists columns with types | +| `schema://knowledge_bases` | Lists knowledge bases | + +### Prompts + +**`sample_table`** β€” Generates instructions to fetch 5 sample rows and describe a table's structure. + +## Transport Modes + +- **HTTP (SSE)**: `http://127.0.0.1:47334/mcp/sse` +- **HTTP (Streamable)**: `http://127.0.0.1:47334/mcp/streamable` +- **Stdio**: run with `--mcp-stdio` flag for local stdio-based transport + +## Configuration + +CORS, rate limiting, DNS rebinding protection, and OAuth settings for the MCP server are configured via the `api.mcp` section of `config.json` or the corresponding environment variables. See [Extend the Default MindsDB Configuration](/setup/custom-config#mcp-api) for the full parameter reference. diff --git a/docs/openapi.yml b/docs 2/openapi.yml similarity index 100% rename from docs/openapi.yml rename to docs 2/openapi.yml diff --git a/docs/overview_sdks_apis.mdx b/docs 2/overview_sdks_apis.mdx similarity index 100% rename from docs/overview_sdks_apis.mdx rename to docs 2/overview_sdks_apis.mdx diff --git a/docs/package-lock.json b/docs 2/package-lock.json similarity index 89% rename from docs/package-lock.json rename to docs 2/package-lock.json index 5d303f5c439..551eba76093 100644 --- a/docs/package-lock.json +++ b/docs 2/package-lock.json @@ -1,18 +1,18 @@ { - "name": "relock-npm-lock-v2-Lu0TRg", + "name": "docs", "lockfileVersion": 3, "requires": true, "packages": { "": { "dependencies": { - "mintlify": "^4.2.296", + "mintlify": "^4.2.500", "sharp": "^0.34.4" } }, "node_modules/@alcalzone/ansi-tokenize": { - "version": "0.2.3", - "resolved": "https://registry.npmjs.org/@alcalzone/ansi-tokenize/-/ansi-tokenize-0.2.3.tgz", - "integrity": "sha512-jsElTJ0sQ4wHRz+C45tfect76BwbTbgkgKByOzpCN9xG61N5V6u/glvg1CsNJhq2xJIFpKHSwG3D2wPPuEYOrQ==", + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/@alcalzone/ansi-tokenize/-/ansi-tokenize-0.2.5.tgz", + "integrity": "sha512-3NX/MpTdroi0aKz134A6RC2Gb2iXVECN4QaAXnvCIxxIm3C3AVB1mkUe8NaaiyvOpDfsrqWhYtj+Q6a62RrTsw==", "license": "MIT", "dependencies": { "ansi-styles": "^6.2.1", @@ -77,18 +77,18 @@ } }, "node_modules/@asyncapi/specs": { - "version": "6.10.0", - "resolved": "https://registry.npmjs.org/@asyncapi/specs/-/specs-6.10.0.tgz", - "integrity": "sha512-vB5oKLsdrLUORIZ5BXortZTlVyGWWMC1Nud/0LtgxQ3Yn2738HigAD6EVqScvpPsDUI/bcLVsYEXN4dtXQHVng==", + "version": "6.8.1", + "resolved": "https://registry.npmjs.org/@asyncapi/specs/-/specs-6.8.1.tgz", + "integrity": "sha512-czHoAk3PeXTLR+X8IUaD+IpT+g+zUvkcgMDJVothBsan+oHN3jfcFcFUNdOPAAFoUCQN1hXF1dWuphWy05THlA==", "license": "Apache-2.0", "dependencies": { "@types/json-schema": "^7.0.11" } }, "node_modules/@babel/code-frame": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.28.6.tgz", - "integrity": "sha512-JYgintcMjRiCvS8mMECzaEn+m3PfoQiyqukOMCCVQtoJGYJw8j/8LBJEiqkHLkfwCcs74E3pbAUFNg7d9VNJ+Q==", + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz", + "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==", "license": "MIT", "dependencies": { "@babel/helper-validator-identifier": "^7.28.5", @@ -115,9 +115,9 @@ "license": "MIT" }, "node_modules/@emnapi/runtime": { - "version": "1.7.1", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.7.1.tgz", - "integrity": "sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA==", + "version": "1.9.2", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz", + "integrity": "sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==", "license": "MIT", "optional": true, "dependencies": { @@ -132,51 +132,37 @@ "optional": true }, "node_modules/@floating-ui/core": { - "version": "1.7.3", - "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.3.tgz", - "integrity": "sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w==", + "version": "1.7.5", + "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.5.tgz", + "integrity": "sha512-1Ih4WTWyw0+lKyFMcBHGbb5U5FtuHJuujoyyr5zTaWS5EYMeT6Jb2AuDeftsCsEuchO+mM2ij5+q9crhydzLhQ==", "license": "MIT", "peer": true, "dependencies": { - "@floating-ui/utils": "^0.2.10" + "@floating-ui/utils": "^0.2.11" } }, "node_modules/@floating-ui/dom": { - "version": "1.7.4", - "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.4.tgz", - "integrity": "sha512-OOchDgh4F2CchOX94cRVqhvy7b3AFb+/rQXyswmzmGakRfkMgoWVjfnLWkRirfLEfuD4ysVW16eXzwt3jHIzKA==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/core": "^1.7.3", - "@floating-ui/utils": "^0.2.10" - } - }, - "node_modules/@floating-ui/react-dom": { - "version": "2.1.6", - "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.6.tgz", - "integrity": "sha512-4JX6rEatQEvlmgU80wZyq9RT96HZJa88q8hp0pBd+LrczeDI4o6uA2M+uvxngVHo4Ihr8uibXxH6+70zhAFrVw==", + "version": "1.7.6", + "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.6.tgz", + "integrity": "sha512-9gZSAI5XM36880PPMm//9dfiEngYoC6Am2izES1FF406YFsjvyBMmeJ2g4SAju3xWwtuynNRFL2s9hgxpLI5SQ==", "license": "MIT", "peer": true, "dependencies": { - "@floating-ui/dom": "^1.7.4" - }, - "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" + "@floating-ui/core": "^1.7.5", + "@floating-ui/utils": "^0.2.11" } }, "node_modules/@floating-ui/utils": { - "version": "0.2.10", - "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.10.tgz", - "integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==", + "version": "0.2.11", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.11.tgz", + "integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==", "license": "MIT", "peer": true }, "node_modules/@img/colour": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz", - "integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz", + "integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==", "license": "MIT", "engines": { "node": ">=18" @@ -265,6 +251,9 @@ "cpu": [ "arm" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -281,6 +270,9 @@ "cpu": [ "arm64" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -297,6 +289,9 @@ "cpu": [ "ppc64" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -313,6 +308,9 @@ "cpu": [ "riscv64" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -329,6 +327,9 @@ "cpu": [ "s390x" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -345,6 +346,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -361,6 +365,9 @@ "cpu": [ "arm64" ], + "libc": [ + "musl" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -377,6 +384,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -393,6 +403,9 @@ "cpu": [ "arm" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -415,6 +428,9 @@ "cpu": [ "arm64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -437,6 +453,9 @@ "cpu": [ "ppc64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -459,6 +478,9 @@ "cpu": [ "riscv64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -481,6 +503,9 @@ "cpu": [ "s390x" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -503,6 +528,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -525,6 +553,9 @@ "cpu": [ "arm64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -547,6 +578,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1104,18 +1138,17 @@ } }, "node_modules/@mintlify/cli": { - "version": "4.0.900", - "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.900.tgz", - "integrity": "sha512-2Do87Ip/uh1kYQG40JY/teAJlybylwuUKsE+M8CF8Og0UNinaOch/xBqhJ2NUYiarLfTXoMOMnxb/t1nWt7fCg==", + "version": "4.0.1103", + "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.1103.tgz", + "integrity": "sha512-/Tz4ydJp0eY4I5oKv4D4FYK0xPm9fpwCfnSye4UzjRU7bVUv34Qzi6px/1PQJbQtpUiISwF7tuWH6tyB5AWknw==", "license": "Elastic-2.0", "dependencies": { "@inquirer/prompts": "7.9.0", - "@mintlify/common": "1.0.682", - "@mintlify/link-rot": "3.0.839", - "@mintlify/models": "0.0.259", - "@mintlify/prebuild": "1.0.817", - "@mintlify/previewing": "4.0.873", - "@mintlify/validation": "0.1.568", + "@mintlify/common": "1.0.844", + "@mintlify/link-rot": "3.0.1019", + "@mintlify/prebuild": "1.0.986", + "@mintlify/previewing": "4.0.1047", + "@mintlify/validation": "0.1.660", "adm-zip": "0.5.16", "chalk": "5.2.0", "color": "4.2.3", @@ -1126,10 +1159,14 @@ "inquirer": "12.3.0", "js-yaml": "4.1.0", "mdast-util-mdx-jsx": "3.2.0", + "open": "^8.4.2", + "openid-client": "^6.8.2", + "posthog-node": "5.17.2", "react": "19.2.3", "semver": "7.7.2", "unist-util-visit": "5.0.0", - "yargs": "17.7.1" + "yargs": "17.7.1", + "zod": "^4.3.6" }, "bin": { "mint": "bin/index.js", @@ -1137,19 +1174,23 @@ }, "engines": { "node": ">=18.0.0" + }, + "optionalDependencies": { + "keytar": "^7.9.0" } }, "node_modules/@mintlify/common": { - "version": "1.0.682", - "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.682.tgz", - "integrity": "sha512-pa08wRQ1BHJboGLZUhV30P6MWpUawxhSAL3YyudAsmhG4KGjoTO1hlOZid7C61NyHR4K2JQo7GZvKUPDSvuRvg==", + "version": "1.0.844", + "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.844.tgz", + "integrity": "sha512-uTQ5yGFNvP4wpc5FHvBEkJubg5VNW9R2LL9+IcSg/KraDzRn0vCD9YIdq2f2RdwYDYl6sWGMmYjDxUqrOOZVFg==", "license": "ISC", "dependencies": { "@asyncapi/parser": "3.4.0", + "@asyncapi/specs": "6.8.1", "@mintlify/mdx": "^3.0.4", - "@mintlify/models": "0.0.259", + "@mintlify/models": "0.0.290", "@mintlify/openapi-parser": "^0.0.8", - "@mintlify/validation": "0.1.568", + "@mintlify/validation": "0.1.660", "@sindresorhus/slugify": "2.2.0", "@types/mdast": "4.0.4", "acorn": "8.11.2", @@ -1183,7 +1224,8 @@ "remark-parse": "11.0.0", "remark-rehype": "11.1.1", "remark-stringify": "11.0.0", - "tailwindcss": "3.4.4", + "sucrase": "^3.34.0", + "tailwindcss": "^3.4.17", "unified": "11.0.5", "unist-builder": "4.0.0", "unist-util-map": "4.0.0", @@ -1191,7 +1233,22 @@ "unist-util-remove-position": "5.0.0", "unist-util-visit": "5.0.0", "unist-util-visit-parents": "6.0.1", - "vfile": "6.0.3" + "vfile": "6.0.3", + "xss": "1.0.15" + } + }, + "node_modules/@mintlify/common/node_modules/@floating-ui/react-dom": { + "version": "2.1.8", + "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz", + "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==", + "license": "MIT", + "peer": true, + "dependencies": { + "@floating-ui/dom": "^1.7.6" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" } }, "node_modules/@mintlify/common/node_modules/@mintlify/mdx": { @@ -1266,124 +1323,347 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/@mintlify/common/node_modules/mdast-util-mdx-jsx": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz", - "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==", + "node_modules/@mintlify/common/node_modules/@radix-ui/react-arrow": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", + "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", "license": "MIT", + "peer": true, "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "parse-entities": "^4.0.0", - "stringify-entities": "^4.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile-message": "^4.0.0" + "@radix-ui/react-primitive": "2.1.3" }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, - "node_modules/@mintlify/common/node_modules/next-mdx-remote-client": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.4.tgz", - "integrity": "sha512-psCMdO50tfoT1kAH7OGXZvhyRfiHVK6IqwjmWFV5gtLo4dnqjAgcjcLNeJ92iI26UNlKShxYrBs1GQ6UXxk97A==", - "license": "MPL 2.0", + "node_modules/@mintlify/common/node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", + "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", + "license": "MIT", + "peer": true, "dependencies": { - "@babel/code-frame": "^7.27.1", - "@mdx-js/mdx": "^3.1.1", - "@mdx-js/react": "^3.1.1", - "remark-mdx-remove-esm": "^1.2.1", - "serialize-error": "^12.0.0", - "vfile": "^6.0.3", - "vfile-matter": "^5.0.1" - }, - "engines": { - "node": ">=18.18.0" + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-escape-keydown": "1.1.1" }, "peerDependencies": { - "react": ">= 18.3.0 < 19.0.0", - "react-dom": ">= 18.3.0 < 19.0.0" + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, - "node_modules/@mintlify/common/node_modules/react": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", - "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "node_modules/@mintlify/common/node_modules/@radix-ui/react-focus-scope": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", + "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", "license": "MIT", "peer": true, "dependencies": { - "loose-envify": "^1.1.0" + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1" }, - "engines": { - "node": ">=0.10.0" + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, - "node_modules/@mintlify/common/node_modules/react-dom": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", - "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "node_modules/@mintlify/common/node_modules/@radix-ui/react-popover": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", + "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", "license": "MIT", "peer": true, "dependencies": { - "loose-envify": "^1.1.0", - "scheduler": "^0.23.2" + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" }, "peerDependencies": { - "react": "^18.3.1" + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, - "node_modules/@mintlify/common/node_modules/scheduler": { - "version": "0.23.2", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", - "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", + "node_modules/@mintlify/common/node_modules/@radix-ui/react-popper": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", + "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", "license": "MIT", "peer": true, "dependencies": { - "loose-envify": "^1.1.0" + "@floating-ui/react-dom": "^2.0.0", + "@radix-ui/react-arrow": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-rect": "1.1.1", + "@radix-ui/react-use-size": "1.1.1", + "@radix-ui/rect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, - "node_modules/@mintlify/link-rot": { - "version": "3.0.839", - "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.839.tgz", - "integrity": "sha512-t9dJDEUWwPa9H9fzGDVtMVQaK9YYyxS6rFkSTe1U2AahglyHBoB4hLF3qOGtr6894x2f2q7JxKkOawBcQ1ERmw==", - "license": "Elastic-2.0", + "node_modules/@mintlify/common/node_modules/@radix-ui/react-portal": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", + "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", + "license": "MIT", + "peer": true, "dependencies": { - "@mintlify/common": "1.0.682", - "@mintlify/prebuild": "1.0.817", - "@mintlify/previewing": "4.0.873", - "@mintlify/scraping": "4.0.522", - "@mintlify/validation": "0.1.568", - "fs-extra": "11.1.0", - "unist-util-visit": "4.1.2" + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-layout-effect": "1.1.1" }, - "engines": { - "node": ">=18.0.0" + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, - "node_modules/@mintlify/link-rot/node_modules/@types/unist": { - "version": "2.0.11", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", - "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", - "license": "MIT" - }, - "node_modules/@mintlify/link-rot/node_modules/fs-extra": { - "version": "11.1.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz", - "integrity": "sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==", + "node_modules/@mintlify/common/node_modules/@radix-ui/react-presence": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", + "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", "license": "MIT", + "peer": true, "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-use-layout-effect": "1.1.1" }, - "engines": { + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/common/node_modules/@radix-ui/react-primitive": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", + "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/common/node_modules/mdast-util-mdx-jsx": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz", + "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "ccount": "^2.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "parse-entities": "^4.0.0", + "stringify-entities": "^4.0.0", + "unist-util-stringify-position": "^4.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/@mintlify/common/node_modules/next-mdx-remote-client": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz", + "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==", + "license": "MPL 2.0", + "dependencies": { + "@babel/code-frame": "^7.29.0", + "@mdx-js/mdx": "^3.1.1", + "@mdx-js/react": "^3.1.1", + "remark-mdx-remove-esm": "^1.3.1", + "serialize-error": "^13.0.1", + "vfile": "^6.0.3", + "vfile-matter": "^5.0.1" + }, + "engines": { + "node": ">=20.9.0" + }, + "peerDependencies": { + "react": ">= 18.3.0 < 19.0.0", + "react-dom": ">= 18.3.0 < 19.0.0" + } + }, + "node_modules/@mintlify/common/node_modules/react": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", + "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "loose-envify": "^1.1.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/@mintlify/common/node_modules/react-dom": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", + "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "license": "MIT", + "peer": true, + "dependencies": { + "loose-envify": "^1.1.0", + "scheduler": "^0.23.2" + }, + "peerDependencies": { + "react": "^18.3.1" + } + }, + "node_modules/@mintlify/common/node_modules/scheduler": { + "version": "0.23.2", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", + "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "loose-envify": "^1.1.0" + } + }, + "node_modules/@mintlify/link-rot": { + "version": "3.0.1019", + "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.1019.tgz", + "integrity": "sha512-moUkUUcdfm/ivgavmrcgcnxhJ4XCDAbYPABhQbwo6hP3FHXyTB8jJdbjG/wJLZSzjH3KQpq/+DglMH5cCmSNJQ==", + "license": "Elastic-2.0", + "dependencies": { + "@mintlify/common": "1.0.844", + "@mintlify/prebuild": "1.0.986", + "@mintlify/previewing": "4.0.1047", + "@mintlify/scraping": "4.0.522", + "@mintlify/validation": "0.1.660", + "fs-extra": "11.1.0", + "unist-util-visit": "4.1.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@mintlify/link-rot/node_modules/@types/unist": { + "version": "2.0.11", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", + "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", + "license": "MIT" + }, + "node_modules/@mintlify/link-rot/node_modules/fs-extra": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz", + "integrity": "sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { "node": ">=14.14" } }, @@ -1430,9 +1710,9 @@ } }, "node_modules/@mintlify/models": { - "version": "0.0.259", - "resolved": "https://registry.npmjs.org/@mintlify/models/-/models-0.0.259.tgz", - "integrity": "sha512-KOpOeh8e7fVQMA4ex3shGgZ8BLYz8RG/ajRAGDo837/zC4sZQ6l/pPom9VuoN8glE8d4X1FtfOSu8YuER35koA==", + "version": "0.0.290", + "resolved": "https://registry.npmjs.org/@mintlify/models/-/models-0.0.290.tgz", + "integrity": "sha512-dkUIepQOpyZmgdapL22wdQi7MXupLyqFWP/ebiP0NYLcRRYBLWFVcpHHfIDGC2mWOZxNCVVZDvg2rTzfccpj6A==", "license": "Elastic-2.0", "dependencies": { "axios": "1.13.2", @@ -1477,15 +1757,15 @@ } }, "node_modules/@mintlify/prebuild": { - "version": "1.0.817", - "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.817.tgz", - "integrity": "sha512-+NQzmGRg71kHFThtnlJsK75TefKTbQNXSHrZuzs0shSFzI/uJIBDstrjFWhqRdUSyTAUTfQLF5zV0kTsrPz08A==", + "version": "1.0.986", + "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.986.tgz", + "integrity": "sha512-HGQwegpiP0ZwAg/kpISdtad6t5om32HZ/OCWQGHh2G3+gv2Fjg3hGRttagU88oBT9oKC1N7lJPjhxK8FrvwX3w==", "license": "Elastic-2.0", "dependencies": { - "@mintlify/common": "1.0.682", + "@mintlify/common": "1.0.844", "@mintlify/openapi-parser": "^0.0.8", - "@mintlify/scraping": "4.0.543", - "@mintlify/validation": "0.1.568", + "@mintlify/scraping": "4.0.708", + "@mintlify/validation": "0.1.660", "chalk": "5.3.0", "favicons": "7.2.0", "front-matter": "4.0.2", @@ -1581,6 +1861,9 @@ "cpu": [ "arm" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -1597,6 +1880,9 @@ "cpu": [ "arm64" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -1613,6 +1899,9 @@ "cpu": [ "s390x" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -1629,6 +1918,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -1645,6 +1937,9 @@ "cpu": [ "arm64" ], + "libc": [ + "musl" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -1661,6 +1956,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -1677,6 +1975,9 @@ "cpu": [ "arm" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1699,6 +2000,9 @@ "cpu": [ "arm64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1721,6 +2025,9 @@ "cpu": [ "s390x" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1743,6 +2050,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1765,6 +2075,9 @@ "cpu": [ "arm64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1787,6 +2100,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1860,12 +2176,12 @@ } }, "node_modules/@mintlify/prebuild/node_modules/@mintlify/scraping": { - "version": "4.0.543", - "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-4.0.543.tgz", - "integrity": "sha512-SrVZabI7OBbMBQ8qhtnvS8Nv+zCQeSrM2HfWDWbUzBxBQhVAW8hJXLVTg6J3ZpTyQOgOBAw5z8rPrBlblEqEIA==", + "version": "4.0.708", + "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-4.0.708.tgz", + "integrity": "sha512-6GDxVKM7B0NqxXvg4Mm8nVhtybAzkVRZcMGtsp5OoHZrnATZ/C4wv2B82ZnwZvdhzLDATWoSoe3W14IXgYYcCQ==", "license": "Elastic-2.0", "dependencies": { - "@mintlify/common": "1.0.682", + "@mintlify/common": "1.0.844", "@mintlify/openapi-parser": "^0.0.8", "fs-extra": "11.1.1", "hast-util-to-mdast": "10.1.0", @@ -1881,7 +2197,7 @@ "unified": "11.0.5", "unist-util-visit": "5.0.0", "yargs": "17.7.1", - "zod": "3.21.4" + "zod": "3.24.0" }, "bin": { "mintlify-scrape": "bin/cli.js" @@ -2070,15 +2386,25 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/@mintlify/prebuild/node_modules/zod": { + "version": "3.24.0", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.0.tgz", + "integrity": "sha512-Hz+wiY8yD0VLA2k/+nsg2Abez674dDGTai33SwNvMPuf9uIrBC9eFgIMQxBBbHFxVXi8W+5nX9DcAh9YNSQm/w==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/@mintlify/previewing": { - "version": "4.0.873", - "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.873.tgz", - "integrity": "sha512-uq5d5q7LybSR/rB4Wzae6zFUqrtV8TNIcZAz8+nzD7bn5hLoYlGADk4HPg3a9Z6tZXSoGumQFWzneDuO7b4q3w==", + "version": "4.0.1047", + "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.1047.tgz", + "integrity": "sha512-4/k7a/kXkD8LK7nHvRGEPCvigpeunFk2Ku07wlXLR4tB8OEG6v5ZjLFKVHArd+UuRmjHB/oBcCht3DARaizPOw==", "license": "Elastic-2.0", "dependencies": { - "@mintlify/common": "1.0.682", - "@mintlify/prebuild": "1.0.817", - "@mintlify/validation": "0.1.568", + "@mintlify/common": "1.0.844", + "@mintlify/prebuild": "1.0.986", + "@mintlify/validation": "0.1.660", + "adm-zip": "0.5.16", "better-opn": "3.0.2", "chalk": "5.2.0", "chokidar": "3.5.3", @@ -2194,6 +2520,20 @@ "node": ">=18.0.0" } }, + "node_modules/@mintlify/scraping/node_modules/@floating-ui/react-dom": { + "version": "2.1.8", + "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz", + "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==", + "license": "MIT", + "peer": true, + "dependencies": { + "@floating-ui/dom": "^1.7.6" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, "node_modules/@mintlify/scraping/node_modules/@mintlify/common": { "version": "1.0.661", "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.661.tgz", @@ -2367,71 +2707,315 @@ "zod-to-json-schema": "3.20.4" } }, - "node_modules/@mintlify/scraping/node_modules/axios": { - "version": "1.10.0", - "resolved": "https://registry.npmjs.org/axios/-/axios-1.10.0.tgz", - "integrity": "sha512-/1xYAC4MP/HEG+3duIhFr4ZQXR4sQXOIe+o6sdqzeykGLx6Upp/1p8MHqhINOvGeP7xyNHe7tsiJByc4SSVUxw==", + "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-arrow": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", + "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", "license": "MIT", + "peer": true, "dependencies": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.0", - "proxy-from-env": "^1.1.0" + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, - "node_modules/@mintlify/scraping/node_modules/fs-extra": { - "version": "11.1.1", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz", - "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==", + "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", + "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", "license": "MIT", + "peer": true, "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-escape-keydown": "1.1.1" }, - "engines": { - "node": ">=14.14" + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, - "node_modules/@mintlify/scraping/node_modules/mdast-util-mdx-jsx": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz", - "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==", + "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-focus-scope": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", + "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", "license": "MIT", + "peer": true, "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "parse-entities": "^4.0.0", - "stringify-entities": "^4.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile-message": "^4.0.0" + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1" }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-popover": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", + "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-popper": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", + "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", + "license": "MIT", + "peer": true, + "dependencies": { + "@floating-ui/react-dom": "^2.0.0", + "@radix-ui/react-arrow": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-rect": "1.1.1", + "@radix-ui/react-use-size": "1.1.1", + "@radix-ui/rect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-portal": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", + "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-presence": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", + "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-primitive": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", + "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/scraping/node_modules/axios": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.10.0.tgz", + "integrity": "sha512-/1xYAC4MP/HEG+3duIhFr4ZQXR4sQXOIe+o6sdqzeykGLx6Upp/1p8MHqhINOvGeP7xyNHe7tsiJByc4SSVUxw==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/@mintlify/scraping/node_modules/fs-extra": { + "version": "11.1.1", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz", + "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" + } + }, + "node_modules/@mintlify/scraping/node_modules/glob-parent": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", + "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.3" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/@mintlify/scraping/node_modules/lilconfig": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz", + "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/@mintlify/scraping/node_modules/mdast-util-mdx-jsx": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz", + "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "ccount": "^2.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "parse-entities": "^4.0.0", + "stringify-entities": "^4.0.0", + "unist-util-stringify-position": "^4.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/@mintlify/scraping/node_modules/next-mdx-remote-client": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.4.tgz", - "integrity": "sha512-psCMdO50tfoT1kAH7OGXZvhyRfiHVK6IqwjmWFV5gtLo4dnqjAgcjcLNeJ92iI26UNlKShxYrBs1GQ6UXxk97A==", + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz", + "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==", "license": "MPL 2.0", "dependencies": { - "@babel/code-frame": "^7.27.1", + "@babel/code-frame": "^7.29.0", "@mdx-js/mdx": "^3.1.1", "@mdx-js/react": "^3.1.1", - "remark-mdx-remove-esm": "^1.2.1", - "serialize-error": "^12.0.0", + "remark-mdx-remove-esm": "^1.3.1", + "serialize-error": "^13.0.1", "vfile": "^6.0.3", "vfile-matter": "^5.0.1" }, "engines": { - "node": ">=18.18.0" + "node": ">=20.9.0" }, "peerDependencies": { "react": ">= 18.3.0 < 19.0.0", @@ -2489,40 +3073,157 @@ "loose-envify": "^1.1.0" } }, - "node_modules/@mintlify/validation": { - "version": "0.1.568", - "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.568.tgz", - "integrity": "sha512-TsVBQKDU9okr9zncqN33hYAERMdF3a7kgvBNsJCkSqQmhSgtdZw7ZfJV0v1UBnBUZ96yhN4XUF7l4azgHVBLDw==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/mdx": "^3.0.4", - "@mintlify/models": "0.0.259", - "arktype": "2.1.27", - "js-yaml": "4.1.0", - "lcm": "0.0.3", - "lodash": "4.17.21", - "object-hash": "3.0.0", - "openapi-types": "12.1.3", - "uuid": "11.1.0", - "zod": "3.21.4", - "zod-to-json-schema": "3.20.4" - } - }, - "node_modules/@mintlify/validation/node_modules/@mintlify/mdx": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@mintlify/mdx/-/mdx-3.0.4.tgz", - "integrity": "sha512-tJhdpnM5ReJLNJ2fuDRIEr0zgVd6id7/oAIfs26V46QlygiLsc8qx4Rz3LWIX51rUXW/cfakjj0EATxIciIw+g==", + "node_modules/@mintlify/scraping/node_modules/tailwindcss": { + "version": "3.4.4", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.4.tgz", + "integrity": "sha512-ZoyXOdJjISB7/BcLTR6SEsLgKtDStYyYZVLsUtWChO4Ps20CBad7lfJKVDiejocV4ME1hLmyY0WJE3hSDcmQ2A==", "license": "MIT", "dependencies": { - "@shikijs/transformers": "^3.11.0", - "@shikijs/twoslash": "^3.12.2", - "arktype": "^2.1.26", - "hast-util-to-string": "^3.0.1", - "mdast-util-from-markdown": "^2.0.2", - "mdast-util-gfm": "^3.1.0", - "mdast-util-mdx-jsx": "^3.2.0", - "mdast-util-to-hast": "^13.2.0", - "next-mdx-remote-client": "^1.0.3", + "@alloc/quick-lru": "^5.2.0", + "arg": "^5.0.2", + "chokidar": "^3.5.3", + "didyoumean": "^1.2.2", + "dlv": "^1.1.3", + "fast-glob": "^3.3.0", + "glob-parent": "^6.0.2", + "is-glob": "^4.0.3", + "jiti": "^1.21.0", + "lilconfig": "^2.1.0", + "micromatch": "^4.0.5", + "normalize-path": "^3.0.0", + "object-hash": "^3.0.0", + "picocolors": "^1.0.0", + "postcss": "^8.4.23", + "postcss-import": "^15.1.0", + "postcss-js": "^4.0.1", + "postcss-load-config": "^4.0.1", + "postcss-nested": "^6.0.1", + "postcss-selector-parser": "^6.0.11", + "resolve": "^1.22.2", + "sucrase": "^3.32.0" + }, + "bin": { + "tailwind": "lib/cli.js", + "tailwindcss": "lib/cli.js" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@mintlify/scraping/node_modules/tailwindcss/node_modules/postcss-load-config": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", + "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "lilconfig": "^3.0.0", + "yaml": "^2.3.4" + }, + "engines": { + "node": ">= 14" + }, + "peerDependencies": { + "postcss": ">=8.0.9", + "ts-node": ">=9.0.0" + }, + "peerDependenciesMeta": { + "postcss": { + "optional": true + }, + "ts-node": { + "optional": true + } + } + }, + "node_modules/@mintlify/scraping/node_modules/tailwindcss/node_modules/postcss-load-config/node_modules/lilconfig": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", + "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antonk52" + } + }, + "node_modules/@mintlify/scraping/node_modules/zod": { + "version": "3.21.4", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz", + "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/@mintlify/scraping/node_modules/zod-to-json-schema": { + "version": "3.20.4", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.20.4.tgz", + "integrity": "sha512-Un9+kInJ2Zt63n6Z7mLqBifzzPcOyX+b+Exuzf7L1+xqck9Q2EPByyTRduV3kmSPaXaRer1JCsucubpgL1fipg==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.20.0" + } + }, + "node_modules/@mintlify/validation": { + "version": "0.1.660", + "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.660.tgz", + "integrity": "sha512-IHlea3t9ZZcQMOfext3fZuG6/hXXTZPBFJkgeHA9lbG2OkdAVRbSMDY9FvC07sEEX1VQJX+bPimRaXUz/ujyYg==", + "license": "Elastic-2.0", + "dependencies": { + "@mintlify/mdx": "^3.0.4", + "@mintlify/models": "0.0.290", + "arktype": "2.1.27", + "js-yaml": "4.1.0", + "lcm": "0.0.3", + "lodash": "4.17.21", + "neotraverse": "0.6.18", + "object-hash": "3.0.0", + "openapi-types": "12.1.3", + "uuid": "11.1.0", + "zod": "3.24.0", + "zod-to-json-schema": "3.20.4" + } + }, + "node_modules/@mintlify/validation/node_modules/@floating-ui/react-dom": { + "version": "2.1.8", + "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz", + "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==", + "license": "MIT", + "peer": true, + "dependencies": { + "@floating-ui/dom": "^1.7.6" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@mintlify/validation/node_modules/@mintlify/mdx": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@mintlify/mdx/-/mdx-3.0.4.tgz", + "integrity": "sha512-tJhdpnM5ReJLNJ2fuDRIEr0zgVd6id7/oAIfs26V46QlygiLsc8qx4Rz3LWIX51rUXW/cfakjj0EATxIciIw+g==", + "license": "MIT", + "dependencies": { + "@shikijs/transformers": "^3.11.0", + "@shikijs/twoslash": "^3.12.2", + "arktype": "^2.1.26", + "hast-util-to-string": "^3.0.1", + "mdast-util-from-markdown": "^2.0.2", + "mdast-util-gfm": "^3.1.0", + "mdast-util-mdx-jsx": "^3.2.0", + "mdast-util-to-hast": "^13.2.0", + "next-mdx-remote-client": "^1.0.3", "rehype-katex": "^7.0.1", "remark-gfm": "^4.0.0", "remark-math": "^6.0.0", @@ -2532,9 +3233,232 @@ "unist-util-visit": "^5.0.0" }, "peerDependencies": { - "@radix-ui/react-popover": "^1.1.15", - "react": "^18.3.1", - "react-dom": "^18.3.1" + "@radix-ui/react-popover": "^1.1.15", + "react": "^18.3.1", + "react-dom": "^18.3.1" + } + }, + "node_modules/@mintlify/validation/node_modules/@radix-ui/react-arrow": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", + "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/validation/node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", + "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-escape-keydown": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/validation/node_modules/@radix-ui/react-focus-scope": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", + "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/validation/node_modules/@radix-ui/react-popover": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", + "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/validation/node_modules/@radix-ui/react-popper": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", + "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", + "license": "MIT", + "peer": true, + "dependencies": { + "@floating-ui/react-dom": "^2.0.0", + "@radix-ui/react-arrow": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-rect": "1.1.1", + "@radix-ui/react-use-size": "1.1.1", + "@radix-ui/rect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/validation/node_modules/@radix-ui/react-portal": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", + "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/validation/node_modules/@radix-ui/react-presence": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", + "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@mintlify/validation/node_modules/@radix-ui/react-primitive": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", + "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } } }, "node_modules/@mintlify/validation/node_modules/mdast-util-gfm": { @@ -2557,21 +3481,21 @@ } }, "node_modules/@mintlify/validation/node_modules/next-mdx-remote-client": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.4.tgz", - "integrity": "sha512-psCMdO50tfoT1kAH7OGXZvhyRfiHVK6IqwjmWFV5gtLo4dnqjAgcjcLNeJ92iI26UNlKShxYrBs1GQ6UXxk97A==", + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz", + "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==", "license": "MPL 2.0", "dependencies": { - "@babel/code-frame": "^7.27.1", + "@babel/code-frame": "^7.29.0", "@mdx-js/mdx": "^3.1.1", "@mdx-js/react": "^3.1.1", - "remark-mdx-remove-esm": "^1.2.1", - "serialize-error": "^12.0.0", + "remark-mdx-remove-esm": "^1.3.1", + "serialize-error": "^13.0.1", "vfile": "^6.0.3", "vfile-matter": "^5.0.1" }, "engines": { - "node": ">=18.18.0" + "node": ">=20.9.0" }, "peerDependencies": { "react": ">= 18.3.0 < 19.0.0", @@ -2615,6 +3539,24 @@ "loose-envify": "^1.1.0" } }, + "node_modules/@mintlify/validation/node_modules/zod": { + "version": "3.24.0", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.0.tgz", + "integrity": "sha512-Hz+wiY8yD0VLA2k/+nsg2Abez674dDGTai33SwNvMPuf9uIrBC9eFgIMQxBBbHFxVXi8W+5nX9DcAh9YNSQm/w==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/@mintlify/validation/node_modules/zod-to-json-schema": { + "version": "3.20.4", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.20.4.tgz", + "integrity": "sha512-Un9+kInJ2Zt63n6Z7mLqBifzzPcOyX+b+Exuzf7L1+xqck9Q2EPByyTRduV3kmSPaXaRer1JCsucubpgL1fipg==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.20.0" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -2659,6 +3601,15 @@ "fast-deep-equal": "^3.1.3" } }, + "node_modules/@posthog/core": { + "version": "1.7.1", + "resolved": "https://registry.npmjs.org/@posthog/core/-/core-1.7.1.tgz", + "integrity": "sha512-kjK0eFMIpKo9GXIbts8VtAknsoZ18oZorANdtuTj1CbgS28t4ZVq//HAWhnxEuXRTrtkd+SUJ6Ux3j2Af8NCuA==", + "license": "MIT", + "dependencies": { + "cross-spawn": "^7.0.6" + } + }, "node_modules/@puppeteer/browsers": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.3.0.tgz", @@ -2731,6 +3682,32 @@ "node": ">=8" } }, + "node_modules/@puppeteer/browsers/node_modules/tar-fs": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.2.tgz", + "integrity": "sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/@puppeteer/browsers/node_modules/tar-stream": { + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.8.tgz", + "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "bare-fs": "^4.5.5", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, "node_modules/@puppeteer/browsers/node_modules/yargs": { "version": "17.7.2", "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", @@ -2756,30 +3733,6 @@ "license": "MIT", "peer": true }, - "node_modules/@radix-ui/react-arrow": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", - "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-compose-refs": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz", @@ -2796,253 +3749,54 @@ } } }, - "node_modules/@radix-ui/react-context": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz", - "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==", - "license": "MIT", - "peer": true, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", - "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-escape-keydown": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-guards": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", - "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==", - "license": "MIT", - "peer": true, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-scope": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", - "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-id": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", - "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popover": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", - "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popper": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", - "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-rect": "1.1.1", - "@radix-ui/react-use-size": "1.1.1", - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-portal": { - "version": "1.1.9", - "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", - "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", + "node_modules/@radix-ui/react-context": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz", + "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==", "license": "MIT", "peer": true, - "dependencies": { - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-presence": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", - "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", + "node_modules/@radix-ui/react-focus-guards": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", + "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==", "license": "MIT", "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-primitive": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", - "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", + "node_modules/@radix-ui/react-id": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", + "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==", "license": "MIT", "peer": true, "dependencies": { - "@radix-ui/react-slot": "1.2.3" + "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, @@ -3201,12 +3955,12 @@ "peer": true }, "node_modules/@shikijs/core": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.21.0.tgz", - "integrity": "sha512-AXSQu/2n1UIQekY8euBJlvFYZIw0PHY63jUzGbrOma4wPxzznJXTXkri+QcHeBNaFxiiOljKxxJkVSoB3PjbyA==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.23.0.tgz", + "integrity": "sha512-NSWQz0riNb67xthdm5br6lAkvpDJRTgB36fxlo37ZzM2yq0PQFFzbd8psqC2XMPgCzo1fW6cVi18+ArJ44wqgA==", "license": "MIT", "dependencies": { - "@shikijs/types": "3.21.0", + "@shikijs/types": "3.23.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" @@ -3236,62 +3990,62 @@ } }, "node_modules/@shikijs/engine-javascript": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.21.0.tgz", - "integrity": "sha512-ATwv86xlbmfD9n9gKRiwuPpWgPENAWCLwYCGz9ugTJlsO2kOzhOkvoyV/UD+tJ0uT7YRyD530x6ugNSffmvIiQ==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.23.0.tgz", + "integrity": "sha512-aHt9eiGFobmWR5uqJUViySI1bHMqrAgamWE1TYSUoftkAeCCAiGawPMwM+VCadylQtF4V3VNOZ5LmfItH5f3yA==", "license": "MIT", "dependencies": { - "@shikijs/types": "3.21.0", + "@shikijs/types": "3.23.0", "@shikijs/vscode-textmate": "^10.0.2", "oniguruma-to-es": "^4.3.4" } }, "node_modules/@shikijs/engine-oniguruma": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.21.0.tgz", - "integrity": "sha512-OYknTCct6qiwpQDqDdf3iedRdzj6hFlOPv5hMvI+hkWfCKs5mlJ4TXziBG9nyabLwGulrUjHiCq3xCspSzErYQ==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.23.0.tgz", + "integrity": "sha512-1nWINwKXxKKLqPibT5f4pAFLej9oZzQTsby8942OTlsJzOBZ0MWKiwzMsd+jhzu8YPCHAswGnnN1YtQfirL35g==", "license": "MIT", "dependencies": { - "@shikijs/types": "3.21.0", + "@shikijs/types": "3.23.0", "@shikijs/vscode-textmate": "^10.0.2" } }, "node_modules/@shikijs/langs": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.21.0.tgz", - "integrity": "sha512-g6mn5m+Y6GBJ4wxmBYqalK9Sp0CFkUqfNzUy2pJglUginz6ZpWbaWjDB4fbQ/8SHzFjYbtU6Ddlp1pc+PPNDVA==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.23.0.tgz", + "integrity": "sha512-2Ep4W3Re5aB1/62RSYQInK9mM3HsLeB91cHqznAJMuylqjzNVAVCMnNWRHFtcNHXsoNRayP9z1qj4Sq3nMqYXg==", "license": "MIT", "dependencies": { - "@shikijs/types": "3.21.0" + "@shikijs/types": "3.23.0" } }, "node_modules/@shikijs/themes": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.21.0.tgz", - "integrity": "sha512-BAE4cr9EDiZyYzwIHEk7JTBJ9CzlPuM4PchfcA5ao1dWXb25nv6hYsoDiBq2aZK9E3dlt3WB78uI96UESD+8Mw==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.23.0.tgz", + "integrity": "sha512-5qySYa1ZgAT18HR/ypENL9cUSGOeI2x+4IvYJu4JgVJdizn6kG4ia5Q1jDEOi7gTbN4RbuYtmHh0W3eccOrjMA==", "license": "MIT", "dependencies": { - "@shikijs/types": "3.21.0" + "@shikijs/types": "3.23.0" } }, "node_modules/@shikijs/transformers": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@shikijs/transformers/-/transformers-3.21.0.tgz", - "integrity": "sha512-CZwvCWWIiRRiFk9/JKzdEooakAP8mQDtBOQ1TKiCaS2E1bYtyBCOkUzS8akO34/7ufICQ29oeSfkb3tT5KtrhA==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@shikijs/transformers/-/transformers-3.23.0.tgz", + "integrity": "sha512-F9msZVxdF+krQNSdQ4V+Ja5QemeAoTQ2jxt7nJCwhDsdF1JWS3KxIQXA3lQbyKwS3J61oHRUSv4jYWv3CkaKTQ==", "license": "MIT", "dependencies": { - "@shikijs/core": "3.21.0", - "@shikijs/types": "3.21.0" + "@shikijs/core": "3.23.0", + "@shikijs/types": "3.23.0" } }, "node_modules/@shikijs/twoslash": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@shikijs/twoslash/-/twoslash-3.21.0.tgz", - "integrity": "sha512-iH360udAYON2JwfIldoCiMZr9MljuQA5QRBivKLpEuEpmVCSwrR+0WTQ0eS1ptgGBdH9weFiIsA5wJDzsEzTYg==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@shikijs/twoslash/-/twoslash-3.23.0.tgz", + "integrity": "sha512-pNaLJWMA3LU7PhT8tm9OQBZ1epy0jmdgeJzntBtr1EVXLbHxGzTj3mnf9vOdcl84l96qnlJXkJ/NGXZYBpXl5g==", "license": "MIT", "dependencies": { - "@shikijs/core": "3.21.0", - "@shikijs/types": "3.21.0", + "@shikijs/core": "3.23.0", + "@shikijs/types": "3.23.0", "twoslash": "^0.3.6" }, "peerDependencies": { @@ -3299,9 +4053,9 @@ } }, "node_modules/@shikijs/types": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.21.0.tgz", - "integrity": "sha512-zGrWOxZ0/+0ovPY7PvBU2gIS9tmhSUUt30jAcNV0Bq0gb2S98gwfjIs1vxlmH5zM7/4YxLamT6ChlqqAJmPPjA==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.23.0.tgz", + "integrity": "sha512-3JZ5HXOZfYjsYSk0yPwBrkupyYSLpAE26Qc0HLghhZNGTZg/SKxXIIgoxOpmmeQP0RRSDJTk1/vPfw9tbw+jSQ==", "license": "MIT", "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", @@ -3464,9 +4218,9 @@ } }, "node_modules/@stoplight/spectral-core": { - "version": "1.20.0", - "resolved": "https://registry.npmjs.org/@stoplight/spectral-core/-/spectral-core-1.20.0.tgz", - "integrity": "sha512-5hBP81nCC1zn1hJXL/uxPNRKNcB+/pEIHgCjPRpl/w/qy9yC9ver04tw1W0l/PMiv0UeB5dYgozXVQ4j5a6QQQ==", + "version": "1.21.0", + "resolved": "https://registry.npmjs.org/@stoplight/spectral-core/-/spectral-core-1.21.0.tgz", + "integrity": "sha512-oj4e/FrDLUhBRocIW+lRMKlJ/q/rDZw61HkLbTFsdMd+f/FTkli2xHNB1YC6n1mrMKjjvy7XlUuFkC7XxtgbWw==", "license": "Apache-2.0", "dependencies": { "@stoplight/better-ajv-errors": "1.0.3", @@ -3483,7 +4237,7 @@ "ajv-formats": "~2.1.1", "es-aggregate-error": "^1.0.7", "jsonpath-plus": "^10.3.0", - "lodash": "~4.17.21", + "lodash": "~4.17.23", "lodash.topath": "^4.5.2", "minimatch": "3.1.2", "nimma": "0.2.3", @@ -3508,6 +4262,12 @@ "node": "^12.20 || >=14.13" } }, + "node_modules/@stoplight/spectral-core/node_modules/lodash": { + "version": "4.17.23", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", + "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", + "license": "MIT" + }, "node_modules/@stoplight/spectral-core/node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -3759,9 +4519,9 @@ } }, "node_modules/@types/debug": { - "version": "4.1.12", - "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz", - "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==", + "version": "4.1.13", + "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz", + "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==", "license": "MIT", "dependencies": { "@types/ms": "*" @@ -3801,9 +4561,9 @@ } }, "node_modules/@types/http-cache-semantics": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.4.tgz", - "integrity": "sha512-1m0bIFVc7eJWyve9S0RnuRgcQqF/Xd5QsUZAZeQFr1Q3/p9JWoQQEqmVy+DPTNpGXwhgIetAoYF8JSc33q29QA==", + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz", + "integrity": "sha512-L3LgimLHXtGkWikKnsPg0/VFx9OGZaC+eN1u4r+OB1XRqH3meBIAVC2zr1WdMH+RHmnRkqliQAOHNJ/E0j/e0Q==", "license": "MIT" }, "node_modules/@types/json-schema": { @@ -3849,18 +4609,18 @@ } }, "node_modules/@types/node": { - "version": "25.0.10", - "resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.10.tgz", - "integrity": "sha512-zWW5KPngR/yvakJgGOmZ5vTBemDoSqF3AcV/LrO5u5wTWyEAVVh+IT39G4gtyAkh3CtTZs8aX/yRM82OfzHJRg==", + "version": "25.5.2", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.2.tgz", + "integrity": "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg==", "license": "MIT", "dependencies": { - "undici-types": "~7.16.0" + "undici-types": "~7.18.0" } }, "node_modules/@types/react": { - "version": "19.2.9", - "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.9.tgz", - "integrity": "sha512-Lpo8kgb/igvMIPeNV2rsYKTgaORYdO1XGVZ4Qz3akwOj0ySGYMPlQWa8BaLn0G63D1aSaAQ5ldR06wCpChQCjA==", + "version": "19.2.14", + "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz", + "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "license": "MIT", "peer": true, "dependencies": { @@ -3890,12 +4650,12 @@ } }, "node_modules/@typescript/vfs": { - "version": "1.6.2", - "resolved": "https://registry.npmjs.org/@typescript/vfs/-/vfs-1.6.2.tgz", - "integrity": "sha512-hoBwJwcbKHmvd2QVebiytN1aELvpk9B74B4L1mFm/XT1Q/VOYAWl2vQ9AWRFtQq8zmz6enTpfTV8WRc4ATjW/g==", + "version": "1.6.4", + "resolved": "https://registry.npmjs.org/@typescript/vfs/-/vfs-1.6.4.tgz", + "integrity": "sha512-PJFXFS4ZJKiJ9Qiuix6Dz/OwEIqHD7Dme1UwZhTK11vR+5dqW2ACbdndWQexBzCx+CPuMe5WBYQWCsFyGlQLlQ==", "license": "MIT", "dependencies": { - "debug": "^4.1.1" + "debug": "^4.4.3" }, "peerDependencies": { "typescript": "*" @@ -3997,9 +4757,9 @@ } }, "node_modules/ajv": { - "version": "8.17.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", - "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "version": "8.18.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz", + "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", "license": "MIT", "dependencies": { "fast-deep-equal": "^3.1.3", @@ -4053,9 +4813,9 @@ } }, "node_modules/ansi-escapes": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-7.2.0.tgz", - "integrity": "sha512-g6LhBsl+GBPRWGWsBtutpzBYuIIdBkLEvad5C/va/74Db018+5TZiyA26cZJAr3Rft5lprVqOIPxf5Vid6tqAw==", + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-7.3.0.tgz", + "integrity": "sha512-BvU8nYgGQBxcmMuEeUEmNTvrMVjJNSH7RgW24vXexN4Ven6qCvy4TntnvlnwnMLTVlcRQQdbRY8NKnaIoeWDNg==", "license": "MIT", "dependencies": { "environment": "^1.0.0" @@ -4305,9 +5065,9 @@ } }, "node_modules/b4a": { - "version": "1.7.3", - "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.7.3.tgz", - "integrity": "sha512-5Q2mfq2WfGuFp3uS//0s6baOJLMoVduPYVeNmDYxu5OUA1/cBfvr2RIS7vi62LdNj/urk1hfmj867I3qt6uZ7Q==", + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.0.tgz", + "integrity": "sha512-qRuSmNSkGQaHwNbM7J78Wwy+ghLEYF1zNrSeMxj4Kgw6y33O3mXcQ6Ie9fRvfU/YnxWkOchPXbaLb73TkIsfdg==", "license": "Apache-2.0", "peerDependencies": { "react-native-b4a": "*" @@ -4349,11 +5109,10 @@ } }, "node_modules/bare-fs": { - "version": "4.5.2", - "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz", - "integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==", + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.6.0.tgz", + "integrity": "sha512-2YkS7NuiJceSEbyEOdSNLE9tsGd+f4+f7C+Nik/MCk27SYdwIMPT/yRKvg++FZhQXgk0KWJKJyXX9RhVV0RGqA==", "license": "Apache-2.0", - "optional": true, "dependencies": { "bare-events": "^2.5.4", "bare-path": "^3.0.0", @@ -4374,11 +5133,10 @@ } }, "node_modules/bare-os": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz", - "integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==", + "version": "3.8.7", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.8.7.tgz", + "integrity": "sha512-G4Gr1UsGeEy2qtDTZwL7JFLo2wapUarz7iTMcYcMFdS89AIQuBoyjgXZz0Utv7uHs3xA9LckhVbeBi8lEQrC+w==", "license": "Apache-2.0", - "optional": true, "engines": { "bare": ">=1.14.0" } @@ -4388,25 +5146,28 @@ "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz", "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", "license": "Apache-2.0", - "optional": true, "dependencies": { "bare-os": "^3.0.1" } }, "node_modules/bare-stream": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz", - "integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==", + "version": "2.12.0", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.12.0.tgz", + "integrity": "sha512-w28i8lkBgREV3rPXGbgK+BO66q+ZpKqRWrZLiCdmmUlLPrQ45CzkvRhN+7lnv00Gpi2zy5naRxnUFAxCECDm9g==", "license": "Apache-2.0", - "optional": true, "dependencies": { - "streamx": "^2.21.0" + "streamx": "^2.25.0", + "teex": "^1.0.1" }, "peerDependencies": { + "bare-abort-controller": "*", "bare-buffer": "*", "bare-events": "*" }, "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + }, "bare-buffer": { "optional": true }, @@ -4416,11 +5177,10 @@ } }, "node_modules/bare-url": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz", - "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==", + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.4.0.tgz", + "integrity": "sha512-NSTU5WN+fy/L0DDenfE8SXQna4voXuW0FHM7wH8i3/q9khUSchfPbPezO4zSFMnDGIf9YE+mt/RWhZgNRKRIXA==", "license": "Apache-2.0", - "optional": true, "dependencies": { "bare-path": "^3.0.0" } @@ -4455,9 +5215,9 @@ } }, "node_modules/basic-ftp": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.1.0.tgz", - "integrity": "sha512-RkaJzeJKDbaDWTIPiJwubyljaEPwpVWkm9Rt5h9Nd6h7tEXTJ3VB4qxdZBioV7JO5yLUaOKwz7vDOzlncUsegw==", + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.3.0.tgz", + "integrity": "sha512-5K9eNNn7ywHPsYnFwjKgYH8Hf8B5emh7JKcPaVjjrMJFQQwGpwowEnZNEtHs7DfR7hCZsmaK3VA4HUK0YarT+w==", "license": "MIT", "engines": { "node": ">=10.0.0" @@ -4487,6 +5247,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "license": "MIT", + "optional": true, + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, "node_modules/body-parser": { "version": "1.20.1", "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz", @@ -4539,9 +5311,9 @@ "license": "MIT" }, "node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "version": "1.1.13", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz", + "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==", "license": "MIT", "dependencies": { "balanced-match": "^1.0.0", @@ -5196,9 +5968,9 @@ } }, "node_modules/cosmiconfig": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.0.tgz", - "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.1.tgz", + "integrity": "sha512-hr4ihw+DBqcvrsEDioRO31Z17x71pUYoNe/4h6Z0wB72p7MU7/9gH8Q3s12NFhHPfYBBOV3qyfUxmr/Yn3shnQ==", "license": "MIT", "dependencies": { "env-paths": "^2.2.1", @@ -5221,6 +5993,20 @@ } } }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/cssesc": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", @@ -5233,6 +6019,12 @@ "node": ">=4" } }, + "node_modules/cssfilter": { + "version": "0.0.10", + "resolved": "https://registry.npmjs.org/cssfilter/-/cssfilter-0.0.10.tgz", + "integrity": "sha512-FAaLDaplstoRsDR8XGYH51znUN0UY7nMc6Z9/fvE8EXGwvJE9hu7W2vHwx1+bd6gCYnln9nLbzxFTrcO9YQDZw==", + "license": "MIT" + }, "node_modules/csstype": { "version": "3.2.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", @@ -5384,6 +6176,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=4.0.0" + } + }, "node_modules/defer-to-connect": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", @@ -5745,9 +6547,9 @@ } }, "node_modules/es-abstract": { - "version": "1.24.1", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.24.1.tgz", - "integrity": "sha512-zHXBLhP+QehSSbsS9Pt23Gg964240DPd6QCf8WpkqEXxQ7fhdZzYsocOr5u7apWonsS5EjZDmTF+/slGMyasvw==", + "version": "1.24.2", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.24.2.tgz", + "integrity": "sha512-2FpH9Q5i2RRwyEP1AylXe6nYLR5OhaJTZwmlcP0dL/+JCbgg7yyEo/sEK6HeGZRf3dFpWwThaRHVApXSkW3xeg==", "license": "MIT", "dependencies": { "array-buffer-byte-length": "^1.0.2", @@ -5897,9 +6699,9 @@ } }, "node_modules/es-toolkit": { - "version": "1.44.0", - "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.44.0.tgz", - "integrity": "sha512-6penXeZalaV88MM3cGkFZZfOoLGWshWWfdy0tWw/RlVVyhvMaWSBTOvXNeiW3e5FwdS5ePW0LGEu17zT139ktg==", + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.45.1.tgz", + "integrity": "sha512-/jhoOj/Fx+A+IIyDNOvO3TItGmlMKhtX8ISAHKE90c4b/k1tqaqEZ+uUqfpU8DMnW5cgNJv606zS55jGvza0Xw==", "license": "MIT", "workspaces": [ "docs", @@ -6145,6 +6947,16 @@ "bare-events": "^2.7.0" } }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "license": "(MIT OR WTFPL)", + "optional": true, + "engines": { + "node": ">=6" + } + }, "node_modules/express": { "version": "4.18.2", "resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz", @@ -6412,6 +7224,9 @@ "cpu": [ "arm" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -6428,6 +7243,9 @@ "cpu": [ "arm64" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -6444,6 +7262,9 @@ "cpu": [ "s390x" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -6460,6 +7281,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -6476,6 +7300,9 @@ "cpu": [ "arm64" ], + "libc": [ + "musl" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -6492,6 +7319,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -6508,6 +7338,9 @@ "cpu": [ "arm" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -6530,6 +7363,9 @@ "cpu": [ "arm64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -6552,6 +7388,9 @@ "cpu": [ "s390x" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -6574,6 +7413,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -6596,6 +7438,9 @@ "cpu": [ "arm64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -6618,6 +7463,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -6784,9 +7632,9 @@ "license": "MIT" }, "node_modules/follow-redirects": { - "version": "1.15.11", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", - "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.16.0.tgz", + "integrity": "sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==", "funding": [ { "type": "individual", @@ -6900,6 +7748,13 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", + "license": "MIT", + "optional": true + }, "node_modules/fs-extra": { "version": "11.2.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", @@ -7015,9 +7870,9 @@ } }, "node_modules/get-east-asian-width": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.4.0.tgz", - "integrity": "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==", + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.5.0.tgz", + "integrity": "sha512-CQ+bEO+Tva/qlmw24dCejulK5pMzVnUOFOijVogd3KQs07HnRIgp8TGipvCCRT06xeYEbpbgwaCxglFyiuIcmA==", "license": "MIT", "engines": { "node": ">=18" @@ -7116,6 +7971,13 @@ "node": ">= 14" } }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "license": "MIT", + "optional": true + }, "node_modules/glob-parent": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", @@ -7785,6 +8647,13 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "license": "ISC" }, + "node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "license": "ISC", + "optional": true + }, "node_modules/ink": { "version": "6.3.0", "resolved": "https://registry.npmjs.org/ink/-/ink-6.3.0.tgz", @@ -8537,6 +9406,12 @@ "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==", "license": "MIT" }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "license": "ISC" + }, "node_modules/jiti": { "version": "1.21.7", "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz", @@ -8546,6 +9421,15 @@ "jiti": "bin/jiti.js" } }, + "node_modules/jose": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz", + "integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", @@ -8610,9 +9494,9 @@ } }, "node_modules/jsonpath-plus": { - "version": "10.3.0", - "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz", - "integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==", + "version": "10.4.0", + "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.4.0.tgz", + "integrity": "sha512-T92WWatJXmhBbKsgH/0hl+jxjdXrifi5IKeMY02DWggRxX0UElcbVzPlmgLTbvsPeW1PasQ6xE2Q75stkhGbsA==", "license": "MIT", "dependencies": { "@jsep-plugin/assignment": "^1.3.0", @@ -8637,9 +9521,9 @@ } }, "node_modules/katex": { - "version": "0.16.28", - "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.28.tgz", - "integrity": "sha512-YHzO7721WbmAL6Ov1uzN/l5mY5WWWhJBSW+jq4tkfZfsxmo1hu6frS0EOswvjBUnWE6NtjEs48SFn5CQESRLZg==", + "version": "0.16.45", + "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.45.tgz", + "integrity": "sha512-pQpZbdBu7wCTmQUh7ufPmLr0pFoObnGUoL/yhtwJDgmmQpbkg/0HSVti25Fu4rmd1oCR6NGWe9vqTWuWv3GcNA==", "funding": [ "https://opencollective.com/katex", "https://github.com/sponsors/katex" @@ -8652,6 +9536,18 @@ "katex": "cli.js" } }, + "node_modules/keytar": { + "version": "7.9.0", + "resolved": "https://registry.npmjs.org/keytar/-/keytar-7.9.0.tgz", + "integrity": "sha512-VPD8mtVtm5JNtA2AErl6Chp06JBfy7diFQ7TQQhdpWOl6MrCRB+eRbvAZUsbGQS9kiMq0coJsy0W0vHpDCkWsQ==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "dependencies": { + "node-addon-api": "^4.3.0", + "prebuild-install": "^7.0.1" + } + }, "node_modules/keyv": { "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", @@ -8683,12 +9579,15 @@ } }, "node_modules/lilconfig": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz", - "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==", + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", + "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", "license": "MIT", "engines": { - "node": ">=10" + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antonk52" } }, "node_modules/lines-and-columns": { @@ -9973,6 +10872,16 @@ "node": "*" } }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "license": "MIT", + "optional": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/minipass": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz", @@ -10008,15 +10917,14 @@ } }, "node_modules/mintlify": { - "version": "4.2.296", - "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.2.296.tgz", - "integrity": "sha512-OX9eflJeueAnCEZQoQw9Q3YzJXoi5pK8W94DmDxSnsog9mJeVnsTrOXHAnN0Qq1Zxw+hM8ylxzO3qAk7ERxIZA==", + "version": "4.2.500", + "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.2.500.tgz", + "integrity": "sha512-pVuzf4F+JRmVCuQZLQebIlggCzWQyHsnPiAbuUoJ8aofsKbbs30woRQznoeCmzgmzDxBk25xPay9yy4GRPRlOw==", "license": "Elastic-2.0", "dependencies": { - "@mintlify/cli": "4.0.900" + "@mintlify/cli": "4.0.1103" }, "bin": { - "mint": "index.js", "mintlify": "index.js" }, "engines": { @@ -10041,6 +10949,13 @@ "node": ">=10" } }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", + "license": "MIT", + "optional": true + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -10085,6 +11000,13 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", + "license": "MIT", + "optional": true + }, "node_modules/negotiator": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", @@ -10104,9 +11026,9 @@ } }, "node_modules/netmask": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz", - "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.1.0.tgz", + "integrity": "sha512-z9sZrk6wyf8/NDKKqe+Tyl58XtgkYrV4kgt1O8xrzYvpl1LvPacPo0imMLHfpStk3kgCIq1ksJ2bmJn9hue2lQ==", "license": "MIT", "engines": { "node": ">= 0.4.0" @@ -10144,6 +11066,26 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/node-abi": { + "version": "3.89.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz", + "integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==", + "license": "MIT", + "optional": true, + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/node-addon-api": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-4.3.0.tgz", + "integrity": "sha512-73sE9+3UaLYYFmDsFZnqCInzPyh3MqIwZO9cw58yIqAZhONrrabrYyYe3TuIqtIiOuTXVhsGau8hcrhhwSsDIQ==", + "license": "MIT", + "optional": true + }, "node_modules/node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -10164,6 +11106,18 @@ } } }, + "node_modules/non-error": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/non-error/-/non-error-0.1.0.tgz", + "integrity": "sha512-TMB1uHiGsHRGv1uYclfhivcnf0/PdFp2pNqRxXjncaAsjYMoisaQJI+SSZCqRq+VliwRTC8tsMQfmrWjDMhkPQ==", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/normalize-path": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", @@ -10185,6 +11139,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/oauth4webapi": { + "version": "3.8.5", + "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.5.tgz", + "integrity": "sha512-A8jmyUckVhRJj5lspguklcl90Ydqk61H3dcU0oLhH3Yv13KpAliKTt5hknpGGPZSSfOwGyraNEFmofDYH+1kSg==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -10287,13 +11250,13 @@ "license": "MIT" }, "node_modules/oniguruma-to-es": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.4.tgz", - "integrity": "sha512-3VhUGN3w2eYxnTzHn+ikMI+fp/96KoRSVK9/kMTcFqj1NRDh2IhQCKvYxDnWePKRXY/AqH+Fuiyb7VHSzBjHfA==", + "version": "4.3.5", + "resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.5.tgz", + "integrity": "sha512-Zjygswjpsewa0NLTsiizVuMQZbp0MDyM6lIt66OxsF21npUDlzpHi1Mgb/qhQdkb+dWFTzJmFbEWdvZgRho8eQ==", "license": "MIT", "dependencies": { "oniguruma-parser": "^0.12.1", - "regex": "^6.0.1", + "regex": "^6.1.0", "regex-recursion": "^6.0.2" } }, @@ -10320,6 +11283,19 @@ "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==", "license": "MIT" }, + "node_modules/openid-client": { + "version": "6.8.2", + "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.2.tgz", + "integrity": "sha512-uOvTCndr4udZsKihJ68H9bUICrriHdUVJ6Az+4Ns6cW55rwM5h0bjVIzDz2SxgOI84LKjFyjOFvERLzdTUROGA==", + "license": "MIT", + "dependencies": { + "jose": "^6.1.3", + "oauth4webapi": "^3.8.4" + }, + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/own-keys": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/own-keys/-/own-keys-1.0.1.tgz", @@ -10525,6 +11501,15 @@ "node": "^12.20.0 || ^14.13.1 || >=16.0.0" } }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/path-parse": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", @@ -10550,9 +11535,9 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", "license": "MIT", "engines": { "node": ">=8.6" @@ -10668,9 +11653,9 @@ } }, "node_modules/postcss-load-config": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", - "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-6.0.1.tgz", + "integrity": "sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==", "funding": [ { "type": "opencollective", @@ -10683,37 +11668,32 @@ ], "license": "MIT", "dependencies": { - "lilconfig": "^3.0.0", - "yaml": "^2.3.4" + "lilconfig": "^3.1.1" }, "engines": { - "node": ">= 14" + "node": ">= 18" }, "peerDependencies": { + "jiti": ">=1.21.0", "postcss": ">=8.0.9", - "ts-node": ">=9.0.0" + "tsx": "^4.8.1", + "yaml": "^2.4.2" }, "peerDependenciesMeta": { + "jiti": { + "optional": true + }, "postcss": { "optional": true }, - "ts-node": { + "tsx": { + "optional": true + }, + "yaml": { "optional": true } } }, - "node_modules/postcss-load-config/node_modules/lilconfig": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", - "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", - "license": "MIT", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antonk52" - } - }, "node_modules/postcss-nested": { "version": "6.2.0", "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz", @@ -10745,19 +11725,59 @@ "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", "license": "MIT", "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" + "cssesc": "^3.0.0", + "util-deprecate": "^1.0.2" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/postcss-value-parser": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", + "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", + "license": "MIT" + }, + "node_modules/posthog-node": { + "version": "5.17.2", + "resolved": "https://registry.npmjs.org/posthog-node/-/posthog-node-5.17.2.tgz", + "integrity": "sha512-lz3YJOr0Nmiz0yHASaINEDHqoV+0bC3eD8aZAG+Ky292dAnVYul+ga/dMX8KCBXg8hHfKdxw0SztYD5j6dgUqQ==", + "license": "MIT", + "dependencies": { + "@posthog/core": "1.7.1" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.", + "license": "MIT", + "optional": true, + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" }, "engines": { - "node": ">=4" + "node": ">=10" } }, - "node_modules/postcss-value-parser": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", - "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", - "license": "MIT" - }, "node_modules/progress": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", @@ -10858,9 +11878,9 @@ } }, "node_modules/pump": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz", - "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==", + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", "license": "MIT", "dependencies": { "end-of-stream": "^1.1.0", @@ -10986,6 +12006,22 @@ "node": ">=0.10.0" } }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "optional": true, + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, "node_modules/react": { "version": "19.2.3", "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz", @@ -10995,26 +12031,6 @@ "node": ">=0.10.0" } }, - "node_modules/react-dom": { - "version": "19.2.3", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.3.tgz", - "integrity": "sha512-yELu4WmLPw5Mr/lmeEpox5rw3RETacE++JgHqQzd2dg+YbJuat3jH4ingc+WPZhxaoFzdv9y33G+F7Nl5O0GBg==", - "license": "MIT", - "peer": true, - "dependencies": { - "scheduler": "^0.27.0" - }, - "peerDependencies": { - "react": "^19.2.3" - } - }, - "node_modules/react-dom/node_modules/scheduler": { - "version": "0.27.0", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", - "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==", - "license": "MIT", - "peer": true - }, "node_modules/react-reconciler": { "version": "0.32.0", "resolved": "https://registry.npmjs.org/react-reconciler/-/react-reconciler-0.32.0.tgz", @@ -11132,6 +12148,21 @@ "pify": "^2.3.0" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "optional": true, + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/readdirp": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", @@ -11436,9 +12467,9 @@ } }, "node_modules/remark-mdx-remove-esm": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/remark-mdx-remove-esm/-/remark-mdx-remove-esm-1.2.2.tgz", - "integrity": "sha512-YSaUwqiuJuD6S9XTAD6zmO4JJJZJgsRAdsl2drZO8/ssAVv0HXAg4vkSgHZAP46ORh8ERPFQrC7JWlbkwBwu1A==", + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/remark-mdx-remove-esm/-/remark-mdx-remove-esm-1.3.1.tgz", + "integrity": "sha512-POa8abdiuicD2e+zQkclxzJa5JEGLtV8XIOFVvisnGuw4l4xd6dfQozedwqR8JTeXQmxLebvYhlbwHoQP9RWkw==", "license": "MIT", "dependencies": { "@types/mdast": "^4.0.4", @@ -11805,9 +12836,9 @@ "license": "MIT" }, "node_modules/sax": { - "version": "1.4.4", - "resolved": "https://registry.npmjs.org/sax/-/sax-1.4.4.tgz", - "integrity": "sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.6.0.tgz", + "integrity": "sha512-6R3J5M4AcbtLUdZmRv2SygeVaM7IhrLXu9BmnOGmmACak8fiUtOsYNWUS4uK7upbmHIBbLBeFeI//477BKLBzA==", "license": "BlueOak-1.0.0", "engines": { "node": ">=11.0.0" @@ -11871,15 +12902,31 @@ "license": "MIT" }, "node_modules/serialize-error": { - "version": "12.0.0", - "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-12.0.0.tgz", - "integrity": "sha512-ZYkZLAvKTKQXWuh5XpBw7CdbSzagarX39WyZ2H07CDLC5/KfsRGlIXV8d4+tfqX1M7916mRqR1QfNHSij+c9Pw==", + "version": "13.0.1", + "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-13.0.1.tgz", + "integrity": "sha512-bBZaRwLH9PN5HbLCjPId4dP5bNGEtumcErgOX952IsvOhVPrm3/AeK1y0UHA/QaPG701eg0yEnOKsCOC6X/kaA==", "license": "MIT", "dependencies": { - "type-fest": "^4.31.0" + "non-error": "^0.1.0", + "type-fest": "^5.4.1" }, "engines": { - "node": ">=18" + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/serialize-error/node_modules/type-fest": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-5.5.0.tgz", + "integrity": "sha512-PlBfpQwiUvGViBNX84Yxwjsdhd1TUlXr6zjX7eoirtCPIr08NAmxwa+fcYBTeRQxHo9YC9wwF3m9i700sHma8g==", + "license": "(MIT OR CC0-1.0)", + "dependencies": { + "tagged-tag": "^1.0.0" + }, + "engines": { + "node": ">=20" }, "funding": { "url": "https://github.com/sponsors/sindresorhus" @@ -12008,9 +13055,9 @@ } }, "node_modules/sharp/node_modules/semver": { - "version": "7.7.3", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", - "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", "license": "ISC", "bin": { "semver": "bin/semver.js" @@ -12019,18 +13066,39 @@ "node": ">=10" } }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/shiki": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/shiki/-/shiki-3.21.0.tgz", - "integrity": "sha512-N65B/3bqL/TI2crrXr+4UivctrAGEjmsib5rPMMPpFp1xAx/w03v8WZ9RDDFYteXoEgY7qZ4HGgl5KBIu1153w==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/shiki/-/shiki-3.23.0.tgz", + "integrity": "sha512-55Dj73uq9ZXL5zyeRPzHQsK7Nbyt6Y10k5s7OjuFZGMhpp4r/rsLBH0o/0fstIzX1Lep9VxefWljK/SKCzygIA==", "license": "MIT", "dependencies": { - "@shikijs/core": "3.21.0", - "@shikijs/engine-javascript": "3.21.0", - "@shikijs/engine-oniguruma": "3.21.0", - "@shikijs/langs": "3.21.0", - "@shikijs/themes": "3.21.0", - "@shikijs/types": "3.21.0", + "@shikijs/core": "3.23.0", + "@shikijs/engine-javascript": "3.23.0", + "@shikijs/engine-oniguruma": "3.23.0", + "@shikijs/langs": "3.23.0", + "@shikijs/themes": "3.23.0", + "@shikijs/types": "3.23.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } @@ -12119,6 +13187,27 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, "node_modules/simple-eval": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/simple-eval/-/simple-eval-1.0.1.tgz", @@ -12131,6 +13220,32 @@ "node": ">=12" } }, + "node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true, + "dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, "node_modules/simple-swizzle": { "version": "0.2.4", "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.4.tgz", @@ -12222,9 +13337,9 @@ } }, "node_modules/socket.io-parser": { - "version": "4.2.5", - "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.5.tgz", - "integrity": "sha512-bPMmpy/5WWKHea5Y/jYAP6k74A+hvmRCQaJuJB6I/ML5JZq/KfNieUVo/3Mh7SAqn7TyFdIo6wqYHInG1MU1bQ==", + "version": "4.2.6", + "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.6.tgz", + "integrity": "sha512-asJqbVBDsBCJx0pTqw3WfesSY0iRX+2xzWEWzrpcH7L6fLzrhyF8WPI8UaeM4YCuDfpwA/cgsdugMsmtz8EJeg==", "license": "MIT", "dependencies": { "@socket.io/component-emitter": "~3.1.0", @@ -12357,9 +13472,9 @@ } }, "node_modules/streamx": { - "version": "2.23.0", - "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz", - "integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==", + "version": "2.25.0", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.25.0.tgz", + "integrity": "sha512-0nQuG6jf1w+wddNEEXCF4nTg3LtufWINB5eFEN+5TNZW7KWJp6x87+JFL43vaAUPyCfH1wID+mNVyW6OHtFamg==", "license": "MIT", "dependencies": { "events-universal": "^1.0.0", @@ -12367,6 +13482,16 @@ "text-decoder": "^1.1.0" } }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "optional": true, + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/string-width": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz", @@ -12455,12 +13580,12 @@ } }, "node_modules/strip-ansi": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz", - "integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.2.0.tgz", + "integrity": "sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==", "license": "MIT", "dependencies": { - "ansi-regex": "^6.0.1" + "ansi-regex": "^6.2.2" }, "engines": { "node": ">=12" @@ -12469,6 +13594,16 @@ "url": "https://github.com/chalk/strip-ansi?sponsor=1" } }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/style-to-js": { "version": "1.1.21", "resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.21.tgz", @@ -12530,34 +13665,46 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/tagged-tag": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/tagged-tag/-/tagged-tag-1.0.0.tgz", + "integrity": "sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng==", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/tailwindcss": { - "version": "3.4.4", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.4.tgz", - "integrity": "sha512-ZoyXOdJjISB7/BcLTR6SEsLgKtDStYyYZVLsUtWChO4Ps20CBad7lfJKVDiejocV4ME1hLmyY0WJE3hSDcmQ2A==", + "version": "3.4.19", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.19.tgz", + "integrity": "sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ==", "license": "MIT", "dependencies": { "@alloc/quick-lru": "^5.2.0", "arg": "^5.0.2", - "chokidar": "^3.5.3", + "chokidar": "^3.6.0", "didyoumean": "^1.2.2", "dlv": "^1.1.3", - "fast-glob": "^3.3.0", + "fast-glob": "^3.3.2", "glob-parent": "^6.0.2", "is-glob": "^4.0.3", - "jiti": "^1.21.0", - "lilconfig": "^2.1.0", - "micromatch": "^4.0.5", + "jiti": "^1.21.7", + "lilconfig": "^3.1.3", + "micromatch": "^4.0.8", "normalize-path": "^3.0.0", "object-hash": "^3.0.0", - "picocolors": "^1.0.0", - "postcss": "^8.4.23", + "picocolors": "^1.1.1", + "postcss": "^8.4.47", "postcss-import": "^15.1.0", "postcss-js": "^4.0.1", - "postcss-load-config": "^4.0.1", - "postcss-nested": "^6.0.1", - "postcss-selector-parser": "^6.0.11", - "resolve": "^1.22.2", - "sucrase": "^3.32.0" + "postcss-load-config": "^4.0.2 || ^5.0 || ^6.0", + "postcss-nested": "^6.2.0", + "postcss-selector-parser": "^6.1.2", + "resolve": "^1.22.8", + "sucrase": "^3.35.0" }, "bin": { "tailwind": "lib/cli.js", @@ -12567,6 +13714,42 @@ "node": ">=14.0.0" } }, + "node_modules/tailwindcss/node_modules/chokidar": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "license": "MIT", + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, + "node_modules/tailwindcss/node_modules/chokidar/node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/tailwindcss/node_modules/glob-parent": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", @@ -12583,7 +13766,7 @@ "version": "6.1.15", "resolved": "https://registry.npmjs.org/tar/-/tar-6.1.15.tgz", "integrity": "sha512-/zKt9UyngnxIT/EAGYuxaMYgOIJiP81ab9ZfkILq4oNLPFX50qyYmu7jRj9qeXoxmJHjGlbH0+cm2uy1WCs10A==", - "deprecated": "Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exhorbitant rates) by contacting i@izs.me", + "deprecated": "Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", "license": "ISC", "dependencies": { "chownr": "^2.0.0", @@ -12598,34 +13781,55 @@ } }, "node_modules/tar-fs": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz", - "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==", + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", "license": "MIT", + "optional": true, "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", "pump": "^3.0.0", - "tar-stream": "^3.1.5" - }, - "optionalDependencies": { - "bare-fs": "^4.0.1", - "bare-path": "^3.0.0" + "tar-stream": "^2.1.4" } }, + "node_modules/tar-fs/node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "license": "ISC", + "optional": true + }, "node_modules/tar-stream": { - "version": "3.1.7", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", - "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", "license": "MIT", + "optional": true, "dependencies": { - "b4a": "^1.6.4", - "fast-fifo": "^1.2.0", - "streamx": "^2.15.0" + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/teex": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/teex/-/teex-1.0.1.tgz", + "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==", + "license": "MIT", + "dependencies": { + "streamx": "^2.12.5" } }, "node_modules/text-decoder": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz", - "integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==", + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", + "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==", "license": "Apache-2.0", "dependencies": { "b4a": "^1.6.4" @@ -12659,13 +13863,13 @@ "license": "MIT" }, "node_modules/tinyglobby": { - "version": "0.2.15", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", - "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", + "version": "0.2.16", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz", + "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==", "license": "MIT", "dependencies": { "fdir": "^6.5.0", - "picomatch": "^4.0.3" + "picomatch": "^4.0.4" }, "engines": { "node": ">=12.0.0" @@ -12692,9 +13896,9 @@ } }, "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", "engines": { "node": ">=12" @@ -12778,6 +13982,19 @@ "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", "license": "0BSD" }, + "node_modules/tunnel-agent": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, "node_modules/twoslash": { "version": "0.3.6", "resolved": "https://registry.npmjs.org/twoslash/-/twoslash-0.3.6.tgz", @@ -12939,9 +14156,9 @@ } }, "node_modules/undici-types": { - "version": "7.16.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", - "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", + "version": "7.18.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", + "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", "license": "MIT" }, "node_modules/unified": { @@ -13357,6 +14574,21 @@ "webidl-conversions": "^3.0.0" } }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/which-boxed-primitive": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.1.1.tgz", @@ -13543,9 +14775,9 @@ "license": "ISC" }, "node_modules/ws": { - "version": "8.19.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz", - "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==", + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", + "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", "license": "MIT", "engines": { "node": ">=10.0.0" @@ -13585,6 +14817,28 @@ "node": ">=4.0" } }, + "node_modules/xss": { + "version": "1.0.15", + "resolved": "https://registry.npmjs.org/xss/-/xss-1.0.15.tgz", + "integrity": "sha512-FVdlVVC67WOIPvfOwhoMETV72f6GbW7aOabBC3WxN/oUdoEMDyLz4OgRv5/gck2ZeNqEQu+Tb0kloovXOfpYVg==", + "license": "MIT", + "dependencies": { + "commander": "^2.20.3", + "cssfilter": "0.0.10" + }, + "bin": { + "xss": "bin/xss" + }, + "engines": { + "node": ">= 0.10.0" + } + }, + "node_modules/xss/node_modules/commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", + "license": "MIT" + }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", @@ -13601,9 +14855,9 @@ "license": "ISC" }, "node_modules/yaml": { - "version": "2.8.2", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.2.tgz", - "integrity": "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==", + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", + "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", "license": "ISC", "bin": { "yaml": "bin.mjs" @@ -13721,23 +14975,14 @@ "license": "MIT" }, "node_modules/zod": { - "version": "3.21.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz", - "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==", + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "license": "MIT", "funding": { "url": "https://github.com/sponsors/colinhacks" } }, - "node_modules/zod-to-json-schema": { - "version": "3.20.4", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.20.4.tgz", - "integrity": "sha512-Un9+kInJ2Zt63n6Z7mLqBifzzPcOyX+b+Exuzf7L1+xqck9Q2EPByyTRduV3kmSPaXaRer1JCsucubpgL1fipg==", - "license": "ISC", - "peerDependencies": { - "zod": "^3.20.0" - } - }, "node_modules/zwitch": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", diff --git a/docs/package.json b/docs 2/package.json similarity index 64% rename from docs/package.json rename to docs 2/package.json index a6c2ef20edd..4413f7c935b 100644 --- a/docs/package.json +++ b/docs 2/package.json @@ -1,6 +1,6 @@ { "dependencies": { - "mintlify": "^4.2.296", + "mintlify": "^4.2.500", "sharp": "^0.34.4" } } diff --git a/docs/quickstart-tutorial.mdx b/docs 2/quickstart-tutorial.mdx similarity index 100% rename from docs/quickstart-tutorial.mdx rename to docs 2/quickstart-tutorial.mdx diff --git a/docs/releases.mdx b/docs 2/releases.mdx similarity index 100% rename from docs/releases.mdx rename to docs 2/releases.mdx diff --git a/docs/rest/agents/create-agent.mdx b/docs 2/rest/agents/create-agent.mdx similarity index 100% rename from docs/rest/agents/create-agent.mdx rename to docs 2/rest/agents/create-agent.mdx diff --git a/docs/rest/agents/create-chatbot.mdx b/docs 2/rest/agents/create-chatbot.mdx similarity index 100% rename from docs/rest/agents/create-chatbot.mdx rename to docs 2/rest/agents/create-chatbot.mdx diff --git a/docs/rest/agents/create-skill.mdx b/docs 2/rest/agents/create-skill.mdx similarity index 100% rename from docs/rest/agents/create-skill.mdx rename to docs 2/rest/agents/create-skill.mdx diff --git a/docs/rest/agents/create.mdx b/docs 2/rest/agents/create.mdx similarity index 100% rename from docs/rest/agents/create.mdx rename to docs 2/rest/agents/create.mdx diff --git a/docs/rest/agents/delete-agent.mdx b/docs 2/rest/agents/delete-agent.mdx similarity index 100% rename from docs/rest/agents/delete-agent.mdx rename to docs 2/rest/agents/delete-agent.mdx diff --git a/docs/rest/agents/delete-chatbot.mdx b/docs 2/rest/agents/delete-chatbot.mdx similarity index 100% rename from docs/rest/agents/delete-chatbot.mdx rename to docs 2/rest/agents/delete-chatbot.mdx diff --git a/docs/rest/agents/delete-skill.mdx b/docs 2/rest/agents/delete-skill.mdx similarity index 100% rename from docs/rest/agents/delete-skill.mdx rename to docs 2/rest/agents/delete-skill.mdx diff --git a/docs/rest/agents/delete.mdx b/docs 2/rest/agents/delete.mdx similarity index 100% rename from docs/rest/agents/delete.mdx rename to docs 2/rest/agents/delete.mdx diff --git a/docs/rest/agents/get-agent.mdx b/docs 2/rest/agents/get-agent.mdx similarity index 100% rename from docs/rest/agents/get-agent.mdx rename to docs 2/rest/agents/get-agent.mdx diff --git a/docs/rest/agents/get-chatbot.mdx b/docs 2/rest/agents/get-chatbot.mdx similarity index 100% rename from docs/rest/agents/get-chatbot.mdx rename to docs 2/rest/agents/get-chatbot.mdx diff --git a/docs/rest/agents/get-skill.mdx b/docs 2/rest/agents/get-skill.mdx similarity index 100% rename from docs/rest/agents/get-skill.mdx rename to docs 2/rest/agents/get-skill.mdx diff --git a/docs/rest/agents/get.mdx b/docs 2/rest/agents/get.mdx similarity index 100% rename from docs/rest/agents/get.mdx rename to docs 2/rest/agents/get.mdx diff --git a/docs/rest/agents/list-agents.mdx b/docs 2/rest/agents/list-agents.mdx similarity index 100% rename from docs/rest/agents/list-agents.mdx rename to docs 2/rest/agents/list-agents.mdx diff --git a/docs/rest/agents/list-chatbots.mdx b/docs 2/rest/agents/list-chatbots.mdx similarity index 100% rename from docs/rest/agents/list-chatbots.mdx rename to docs 2/rest/agents/list-chatbots.mdx diff --git a/docs/rest/agents/list-skills.mdx b/docs 2/rest/agents/list-skills.mdx similarity index 100% rename from docs/rest/agents/list-skills.mdx rename to docs 2/rest/agents/list-skills.mdx diff --git a/docs/rest/agents/list.mdx b/docs 2/rest/agents/list.mdx similarity index 100% rename from docs/rest/agents/list.mdx rename to docs 2/rest/agents/list.mdx diff --git a/docs/rest/agents/query-agent.mdx b/docs 2/rest/agents/query-agent.mdx similarity index 100% rename from docs/rest/agents/query-agent.mdx rename to docs 2/rest/agents/query-agent.mdx diff --git a/docs/rest/agents/query.mdx b/docs 2/rest/agents/query.mdx similarity index 100% rename from docs/rest/agents/query.mdx rename to docs 2/rest/agents/query.mdx diff --git a/docs/rest/agents/update-agent.mdx b/docs 2/rest/agents/update-agent.mdx similarity index 100% rename from docs/rest/agents/update-agent.mdx rename to docs 2/rest/agents/update-agent.mdx diff --git a/docs/rest/agents/update-chatbot.mdx b/docs 2/rest/agents/update-chatbot.mdx similarity index 100% rename from docs/rest/agents/update-chatbot.mdx rename to docs 2/rest/agents/update-chatbot.mdx diff --git a/docs/rest/agents/update-skill.mdx b/docs 2/rest/agents/update-skill.mdx similarity index 100% rename from docs/rest/agents/update-skill.mdx rename to docs 2/rest/agents/update-skill.mdx diff --git a/docs/rest/agents/update.mdx b/docs 2/rest/agents/update.mdx similarity index 100% rename from docs/rest/agents/update.mdx rename to docs 2/rest/agents/update.mdx diff --git a/docs/rest/authentication.mdx b/docs 2/rest/authentication.mdx similarity index 100% rename from docs/rest/authentication.mdx rename to docs 2/rest/authentication.mdx diff --git a/docs/rest/databases/create-databases.mdx b/docs 2/rest/databases/create-databases.mdx similarity index 100% rename from docs/rest/databases/create-databases.mdx rename to docs 2/rest/databases/create-databases.mdx diff --git a/docs/rest/databases/delete-databases.mdx b/docs 2/rest/databases/delete-databases.mdx similarity index 100% rename from docs/rest/databases/delete-databases.mdx rename to docs 2/rest/databases/delete-databases.mdx diff --git a/docs/rest/databases/list-database.mdx b/docs 2/rest/databases/list-database.mdx similarity index 100% rename from docs/rest/databases/list-database.mdx rename to docs 2/rest/databases/list-database.mdx diff --git a/docs/rest/databases/list-databases.mdx b/docs 2/rest/databases/list-databases.mdx similarity index 100% rename from docs/rest/databases/list-databases.mdx rename to docs 2/rest/databases/list-databases.mdx diff --git a/docs/rest/databases/update-databases.mdx b/docs 2/rest/databases/update-databases.mdx similarity index 100% rename from docs/rest/databases/update-databases.mdx rename to docs 2/rest/databases/update-databases.mdx diff --git a/docs/rest/files/delete.mdx b/docs 2/rest/files/delete.mdx similarity index 100% rename from docs/rest/files/delete.mdx rename to docs 2/rest/files/delete.mdx diff --git a/docs/rest/files/list.mdx b/docs 2/rest/files/list.mdx similarity index 100% rename from docs/rest/files/list.mdx rename to docs 2/rest/files/list.mdx diff --git a/docs/rest/files/upload.mdx b/docs 2/rest/files/upload.mdx similarity index 100% rename from docs/rest/files/upload.mdx rename to docs 2/rest/files/upload.mdx diff --git a/docs/rest/jobs/create.mdx b/docs 2/rest/jobs/create.mdx similarity index 100% rename from docs/rest/jobs/create.mdx rename to docs 2/rest/jobs/create.mdx diff --git a/docs/rest/jobs/delete.mdx b/docs 2/rest/jobs/delete.mdx similarity index 100% rename from docs/rest/jobs/delete.mdx rename to docs 2/rest/jobs/delete.mdx diff --git a/docs/rest/jobs/get.mdx b/docs 2/rest/jobs/get.mdx similarity index 100% rename from docs/rest/jobs/get.mdx rename to docs 2/rest/jobs/get.mdx diff --git a/docs/rest/jobs/list.mdx b/docs 2/rest/jobs/list.mdx similarity index 100% rename from docs/rest/jobs/list.mdx rename to docs 2/rest/jobs/list.mdx diff --git a/docs/rest/knowledge_bases/alter.mdx b/docs 2/rest/knowledge_bases/alter.mdx similarity index 100% rename from docs/rest/knowledge_bases/alter.mdx rename to docs 2/rest/knowledge_bases/alter.mdx diff --git a/docs/rest/knowledge_bases/create.mdx b/docs 2/rest/knowledge_bases/create.mdx similarity index 100% rename from docs/rest/knowledge_bases/create.mdx rename to docs 2/rest/knowledge_bases/create.mdx diff --git a/docs/rest/knowledge_bases/delete.mdx b/docs 2/rest/knowledge_bases/delete.mdx similarity index 100% rename from docs/rest/knowledge_bases/delete.mdx rename to docs 2/rest/knowledge_bases/delete.mdx diff --git a/docs/rest/knowledge_bases/get.mdx b/docs 2/rest/knowledge_bases/get.mdx similarity index 100% rename from docs/rest/knowledge_bases/get.mdx rename to docs 2/rest/knowledge_bases/get.mdx diff --git a/docs/rest/knowledge_bases/insert.mdx b/docs 2/rest/knowledge_bases/insert.mdx similarity index 100% rename from docs/rest/knowledge_bases/insert.mdx rename to docs 2/rest/knowledge_bases/insert.mdx diff --git a/docs/rest/knowledge_bases/list.mdx b/docs 2/rest/knowledge_bases/list.mdx similarity index 100% rename from docs/rest/knowledge_bases/list.mdx rename to docs 2/rest/knowledge_bases/list.mdx diff --git a/docs/rest/knowledge_bases/query.mdx b/docs 2/rest/knowledge_bases/query.mdx similarity index 100% rename from docs/rest/knowledge_bases/query.mdx rename to docs 2/rest/knowledge_bases/query.mdx diff --git a/docs/rest/ml_engines/create.mdx b/docs 2/rest/ml_engines/create.mdx similarity index 100% rename from docs/rest/ml_engines/create.mdx rename to docs 2/rest/ml_engines/create.mdx diff --git a/docs/rest/ml_engines/delete.mdx b/docs 2/rest/ml_engines/delete.mdx similarity index 100% rename from docs/rest/ml_engines/delete.mdx rename to docs 2/rest/ml_engines/delete.mdx diff --git a/docs/rest/ml_engines/list.mdx b/docs 2/rest/ml_engines/list.mdx similarity index 100% rename from docs/rest/ml_engines/list.mdx rename to docs 2/rest/ml_engines/list.mdx diff --git a/docs/rest/models/delete-model.mdx b/docs 2/rest/models/delete-model.mdx similarity index 100% rename from docs/rest/models/delete-model.mdx rename to docs 2/rest/models/delete-model.mdx diff --git a/docs/rest/models/describe-model.mdx b/docs 2/rest/models/describe-model.mdx similarity index 100% rename from docs/rest/models/describe-model.mdx rename to docs 2/rest/models/describe-model.mdx diff --git a/docs/rest/models/finetune.mdx b/docs 2/rest/models/finetune.mdx similarity index 100% rename from docs/rest/models/finetune.mdx rename to docs 2/rest/models/finetune.mdx diff --git a/docs/rest/models/list-model.mdx b/docs 2/rest/models/list-model.mdx similarity index 100% rename from docs/rest/models/list-model.mdx rename to docs 2/rest/models/list-model.mdx diff --git a/docs/rest/models/list-models.mdx b/docs 2/rest/models/list-models.mdx similarity index 100% rename from docs/rest/models/list-models.mdx rename to docs 2/rest/models/list-models.mdx diff --git a/docs/rest/models/manage-model-versions.mdx b/docs 2/rest/models/manage-model-versions.mdx similarity index 100% rename from docs/rest/models/manage-model-versions.mdx rename to docs 2/rest/models/manage-model-versions.mdx diff --git a/docs/rest/models/query-model-joined-with-data.mdx b/docs 2/rest/models/query-model-joined-with-data.mdx similarity index 100% rename from docs/rest/models/query-model-joined-with-data.mdx rename to docs 2/rest/models/query-model-joined-with-data.mdx diff --git a/docs/rest/models/query-model.mdx b/docs 2/rest/models/query-model.mdx similarity index 100% rename from docs/rest/models/query-model.mdx rename to docs 2/rest/models/query-model.mdx diff --git a/docs/rest/models/retrain.mdx b/docs 2/rest/models/retrain.mdx similarity index 100% rename from docs/rest/models/retrain.mdx rename to docs 2/rest/models/retrain.mdx diff --git a/docs/rest/models/train-model.mdx b/docs 2/rest/models/train-model.mdx similarity index 100% rename from docs/rest/models/train-model.mdx rename to docs 2/rest/models/train-model.mdx diff --git a/docs/rest/overview.mdx b/docs 2/rest/overview.mdx similarity index 100% rename from docs/rest/overview.mdx rename to docs 2/rest/overview.mdx diff --git a/docs/rest/projects/create.mdx b/docs 2/rest/projects/create.mdx similarity index 100% rename from docs/rest/projects/create.mdx rename to docs 2/rest/projects/create.mdx diff --git a/docs/rest/projects/drop.mdx b/docs 2/rest/projects/drop.mdx similarity index 100% rename from docs/rest/projects/drop.mdx rename to docs 2/rest/projects/drop.mdx diff --git a/docs/rest/projects/get-project.mdx b/docs 2/rest/projects/get-project.mdx similarity index 100% rename from docs/rest/projects/get-project.mdx rename to docs 2/rest/projects/get-project.mdx diff --git a/docs/rest/projects/get-projects.mdx b/docs 2/rest/projects/get-projects.mdx similarity index 100% rename from docs/rest/projects/get-projects.mdx rename to docs 2/rest/projects/get-projects.mdx diff --git a/docs/rest/sql.mdx b/docs 2/rest/sql.mdx similarity index 51% rename from docs/rest/sql.mdx rename to docs 2/rest/sql.mdx index 0a8d5bd0f5b..4b8604931d0 100644 --- a/docs/rest/sql.mdx +++ b/docs 2/rest/sql.mdx @@ -20,6 +20,29 @@ String that contains the SQL query that needs to be executed. + + +Format of the response. Available options: +- `null` (default) - returns all data in a single JSON response +- `"sse"` - returns data as Server-Sent Events stream +- `"jsonlines"` - returns data as JSON Lines stream (one JSON object per line) + +Use `"sse"` or `"jsonlines"` for streaming large result sets to avoid loading all data into memory at once. + + + + + +Optional context object, e.g., `{"db": "mindsdb"}` to specify the database. + + + + + +Optional parameters for parameterized queries, e.g., `{"name": "value"}`. + + + ### Response @@ -55,9 +78,32 @@ curl --request POST \ { "query": "SELECT * FROM example_db.demo_data.home_rentals LIMIT 10;" } +' +``` +```shell Shell (Streaming with SSE) +curl --request POST \ + --url https://cloud.mindsdb.com/api/sql/query \ + --header 'Content-Type: application/json' \ + --data ' +{ + "query": "SELECT * FROM example_db.demo_data.home_rentals;", + "response_format": "sse" +} +' ``` +```shell Shell (Streaming with JSON Lines) +curl --request POST \ + --url https://cloud.mindsdb.com/api/sql/query \ + --header 'Content-Type: application/json' \ + --data ' +{ + "query": "SELECT * FROM example_db.demo_data.home_rentals;", + "response_format": "jsonlines" +} +' +``` ```python Python import requests @@ -70,8 +116,8 @@ resp = requests.post(url, json={'query': -```json Response - { +```json Response (Default) +{ "column_names": [ "sqft", "rental_price" @@ -90,7 +136,22 @@ resp = requests.post(url, json={'query': ] ], "type": "table" - } +} +``` + +```text Response (SSE format) +data: {"type": "table", "column_names": ["sqft", "rental_price"], "context": {"db": "mindsdb"}} + +data: [[917, 3901], [194, 2042]] + +data: [[543, 1871], [289, 1563]] + +``` + +```text Response (JSON Lines format) +{"type": "table", "column_names": ["sqft", "rental_price"], "context": {"db": "mindsdb"}} +[[917, 3901], [194, 2042]] +[[543, 1871], [289, 1563]] ``` diff --git a/docs/rest/tables/create-table.mdx b/docs 2/rest/tables/create-table.mdx similarity index 100% rename from docs/rest/tables/create-table.mdx rename to docs 2/rest/tables/create-table.mdx diff --git a/docs/rest/tables/delete-table.mdx b/docs 2/rest/tables/delete-table.mdx similarity index 100% rename from docs/rest/tables/delete-table.mdx rename to docs 2/rest/tables/delete-table.mdx diff --git a/docs/rest/tables/delete.mdx b/docs 2/rest/tables/delete.mdx similarity index 100% rename from docs/rest/tables/delete.mdx rename to docs 2/rest/tables/delete.mdx diff --git a/docs/rest/tables/insert.mdx b/docs 2/rest/tables/insert.mdx similarity index 100% rename from docs/rest/tables/insert.mdx rename to docs 2/rest/tables/insert.mdx diff --git a/docs/rest/tables/list-table.mdx b/docs 2/rest/tables/list-table.mdx similarity index 100% rename from docs/rest/tables/list-table.mdx rename to docs 2/rest/tables/list-table.mdx diff --git a/docs/rest/tables/list-tables.mdx b/docs 2/rest/tables/list-tables.mdx similarity index 100% rename from docs/rest/tables/list-tables.mdx rename to docs 2/rest/tables/list-tables.mdx diff --git a/docs/rest/tables/update.mdx b/docs 2/rest/tables/update.mdx similarity index 100% rename from docs/rest/tables/update.mdx rename to docs 2/rest/tables/update.mdx diff --git a/docs/rest/usage.mdx b/docs 2/rest/usage.mdx similarity index 100% rename from docs/rest/usage.mdx rename to docs 2/rest/usage.mdx diff --git a/docs/rest/views/create-view.mdx b/docs 2/rest/views/create-view.mdx similarity index 100% rename from docs/rest/views/create-view.mdx rename to docs 2/rest/views/create-view.mdx diff --git a/docs/rest/views/delete-views.mdx b/docs 2/rest/views/delete-views.mdx similarity index 100% rename from docs/rest/views/delete-views.mdx rename to docs 2/rest/views/delete-views.mdx diff --git a/docs/rest/views/list-view.mdx b/docs 2/rest/views/list-view.mdx similarity index 100% rename from docs/rest/views/list-view.mdx rename to docs 2/rest/views/list-view.mdx diff --git a/docs/rest/views/list-views.mdx b/docs 2/rest/views/list-views.mdx similarity index 100% rename from docs/rest/views/list-views.mdx rename to docs 2/rest/views/list-views.mdx diff --git a/docs/rest/views/update-view.mdx b/docs 2/rest/views/update-view.mdx similarity index 100% rename from docs/rest/views/update-view.mdx rename to docs 2/rest/views/update-view.mdx diff --git a/docs/sdks/community-sdk.mdx b/docs 2/sdks/community-sdk.mdx similarity index 100% rename from docs/sdks/community-sdk.mdx rename to docs 2/sdks/community-sdk.mdx diff --git a/docs/sdks/javascript/agents.mdx b/docs 2/sdks/javascript/agents.mdx similarity index 100% rename from docs/sdks/javascript/agents.mdx rename to docs 2/sdks/javascript/agents.mdx diff --git a/docs/sdks/javascript/batchQuery.mdx b/docs 2/sdks/javascript/batchQuery.mdx similarity index 100% rename from docs/sdks/javascript/batchQuery.mdx rename to docs 2/sdks/javascript/batchQuery.mdx diff --git a/docs/sdks/javascript/connect.mdx b/docs 2/sdks/javascript/connect.mdx similarity index 100% rename from docs/sdks/javascript/connect.mdx rename to docs 2/sdks/javascript/connect.mdx diff --git a/docs/sdks/javascript/create_database.mdx b/docs 2/sdks/javascript/create_database.mdx similarity index 100% rename from docs/sdks/javascript/create_database.mdx rename to docs 2/sdks/javascript/create_database.mdx diff --git a/docs/sdks/javascript/create_job.mdx b/docs 2/sdks/javascript/create_job.mdx similarity index 100% rename from docs/sdks/javascript/create_job.mdx rename to docs 2/sdks/javascript/create_job.mdx diff --git a/docs/sdks/javascript/create_ml_engine.mdx b/docs 2/sdks/javascript/create_ml_engine.mdx similarity index 100% rename from docs/sdks/javascript/create_ml_engine.mdx rename to docs 2/sdks/javascript/create_ml_engine.mdx diff --git a/docs/sdks/javascript/create_model.mdx b/docs 2/sdks/javascript/create_model.mdx similarity index 100% rename from docs/sdks/javascript/create_model.mdx rename to docs 2/sdks/javascript/create_model.mdx diff --git a/docs/sdks/javascript/create_project.mdx b/docs 2/sdks/javascript/create_project.mdx similarity index 100% rename from docs/sdks/javascript/create_project.mdx rename to docs 2/sdks/javascript/create_project.mdx diff --git a/docs/sdks/javascript/create_table.mdx b/docs 2/sdks/javascript/create_table.mdx similarity index 100% rename from docs/sdks/javascript/create_table.mdx rename to docs 2/sdks/javascript/create_table.mdx diff --git a/docs/sdks/javascript/create_view.mdx b/docs 2/sdks/javascript/create_view.mdx similarity index 100% rename from docs/sdks/javascript/create_view.mdx rename to docs 2/sdks/javascript/create_view.mdx diff --git a/docs/sdks/javascript/delete_file.mdx b/docs 2/sdks/javascript/delete_file.mdx similarity index 100% rename from docs/sdks/javascript/delete_file.mdx rename to docs 2/sdks/javascript/delete_file.mdx diff --git a/docs/sdks/javascript/delete_from.mdx b/docs 2/sdks/javascript/delete_from.mdx similarity index 100% rename from docs/sdks/javascript/delete_from.mdx rename to docs 2/sdks/javascript/delete_from.mdx diff --git a/docs/sdks/javascript/delete_table.mdx b/docs 2/sdks/javascript/delete_table.mdx similarity index 100% rename from docs/sdks/javascript/delete_table.mdx rename to docs 2/sdks/javascript/delete_table.mdx diff --git a/docs/sdks/javascript/describe.mdx b/docs 2/sdks/javascript/describe.mdx similarity index 100% rename from docs/sdks/javascript/describe.mdx rename to docs 2/sdks/javascript/describe.mdx diff --git a/docs/sdks/javascript/drop_database.mdx b/docs 2/sdks/javascript/drop_database.mdx similarity index 100% rename from docs/sdks/javascript/drop_database.mdx rename to docs 2/sdks/javascript/drop_database.mdx diff --git a/docs/sdks/javascript/drop_job.mdx b/docs 2/sdks/javascript/drop_job.mdx similarity index 100% rename from docs/sdks/javascript/drop_job.mdx rename to docs 2/sdks/javascript/drop_job.mdx diff --git a/docs/sdks/javascript/drop_ml_engine.mdx b/docs 2/sdks/javascript/drop_ml_engine.mdx similarity index 100% rename from docs/sdks/javascript/drop_ml_engine.mdx rename to docs 2/sdks/javascript/drop_ml_engine.mdx diff --git a/docs/sdks/javascript/drop_model.mdx b/docs 2/sdks/javascript/drop_model.mdx similarity index 100% rename from docs/sdks/javascript/drop_model.mdx rename to docs 2/sdks/javascript/drop_model.mdx diff --git a/docs/sdks/javascript/drop_project.mdx b/docs 2/sdks/javascript/drop_project.mdx similarity index 100% rename from docs/sdks/javascript/drop_project.mdx rename to docs 2/sdks/javascript/drop_project.mdx diff --git a/docs/sdks/javascript/drop_view.mdx b/docs 2/sdks/javascript/drop_view.mdx similarity index 100% rename from docs/sdks/javascript/drop_view.mdx rename to docs 2/sdks/javascript/drop_view.mdx diff --git a/docs/sdks/javascript/finetune.mdx b/docs 2/sdks/javascript/finetune.mdx similarity index 100% rename from docs/sdks/javascript/finetune.mdx rename to docs 2/sdks/javascript/finetune.mdx diff --git a/docs/sdks/javascript/get_database.mdx b/docs 2/sdks/javascript/get_database.mdx similarity index 100% rename from docs/sdks/javascript/get_database.mdx rename to docs 2/sdks/javascript/get_database.mdx diff --git a/docs/sdks/javascript/get_status.mdx b/docs 2/sdks/javascript/get_status.mdx similarity index 100% rename from docs/sdks/javascript/get_status.mdx rename to docs 2/sdks/javascript/get_status.mdx diff --git a/docs/sdks/javascript/insert_into_table.mdx b/docs 2/sdks/javascript/insert_into_table.mdx similarity index 100% rename from docs/sdks/javascript/insert_into_table.mdx rename to docs 2/sdks/javascript/insert_into_table.mdx diff --git a/docs/sdks/javascript/installation.mdx b/docs 2/sdks/javascript/installation.mdx similarity index 100% rename from docs/sdks/javascript/installation.mdx rename to docs 2/sdks/javascript/installation.mdx diff --git a/docs/sdks/javascript/join_on.mdx b/docs 2/sdks/javascript/join_on.mdx similarity index 100% rename from docs/sdks/javascript/join_on.mdx rename to docs 2/sdks/javascript/join_on.mdx diff --git a/docs/sdks/javascript/list_data_handlers.mdx b/docs 2/sdks/javascript/list_data_handlers.mdx similarity index 100% rename from docs/sdks/javascript/list_data_handlers.mdx rename to docs 2/sdks/javascript/list_data_handlers.mdx diff --git a/docs/sdks/javascript/list_databases.mdx b/docs 2/sdks/javascript/list_databases.mdx similarity index 100% rename from docs/sdks/javascript/list_databases.mdx rename to docs 2/sdks/javascript/list_databases.mdx diff --git a/docs/sdks/javascript/list_ml_engines.mdx b/docs 2/sdks/javascript/list_ml_engines.mdx similarity index 100% rename from docs/sdks/javascript/list_ml_engines.mdx rename to docs 2/sdks/javascript/list_ml_engines.mdx diff --git a/docs/sdks/javascript/list_ml_handlers.mdx b/docs 2/sdks/javascript/list_ml_handlers.mdx similarity index 100% rename from docs/sdks/javascript/list_ml_handlers.mdx rename to docs 2/sdks/javascript/list_ml_handlers.mdx diff --git a/docs/sdks/javascript/list_models.mdx b/docs 2/sdks/javascript/list_models.mdx similarity index 100% rename from docs/sdks/javascript/list_models.mdx rename to docs 2/sdks/javascript/list_models.mdx diff --git a/docs/sdks/javascript/list_projects.mdx b/docs 2/sdks/javascript/list_projects.mdx similarity index 100% rename from docs/sdks/javascript/list_projects.mdx rename to docs 2/sdks/javascript/list_projects.mdx diff --git a/docs/sdks/javascript/list_views.mdx b/docs 2/sdks/javascript/list_views.mdx similarity index 100% rename from docs/sdks/javascript/list_views.mdx rename to docs 2/sdks/javascript/list_views.mdx diff --git a/docs/sdks/javascript/manage-model-versions.mdx b/docs 2/sdks/javascript/manage-model-versions.mdx similarity index 100% rename from docs/sdks/javascript/manage-model-versions.mdx rename to docs 2/sdks/javascript/manage-model-versions.mdx diff --git a/docs/sdks/javascript/native_queries.mdx b/docs 2/sdks/javascript/native_queries.mdx similarity index 100% rename from docs/sdks/javascript/native_queries.mdx rename to docs 2/sdks/javascript/native_queries.mdx diff --git a/docs/sdks/javascript/overview.mdx b/docs 2/sdks/javascript/overview.mdx similarity index 100% rename from docs/sdks/javascript/overview.mdx rename to docs 2/sdks/javascript/overview.mdx diff --git a/docs/sdks/javascript/query.mdx b/docs 2/sdks/javascript/query.mdx similarity index 100% rename from docs/sdks/javascript/query.mdx rename to docs 2/sdks/javascript/query.mdx diff --git a/docs/sdks/javascript/query_files.mdx b/docs 2/sdks/javascript/query_files.mdx similarity index 100% rename from docs/sdks/javascript/query_files.mdx rename to docs 2/sdks/javascript/query_files.mdx diff --git a/docs/sdks/javascript/query_jobs.mdx b/docs 2/sdks/javascript/query_jobs.mdx similarity index 100% rename from docs/sdks/javascript/query_jobs.mdx rename to docs 2/sdks/javascript/query_jobs.mdx diff --git a/docs/sdks/javascript/query_table.mdx b/docs 2/sdks/javascript/query_table.mdx similarity index 100% rename from docs/sdks/javascript/query_table.mdx rename to docs 2/sdks/javascript/query_table.mdx diff --git a/docs/sdks/javascript/query_view.mdx b/docs 2/sdks/javascript/query_view.mdx similarity index 100% rename from docs/sdks/javascript/query_view.mdx rename to docs 2/sdks/javascript/query_view.mdx diff --git a/docs/sdks/javascript/retrain.mdx b/docs 2/sdks/javascript/retrain.mdx similarity index 100% rename from docs/sdks/javascript/retrain.mdx rename to docs 2/sdks/javascript/retrain.mdx diff --git a/docs/sdks/javascript/update_table.mdx b/docs 2/sdks/javascript/update_table.mdx similarity index 100% rename from docs/sdks/javascript/update_table.mdx rename to docs 2/sdks/javascript/update_table.mdx diff --git a/docs/sdks/javascript/upload_file.mdx b/docs 2/sdks/javascript/upload_file.mdx similarity index 100% rename from docs/sdks/javascript/upload_file.mdx rename to docs 2/sdks/javascript/upload_file.mdx diff --git a/docs/sdks/overview.mdx b/docs 2/sdks/overview.mdx similarity index 100% rename from docs/sdks/overview.mdx rename to docs 2/sdks/overview.mdx diff --git a/docs/sdks/python/agents.mdx b/docs 2/sdks/python/agents.mdx similarity index 96% rename from docs/sdks/python/agents.mdx rename to docs 2/sdks/python/agents.mdx index f5cf58e11ce..b6c170b90fc 100644 --- a/docs/sdks/python/agents.mdx +++ b/docs 2/sdks/python/agents.mdx @@ -307,6 +307,12 @@ This parameter defines the time the agent can take to come back with an answer. For example, when the `timeout` parameter is set to 10, the agent has 10 seconds to return an answer. If the agent takes longer than 10 seconds, it aborts the process and comes back with an answer indicating its failure to return an answer within the defined time interval. +### `mode` + +This parameter defines the agent's response style, allowing users to partially control the output format. Supported values include `text` and `sql`. + +When set, the agent will tailor its responses to match the specified format. Note that the agent may still adapt its output when necessary to ensure clarity or correctness. + ## Get Agents You can get an existing agent with the `get()` method. diff --git a/docs/sdks/python/connect.mdx b/docs 2/sdks/python/connect.mdx similarity index 100% rename from docs/sdks/python/connect.mdx rename to docs 2/sdks/python/connect.mdx diff --git a/docs/sdks/python/create_database.mdx b/docs 2/sdks/python/create_database.mdx similarity index 100% rename from docs/sdks/python/create_database.mdx rename to docs 2/sdks/python/create_database.mdx diff --git a/docs/sdks/python/create_job.mdx b/docs 2/sdks/python/create_job.mdx similarity index 100% rename from docs/sdks/python/create_job.mdx rename to docs 2/sdks/python/create_job.mdx diff --git a/docs/sdks/python/create_ml_engine.mdx b/docs 2/sdks/python/create_ml_engine.mdx similarity index 100% rename from docs/sdks/python/create_ml_engine.mdx rename to docs 2/sdks/python/create_ml_engine.mdx diff --git a/docs/sdks/python/create_model.mdx b/docs 2/sdks/python/create_model.mdx similarity index 100% rename from docs/sdks/python/create_model.mdx rename to docs 2/sdks/python/create_model.mdx diff --git a/docs/sdks/python/create_project.mdx b/docs 2/sdks/python/create_project.mdx similarity index 100% rename from docs/sdks/python/create_project.mdx rename to docs 2/sdks/python/create_project.mdx diff --git a/docs/sdks/python/create_table.mdx b/docs 2/sdks/python/create_table.mdx similarity index 100% rename from docs/sdks/python/create_table.mdx rename to docs 2/sdks/python/create_table.mdx diff --git a/docs/sdks/python/create_view.mdx b/docs 2/sdks/python/create_view.mdx similarity index 100% rename from docs/sdks/python/create_view.mdx rename to docs 2/sdks/python/create_view.mdx diff --git a/docs/sdks/python/delete_file.mdx b/docs 2/sdks/python/delete_file.mdx similarity index 100% rename from docs/sdks/python/delete_file.mdx rename to docs 2/sdks/python/delete_file.mdx diff --git a/docs/sdks/python/delete_from.mdx b/docs 2/sdks/python/delete_from.mdx similarity index 100% rename from docs/sdks/python/delete_from.mdx rename to docs 2/sdks/python/delete_from.mdx diff --git a/docs/sdks/python/delete_table.mdx b/docs 2/sdks/python/delete_table.mdx similarity index 100% rename from docs/sdks/python/delete_table.mdx rename to docs 2/sdks/python/delete_table.mdx diff --git a/docs/sdks/python/describe.mdx b/docs 2/sdks/python/describe.mdx similarity index 100% rename from docs/sdks/python/describe.mdx rename to docs 2/sdks/python/describe.mdx diff --git a/docs/sdks/python/drop_database.mdx b/docs 2/sdks/python/drop_database.mdx similarity index 100% rename from docs/sdks/python/drop_database.mdx rename to docs 2/sdks/python/drop_database.mdx diff --git a/docs/sdks/python/drop_job.mdx b/docs 2/sdks/python/drop_job.mdx similarity index 100% rename from docs/sdks/python/drop_job.mdx rename to docs 2/sdks/python/drop_job.mdx diff --git a/docs/sdks/python/drop_ml_engine.mdx b/docs 2/sdks/python/drop_ml_engine.mdx similarity index 100% rename from docs/sdks/python/drop_ml_engine.mdx rename to docs 2/sdks/python/drop_ml_engine.mdx diff --git a/docs/sdks/python/drop_model.mdx b/docs 2/sdks/python/drop_model.mdx similarity index 100% rename from docs/sdks/python/drop_model.mdx rename to docs 2/sdks/python/drop_model.mdx diff --git a/docs/sdks/python/drop_project.mdx b/docs 2/sdks/python/drop_project.mdx similarity index 100% rename from docs/sdks/python/drop_project.mdx rename to docs 2/sdks/python/drop_project.mdx diff --git a/docs/sdks/python/drop_view.mdx b/docs 2/sdks/python/drop_view.mdx similarity index 100% rename from docs/sdks/python/drop_view.mdx rename to docs 2/sdks/python/drop_view.mdx diff --git a/docs/sdks/python/finetune.mdx b/docs 2/sdks/python/finetune.mdx similarity index 100% rename from docs/sdks/python/finetune.mdx rename to docs 2/sdks/python/finetune.mdx diff --git a/docs/sdks/python/get-batch-predictions.mdx b/docs 2/sdks/python/get-batch-predictions.mdx similarity index 100% rename from docs/sdks/python/get-batch-predictions.mdx rename to docs 2/sdks/python/get-batch-predictions.mdx diff --git a/docs/sdks/python/get-single-prediction.mdx b/docs 2/sdks/python/get-single-prediction.mdx similarity index 100% rename from docs/sdks/python/get-single-prediction.mdx rename to docs 2/sdks/python/get-single-prediction.mdx diff --git a/docs/sdks/python/get_history.mdx b/docs 2/sdks/python/get_history.mdx similarity index 100% rename from docs/sdks/python/get_history.mdx rename to docs 2/sdks/python/get_history.mdx diff --git a/docs/sdks/python/get_status.mdx b/docs 2/sdks/python/get_status.mdx similarity index 100% rename from docs/sdks/python/get_status.mdx rename to docs 2/sdks/python/get_status.mdx diff --git a/docs/sdks/python/insert_into_table.mdx b/docs 2/sdks/python/insert_into_table.mdx similarity index 100% rename from docs/sdks/python/insert_into_table.mdx rename to docs 2/sdks/python/insert_into_table.mdx diff --git a/docs/sdks/python/installation.mdx b/docs 2/sdks/python/installation.mdx similarity index 100% rename from docs/sdks/python/installation.mdx rename to docs 2/sdks/python/installation.mdx diff --git a/docs/sdks/python/join_on.mdx b/docs 2/sdks/python/join_on.mdx similarity index 100% rename from docs/sdks/python/join_on.mdx rename to docs 2/sdks/python/join_on.mdx diff --git a/docs/sdks/python/knowledge_bases/create.mdx b/docs 2/sdks/python/knowledge_bases/create.mdx similarity index 100% rename from docs/sdks/python/knowledge_bases/create.mdx rename to docs 2/sdks/python/knowledge_bases/create.mdx diff --git a/docs/sdks/python/knowledge_bases/insert_data.mdx b/docs 2/sdks/python/knowledge_bases/insert_data.mdx similarity index 100% rename from docs/sdks/python/knowledge_bases/insert_data.mdx rename to docs 2/sdks/python/knowledge_bases/insert_data.mdx diff --git a/docs/sdks/python/knowledge_bases/overview.mdx b/docs 2/sdks/python/knowledge_bases/overview.mdx similarity index 100% rename from docs/sdks/python/knowledge_bases/overview.mdx rename to docs 2/sdks/python/knowledge_bases/overview.mdx diff --git a/docs/sdks/python/knowledge_bases/query.mdx b/docs 2/sdks/python/knowledge_bases/query.mdx similarity index 100% rename from docs/sdks/python/knowledge_bases/query.mdx rename to docs 2/sdks/python/knowledge_bases/query.mdx diff --git a/docs/sdks/python/list_data_handlers.mdx b/docs 2/sdks/python/list_data_handlers.mdx similarity index 100% rename from docs/sdks/python/list_data_handlers.mdx rename to docs 2/sdks/python/list_data_handlers.mdx diff --git a/docs/sdks/python/list_databases.mdx b/docs 2/sdks/python/list_databases.mdx similarity index 100% rename from docs/sdks/python/list_databases.mdx rename to docs 2/sdks/python/list_databases.mdx diff --git a/docs/sdks/python/list_jobs.mdx b/docs 2/sdks/python/list_jobs.mdx similarity index 100% rename from docs/sdks/python/list_jobs.mdx rename to docs 2/sdks/python/list_jobs.mdx diff --git a/docs/sdks/python/list_ml_engines.mdx b/docs 2/sdks/python/list_ml_engines.mdx similarity index 100% rename from docs/sdks/python/list_ml_engines.mdx rename to docs 2/sdks/python/list_ml_engines.mdx diff --git a/docs/sdks/python/list_ml_handlers.mdx b/docs 2/sdks/python/list_ml_handlers.mdx similarity index 100% rename from docs/sdks/python/list_ml_handlers.mdx rename to docs 2/sdks/python/list_ml_handlers.mdx diff --git a/docs/sdks/python/list_models.mdx b/docs 2/sdks/python/list_models.mdx similarity index 100% rename from docs/sdks/python/list_models.mdx rename to docs 2/sdks/python/list_models.mdx diff --git a/docs/sdks/python/list_projects.mdx b/docs 2/sdks/python/list_projects.mdx similarity index 100% rename from docs/sdks/python/list_projects.mdx rename to docs 2/sdks/python/list_projects.mdx diff --git a/docs/sdks/python/list_views.mdx b/docs 2/sdks/python/list_views.mdx similarity index 100% rename from docs/sdks/python/list_views.mdx rename to docs 2/sdks/python/list_views.mdx diff --git a/docs/sdks/python/manage-model-versions.mdx b/docs 2/sdks/python/manage-model-versions.mdx similarity index 100% rename from docs/sdks/python/manage-model-versions.mdx rename to docs 2/sdks/python/manage-model-versions.mdx diff --git a/docs/sdks/python/native_queries.mdx b/docs 2/sdks/python/native_queries.mdx similarity index 100% rename from docs/sdks/python/native_queries.mdx rename to docs 2/sdks/python/native_queries.mdx diff --git a/docs/sdks/python/overview.mdx b/docs 2/sdks/python/overview.mdx similarity index 100% rename from docs/sdks/python/overview.mdx rename to docs 2/sdks/python/overview.mdx diff --git a/docs/sdks/python/query_files.mdx b/docs 2/sdks/python/query_files.mdx similarity index 100% rename from docs/sdks/python/query_files.mdx rename to docs 2/sdks/python/query_files.mdx diff --git a/docs/sdks/python/query_projects.mdx b/docs 2/sdks/python/query_projects.mdx similarity index 100% rename from docs/sdks/python/query_projects.mdx rename to docs 2/sdks/python/query_projects.mdx diff --git a/docs/sdks/python/query_table.mdx b/docs 2/sdks/python/query_table.mdx similarity index 100% rename from docs/sdks/python/query_table.mdx rename to docs 2/sdks/python/query_table.mdx diff --git a/docs/sdks/python/query_view.mdx b/docs 2/sdks/python/query_view.mdx similarity index 100% rename from docs/sdks/python/query_view.mdx rename to docs 2/sdks/python/query_view.mdx diff --git a/docs/sdks/python/refresh_job.mdx b/docs 2/sdks/python/refresh_job.mdx similarity index 100% rename from docs/sdks/python/refresh_job.mdx rename to docs 2/sdks/python/refresh_job.mdx diff --git a/docs/sdks/python/refresh_model.mdx b/docs 2/sdks/python/refresh_model.mdx similarity index 100% rename from docs/sdks/python/refresh_model.mdx rename to docs 2/sdks/python/refresh_model.mdx diff --git a/docs/sdks/python/retrain.mdx b/docs 2/sdks/python/retrain.mdx similarity index 100% rename from docs/sdks/python/retrain.mdx rename to docs 2/sdks/python/retrain.mdx diff --git a/docs/sdks/python/update_table.mdx b/docs 2/sdks/python/update_table.mdx similarity index 100% rename from docs/sdks/python/update_table.mdx rename to docs 2/sdks/python/update_table.mdx diff --git a/docs/sdks/python/upload_file.mdx b/docs 2/sdks/python/upload_file.mdx similarity index 100% rename from docs/sdks/python/upload_file.mdx rename to docs 2/sdks/python/upload_file.mdx diff --git a/docs/setup/cloud/aws-marketplace.mdx b/docs 2/setup/cloud/aws-marketplace.mdx similarity index 100% rename from docs/setup/cloud/aws-marketplace.mdx rename to docs 2/setup/cloud/aws-marketplace.mdx diff --git a/docs/setup/community-deploys-mindsdb.mdx b/docs 2/setup/community-deploys-mindsdb.mdx similarity index 100% rename from docs/setup/community-deploys-mindsdb.mdx rename to docs 2/setup/community-deploys-mindsdb.mdx diff --git a/docs/setup/custom-config.mdx b/docs 2/setup/custom-config.mdx similarity index 92% rename from docs/setup/custom-config.mdx rename to docs 2/setup/custom-config.mdx index 89772412af0..0a76a038edc 100644 --- a/docs/setup/custom-config.mdx +++ b/docs 2/setup/custom-config.mdx @@ -191,6 +191,38 @@ Connection parameters for the MySQL API include: + + +The `mcp` section configures the [MCP server](/model-context-protocol/usage). + +```json +"api": { + "mcp": { + "cors": { + "enabled": true, + "allow_origins": [], + "allow_origin_regex": "https?://(localhost|127\\.0\\.0\\.1)(:\\d+)?", + "allow_headers": ["*"] + }, + "rate_limit": { + "enabled": false, + "requests_per_minute": 60 + }, + "dns_rebinding_protection": false + } +} +``` + +* `cors.enabled`: Enables CORS headers on MCP endpoints. Can also be set via `MINDSDB_MCP_CORS_ENABLED`. +* `cors.allow_origins`: List of allowed origins. Can also be set via `MINDSDB_MCP_ALLOW_ORIGINS` (comma-separated). +* `cors.allow_origin_regex`: Regex pattern for allowed origins. Can also be set via `MINDSDB_MCP_ALLOW_ORIGIN_REGEXP`. +* `cors.allow_headers`: List of allowed request headers. Can also be set via `MINDSDB_MCP_ALLOW_HEADERS` (comma-separated). +* `rate_limit.enabled`: Enables per-IP rate limiting. Can also be set via `MINDSDB_MCP_RATE_LIMIT_ENABLED`. +* `rate_limit.requests_per_minute`: Maximum number of requests per minute per IP. Can also be set via `MINDSDB_MCP_RATE_LIMIT_RPM`. +* `dns_rebinding_protection`: When `true`, the MCP transport validates the `Host` header against a list of known-safe hosts to prevent DNS rebinding attacks. Disabled by default (`false`). Enable it when running MindsDB locally and you want to restrict MCP access to `localhost` only. Can also be set via `MINDSDB_MCP_DNS_REBINDING_PROTECTION`. + + + #### `cache` diff --git a/docs/setup/environment-vars.mdx b/docs 2/setup/environment-vars.mdx similarity index 100% rename from docs/setup/environment-vars.mdx rename to docs 2/setup/environment-vars.mdx diff --git a/docs/setup/mindsdb-apis.mdx b/docs 2/setup/mindsdb-apis.mdx similarity index 100% rename from docs/setup/mindsdb-apis.mdx rename to docs 2/setup/mindsdb-apis.mdx diff --git a/docs/setup/my_config.json b/docs 2/setup/my_config.json similarity index 100% rename from docs/setup/my_config.json rename to docs 2/setup/my_config.json diff --git a/docs/setup/open_telemetry.mdx b/docs 2/setup/open_telemetry.mdx similarity index 100% rename from docs/setup/open_telemetry.mdx rename to docs 2/setup/open_telemetry.mdx diff --git a/docs/setup/self-hosted/docker-desktop.mdx b/docs 2/setup/self-hosted/docker-desktop.mdx similarity index 100% rename from docs/setup/self-hosted/docker-desktop.mdx rename to docs 2/setup/self-hosted/docker-desktop.mdx diff --git a/docs/setup/self-hosted/docker.mdx b/docs 2/setup/self-hosted/docker.mdx similarity index 100% rename from docs/setup/self-hosted/docker.mdx rename to docs 2/setup/self-hosted/docker.mdx diff --git a/docs/setup/self-hosted/pip/linux.mdx b/docs 2/setup/self-hosted/pip/linux.mdx similarity index 100% rename from docs/setup/self-hosted/pip/linux.mdx rename to docs 2/setup/self-hosted/pip/linux.mdx diff --git a/docs/setup/self-hosted/pip/macos.mdx b/docs 2/setup/self-hosted/pip/macos.mdx similarity index 100% rename from docs/setup/self-hosted/pip/macos.mdx rename to docs 2/setup/self-hosted/pip/macos.mdx diff --git a/docs/setup/self-hosted/pip/source.mdx b/docs 2/setup/self-hosted/pip/source.mdx similarity index 100% rename from docs/setup/self-hosted/pip/source.mdx rename to docs 2/setup/self-hosted/pip/source.mdx diff --git a/docs/setup/self-hosted/pip/windows.mdx b/docs 2/setup/self-hosted/pip/windows.mdx similarity index 100% rename from docs/setup/self-hosted/pip/windows.mdx rename to docs 2/setup/self-hosted/pip/windows.mdx diff --git a/docs/setup/system-defaults.mdx b/docs 2/setup/system-defaults.mdx similarity index 100% rename from docs/setup/system-defaults.mdx rename to docs 2/setup/system-defaults.mdx diff --git a/docs/sitemaps/contribute.mdx b/docs 2/sitemaps/contribute.mdx similarity index 100% rename from docs/sitemaps/contribute.mdx rename to docs 2/sitemaps/contribute.mdx diff --git a/docs/sitemaps/faq.mdx b/docs 2/sitemaps/faq.mdx similarity index 100% rename from docs/sitemaps/faq.mdx rename to docs 2/sitemaps/faq.mdx diff --git a/docs/sitemaps/get_started_documentation.mdx b/docs 2/sitemaps/get_started_documentation.mdx similarity index 100% rename from docs/sitemaps/get_started_documentation.mdx rename to docs 2/sitemaps/get_started_documentation.mdx diff --git a/docs/sitemaps/integrations.mdx b/docs 2/sitemaps/integrations.mdx similarity index 100% rename from docs/sitemaps/integrations.mdx rename to docs 2/sitemaps/integrations.mdx diff --git a/docs/sitemaps/rest_api.mdx b/docs 2/sitemaps/rest_api.mdx similarity index 100% rename from docs/sitemaps/rest_api.mdx rename to docs 2/sitemaps/rest_api.mdx diff --git a/docs/sitemaps/sdks.mdx b/docs 2/sitemaps/sdks.mdx similarity index 100% rename from docs/sitemaps/sdks.mdx rename to docs 2/sitemaps/sdks.mdx diff --git a/docs/sitemaps/sql_api.mdx b/docs 2/sitemaps/sql_api.mdx similarity index 100% rename from docs/sitemaps/sql_api.mdx rename to docs 2/sitemaps/sql_api.mdx diff --git a/docs/sitemaps/use_cases.mdx b/docs 2/sitemaps/use_cases.mdx similarity index 97% rename from docs/sitemaps/use_cases.mdx rename to docs 2/sitemaps/use_cases.mdx index c1c0baa3637..771cca4429f 100644 --- a/docs/sitemaps/use_cases.mdx +++ b/docs 2/sitemaps/use_cases.mdx @@ -37,7 +37,6 @@ https://docs.mindsdb.com/use-cases/data_enrichment/hugging-face-inference-api-ex Predictive Analytics: https://docs.mindsdb.com/use-cases/predictive_analytics/overview https://docs.mindsdb.com/use-cases/predictive_analytics/house-sales-forecasting -https://docs.mindsdb.com/use-cases/predictive_analytics/expenditures-statsforecast https://docs.mindsdb.com/use-cases/predictive_analytics/eeg-forecasting In-Database Machine Learning: diff --git a/docs/sql/data-insights.mdx b/docs 2/sql/data-insights.mdx similarity index 100% rename from docs/sql/data-insights.mdx rename to docs 2/sql/data-insights.mdx diff --git a/docs/sql/feature-eng.mdx b/docs 2/sql/feature-eng.mdx similarity index 100% rename from docs/sql/feature-eng.mdx rename to docs 2/sql/feature-eng.mdx diff --git a/docs/sql/project.mdx b/docs 2/sql/project.mdx similarity index 100% rename from docs/sql/project.mdx rename to docs 2/sql/project.mdx diff --git a/docs/sql/table-structure.mdx b/docs 2/sql/table-structure.mdx similarity index 100% rename from docs/sql/table-structure.mdx rename to docs 2/sql/table-structure.mdx diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/batch.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/batch.png similarity index 100% rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/batch.png rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/batch.png diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/createmysqldb.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/createmysqldb.png similarity index 100% rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/createmysqldb.png rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/createmysqldb.png diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/query.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/query.png similarity index 100% rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/query.png rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/query.png diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/querythemodel.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/querythemodel.png similarity index 100% rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/querythemodel.png rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/querythemodel.png diff --git a/docs/tutorials/images/sentiment-analysis-on-french-tweets/railwaydetails.png b/docs 2/tutorials/images/sentiment-analysis-on-french-tweets/railwaydetails.png similarity index 100% rename from docs/tutorials/images/sentiment-analysis-on-french-tweets/railwaydetails.png rename to docs 2/tutorials/images/sentiment-analysis-on-french-tweets/railwaydetails.png diff --git a/docs/tutorials/sentiment-analysis-intercom-data-airbyte.mdx b/docs 2/tutorials/sentiment-analysis-intercom-data-airbyte.mdx similarity index 100% rename from docs/tutorials/sentiment-analysis-intercom-data-airbyte.mdx rename to docs 2/tutorials/sentiment-analysis-intercom-data-airbyte.mdx diff --git a/docs/use-cases/ai-powered_data_retrieval/overview.mdx b/docs 2/use-cases/ai-powered_data_retrieval/overview.mdx similarity index 100% rename from docs/use-cases/ai-powered_data_retrieval/overview.mdx rename to docs 2/use-cases/ai-powered_data_retrieval/overview.mdx diff --git a/docs/use-cases/ai-powered_data_retrieval/recommenders.mdx b/docs 2/use-cases/ai-powered_data_retrieval/recommenders.mdx similarity index 100% rename from docs/use-cases/ai-powered_data_retrieval/recommenders.mdx rename to docs 2/use-cases/ai-powered_data_retrieval/recommenders.mdx diff --git a/docs/use-cases/ai_agents/build_ai_agents.mdx b/docs 2/use-cases/ai_agents/build_ai_agents.mdx similarity index 100% rename from docs/use-cases/ai_agents/build_ai_agents.mdx rename to docs 2/use-cases/ai_agents/build_ai_agents.mdx diff --git a/docs/use-cases/ai_agents/chatbots_agents.mdx b/docs 2/use-cases/ai_agents/chatbots_agents.mdx similarity index 100% rename from docs/use-cases/ai_agents/chatbots_agents.mdx rename to docs 2/use-cases/ai_agents/chatbots_agents.mdx diff --git a/docs/use-cases/ai_agents/create-chatbot-kb.mdx b/docs 2/use-cases/ai_agents/create-chatbot-kb.mdx similarity index 100% rename from docs/use-cases/ai_agents/create-chatbot-kb.mdx rename to docs 2/use-cases/ai_agents/create-chatbot-kb.mdx diff --git a/docs/use-cases/ai_agents/create-chatbot.mdx b/docs 2/use-cases/ai_agents/create-chatbot.mdx similarity index 100% rename from docs/use-cases/ai_agents/create-chatbot.mdx rename to docs 2/use-cases/ai_agents/create-chatbot.mdx diff --git a/docs/use-cases/ai_agents/llm-chatbot-ui.mdx b/docs 2/use-cases/ai_agents/llm-chatbot-ui.mdx similarity index 100% rename from docs/use-cases/ai_agents/llm-chatbot-ui.mdx rename to docs 2/use-cases/ai_agents/llm-chatbot-ui.mdx diff --git a/docs/use-cases/ai_agents/overview.mdx b/docs 2/use-cases/ai_agents/overview.mdx similarity index 100% rename from docs/use-cases/ai_agents/overview.mdx rename to docs 2/use-cases/ai_agents/overview.mdx diff --git a/docs/use-cases/ai_workflow_automation/ai_workflow.mdx b/docs 2/use-cases/ai_workflow_automation/ai_workflow.mdx similarity index 100% rename from docs/use-cases/ai_workflow_automation/ai_workflow.mdx rename to docs 2/use-cases/ai_workflow_automation/ai_workflow.mdx diff --git a/docs/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx b/docs 2/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx similarity index 100% rename from docs/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx rename to docs 2/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx diff --git a/docs/use-cases/ai_workflow_automation/overview.mdx b/docs 2/use-cases/ai_workflow_automation/overview.mdx similarity index 100% rename from docs/use-cases/ai_workflow_automation/overview.mdx rename to docs 2/use-cases/ai_workflow_automation/overview.mdx diff --git a/docs/use-cases/ai_workflow_automation/slack-chatbot.mdx b/docs 2/use-cases/ai_workflow_automation/slack-chatbot.mdx similarity index 100% rename from docs/use-cases/ai_workflow_automation/slack-chatbot.mdx rename to docs 2/use-cases/ai_workflow_automation/slack-chatbot.mdx diff --git a/docs/use-cases/ai_workflow_automation/twilio-chatbot.mdx b/docs 2/use-cases/ai_workflow_automation/twilio-chatbot.mdx similarity index 100% rename from docs/use-cases/ai_workflow_automation/twilio-chatbot.mdx rename to docs 2/use-cases/ai_workflow_automation/twilio-chatbot.mdx diff --git a/docs/use-cases/ai_workflow_automation/twitter-chatbot.mdx b/docs 2/use-cases/ai_workflow_automation/twitter-chatbot.mdx similarity index 100% rename from docs/use-cases/ai_workflow_automation/twitter-chatbot.mdx rename to docs 2/use-cases/ai_workflow_automation/twitter-chatbot.mdx diff --git a/docs/use-cases/ai_workflow_automation/twitterbot-mariadb-enterprise-server-skysql.mdx b/docs 2/use-cases/ai_workflow_automation/twitterbot-mariadb-enterprise-server-skysql.mdx similarity index 100% rename from docs/use-cases/ai_workflow_automation/twitterbot-mariadb-enterprise-server-skysql.mdx rename to docs 2/use-cases/ai_workflow_automation/twitterbot-mariadb-enterprise-server-skysql.mdx diff --git a/docs/use-cases/automated_finetuning/data.csv b/docs 2/use-cases/automated_finetuning/data.csv similarity index 100% rename from docs/use-cases/automated_finetuning/data.csv rename to docs 2/use-cases/automated_finetuning/data.csv diff --git a/docs/use-cases/automated_finetuning/openai.mdx b/docs 2/use-cases/automated_finetuning/openai.mdx similarity index 100% rename from docs/use-cases/automated_finetuning/openai.mdx rename to docs 2/use-cases/automated_finetuning/openai.mdx diff --git a/docs/use-cases/automated_finetuning/overview.mdx b/docs 2/use-cases/automated_finetuning/overview.mdx similarity index 100% rename from docs/use-cases/automated_finetuning/overview.mdx rename to docs 2/use-cases/automated_finetuning/overview.mdx diff --git a/docs/use-cases/data_enrichment/hugging-face-examples.mdx b/docs 2/use-cases/data_enrichment/hugging-face-examples.mdx similarity index 100% rename from docs/use-cases/data_enrichment/hugging-face-examples.mdx rename to docs 2/use-cases/data_enrichment/hugging-face-examples.mdx diff --git a/docs/use-cases/data_enrichment/hugging-face-inference-api-examples.mdx b/docs 2/use-cases/data_enrichment/hugging-face-inference-api-examples.mdx similarity index 100% rename from docs/use-cases/data_enrichment/hugging-face-inference-api-examples.mdx rename to docs 2/use-cases/data_enrichment/hugging-face-inference-api-examples.mdx diff --git a/docs/use-cases/data_enrichment/image-generator.mdx b/docs 2/use-cases/data_enrichment/image-generator.mdx similarity index 100% rename from docs/use-cases/data_enrichment/image-generator.mdx rename to docs 2/use-cases/data_enrichment/image-generator.mdx diff --git a/docs/use-cases/data_enrichment/json-from-text.mdx b/docs 2/use-cases/data_enrichment/json-from-text.mdx similarity index 100% rename from docs/use-cases/data_enrichment/json-from-text.mdx rename to docs 2/use-cases/data_enrichment/json-from-text.mdx diff --git a/docs/use-cases/data_enrichment/overview.mdx b/docs 2/use-cases/data_enrichment/overview.mdx similarity index 100% rename from docs/use-cases/data_enrichment/overview.mdx rename to docs 2/use-cases/data_enrichment/overview.mdx diff --git a/docs/use-cases/data_enrichment/question-answering-inside-mongodb-with-openai.mdx b/docs 2/use-cases/data_enrichment/question-answering-inside-mongodb-with-openai.mdx similarity index 100% rename from docs/use-cases/data_enrichment/question-answering-inside-mongodb-with-openai.mdx rename to docs 2/use-cases/data_enrichment/question-answering-inside-mongodb-with-openai.mdx diff --git a/docs/use-cases/data_enrichment/question-answering-inside-mysql-with-openai.mdx b/docs 2/use-cases/data_enrichment/question-answering-inside-mysql-with-openai.mdx similarity index 100% rename from docs/use-cases/data_enrichment/question-answering-inside-mysql-with-openai.mdx rename to docs 2/use-cases/data_enrichment/question-answering-inside-mysql-with-openai.mdx diff --git a/docs/use-cases/data_enrichment/sentiment-analysis-inside-mongodb-with-openai.mdx b/docs 2/use-cases/data_enrichment/sentiment-analysis-inside-mongodb-with-openai.mdx similarity index 100% rename from docs/use-cases/data_enrichment/sentiment-analysis-inside-mongodb-with-openai.mdx rename to docs 2/use-cases/data_enrichment/sentiment-analysis-inside-mongodb-with-openai.mdx diff --git a/docs/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai.mdx b/docs 2/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai.mdx similarity index 100% rename from docs/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai.mdx rename to docs 2/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai.mdx diff --git a/docs/use-cases/data_enrichment/text-sentiment-hf.mdx b/docs 2/use-cases/data_enrichment/text-sentiment-hf.mdx similarity index 100% rename from docs/use-cases/data_enrichment/text-sentiment-hf.mdx rename to docs 2/use-cases/data_enrichment/text-sentiment-hf.mdx diff --git a/docs/use-cases/data_enrichment/text-summarization-inside-mongodb-with-openai.mdx b/docs 2/use-cases/data_enrichment/text-summarization-inside-mongodb-with-openai.mdx similarity index 100% rename from docs/use-cases/data_enrichment/text-summarization-inside-mongodb-with-openai.mdx rename to docs 2/use-cases/data_enrichment/text-summarization-inside-mongodb-with-openai.mdx diff --git a/docs/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai.mdx b/docs 2/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai.mdx similarity index 100% rename from docs/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai.mdx rename to docs 2/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai.mdx diff --git a/docs/use-cases/in-database_ml/ai-tables.mdx b/docs 2/use-cases/in-database_ml/ai-tables.mdx similarity index 100% rename from docs/use-cases/in-database_ml/ai-tables.mdx rename to docs 2/use-cases/in-database_ml/ai-tables.mdx diff --git a/docs/use-cases/in-database_ml/byom.mdx b/docs 2/use-cases/in-database_ml/byom.mdx similarity index 100% rename from docs/use-cases/in-database_ml/byom.mdx rename to docs 2/use-cases/in-database_ml/byom.mdx diff --git a/docs/use-cases/in-database_ml/overview.mdx b/docs 2/use-cases/in-database_ml/overview.mdx similarity index 100% rename from docs/use-cases/in-database_ml/overview.mdx rename to docs 2/use-cases/in-database_ml/overview.mdx diff --git a/docs/use-cases/mcp_api/cursor.mdx b/docs 2/use-cases/mcp_api/cursor.mdx similarity index 100% rename from docs/use-cases/mcp_api/cursor.mdx rename to docs 2/use-cases/mcp_api/cursor.mdx diff --git a/docs/use-cases/mcp_api/mindsdb_mcp_server.mdx b/docs 2/use-cases/mcp_api/mindsdb_mcp_server.mdx similarity index 100% rename from docs/use-cases/mcp_api/mindsdb_mcp_server.mdx rename to docs 2/use-cases/mcp_api/mindsdb_mcp_server.mdx diff --git a/docs/use-cases/mcp_api/overview.mdx b/docs 2/use-cases/mcp_api/overview.mdx similarity index 100% rename from docs/use-cases/mcp_api/overview.mdx rename to docs 2/use-cases/mcp_api/overview.mdx diff --git a/docs/use-cases/predictive_analytics/overview.mdx b/docs 2/use-cases/predictive_analytics/overview.mdx similarity index 86% rename from docs/use-cases/predictive_analytics/overview.mdx rename to docs 2/use-cases/predictive_analytics/overview.mdx index 91c0b0c9b43..7e210a07769 100644 --- a/docs/use-cases/predictive_analytics/overview.mdx +++ b/docs 2/use-cases/predictive_analytics/overview.mdx @@ -21,7 +21,6 @@ Available tutorials: - diff --git a/docs/assets/BearHeroImageMindsDB.jpeg b/docs/assets/BearHeroImageMindsDB.jpeg deleted file mode 100644 index 0e017c94693..00000000000 Binary files a/docs/assets/BearHeroImageMindsDB.jpeg and /dev/null differ diff --git a/docs/assets/MindsDBLightwood@3x.png b/docs/assets/MindsDBLightwood@3x.png deleted file mode 100644 index 74c7d8ca2ec..00000000000 Binary files a/docs/assets/MindsDBLightwood@3x.png and /dev/null differ diff --git a/docs/assets/TWbot - hero Snoopstein.png b/docs/assets/TWbot - hero Snoopstein.png deleted file mode 100644 index 476c9101759..00000000000 Binary files a/docs/assets/TWbot - hero Snoopstein.png and /dev/null differ diff --git a/docs/assets/TWbot-response-image.png b/docs/assets/TWbot-response-image.png deleted file mode 100644 index 179df1ad4ce..00000000000 Binary files a/docs/assets/TWbot-response-image.png and /dev/null differ diff --git a/docs/assets/TWbot-response1.png b/docs/assets/TWbot-response1.png deleted file mode 100644 index 8322450a5c5..00000000000 Binary files a/docs/assets/TWbot-response1.png and /dev/null differ diff --git a/docs/assets/TWbot-response2.png b/docs/assets/TWbot-response2.png deleted file mode 100644 index ec2ed3f9d1b..00000000000 Binary files a/docs/assets/TWbot-response2.png and /dev/null differ diff --git a/docs/assets/TWbot-response3.png b/docs/assets/TWbot-response3.png deleted file mode 100644 index 9e988bb273b..00000000000 Binary files a/docs/assets/TWbot-response3.png and /dev/null differ diff --git a/docs/assets/TWbot-response4.png b/docs/assets/TWbot-response4.png deleted file mode 100644 index 21effa5798a..00000000000 Binary files a/docs/assets/TWbot-response4.png and /dev/null differ diff --git a/docs/assets/TWbot-response5.png b/docs/assets/TWbot-response5.png deleted file mode 100644 index 6a868c65550..00000000000 Binary files a/docs/assets/TWbot-response5.png and /dev/null differ diff --git a/docs/assets/a2a-unavailable.png b/docs/assets/a2a-unavailable.png deleted file mode 100644 index 20ede064b14..00000000000 Binary files a/docs/assets/a2a-unavailable.png and /dev/null differ diff --git a/docs/assets/agent_diagram.png b/docs/assets/agent_diagram.png deleted file mode 100644 index 56e7689bada..00000000000 Binary files a/docs/assets/agent_diagram.png and /dev/null differ diff --git a/docs/assets/ai_system_deployment.png b/docs/assets/ai_system_deployment.png deleted file mode 100644 index 720384986c1..00000000000 Binary files a/docs/assets/ai_system_deployment.png and /dev/null differ diff --git a/docs/assets/ai_workflow_automation.png b/docs/assets/ai_workflow_automation.png deleted file mode 100644 index 72184ecc384..00000000000 Binary files a/docs/assets/ai_workflow_automation.png and /dev/null differ diff --git a/docs/assets/cloud-login.png b/docs/assets/cloud-login.png deleted file mode 100644 index 32e39f0437e..00000000000 Binary files a/docs/assets/cloud-login.png and /dev/null differ diff --git a/docs/assets/cloud-signup.png b/docs/assets/cloud-signup.png deleted file mode 100644 index 50673620fa0..00000000000 Binary files a/docs/assets/cloud-signup.png and /dev/null differ diff --git a/docs/assets/cloud/cloud-signup-filledout.png b/docs/assets/cloud/cloud-signup-filledout.png deleted file mode 100644 index 91175fb2146..00000000000 Binary files a/docs/assets/cloud/cloud-signup-filledout.png and /dev/null differ diff --git a/docs/assets/cloud/dedicated_instance_off.png b/docs/assets/cloud/dedicated_instance_off.png deleted file mode 100644 index 25930e0dedc..00000000000 Binary files a/docs/assets/cloud/dedicated_instance_off.png and /dev/null differ diff --git a/docs/assets/cloud/dedicated_instance_on.png b/docs/assets/cloud/dedicated_instance_on.png deleted file mode 100644 index 8c3d7dc6751..00000000000 Binary files a/docs/assets/cloud/dedicated_instance_on.png and /dev/null differ diff --git a/docs/assets/cloud/email.png b/docs/assets/cloud/email.png deleted file mode 100644 index 781bb2e7b1b..00000000000 Binary files a/docs/assets/cloud/email.png and /dev/null differ diff --git a/docs/assets/cloud/gui.png b/docs/assets/cloud/gui.png deleted file mode 100644 index ba3c270db21..00000000000 Binary files a/docs/assets/cloud/gui.png and /dev/null differ diff --git a/docs/assets/cloud/import_file.png b/docs/assets/cloud/import_file.png deleted file mode 100644 index 24f0332896e..00000000000 Binary files a/docs/assets/cloud/import_file.png and /dev/null differ diff --git a/docs/assets/cloud/import_file_2.png b/docs/assets/cloud/import_file_2.png deleted file mode 100644 index 2219fb87387..00000000000 Binary files a/docs/assets/cloud/import_file_2.png and /dev/null differ diff --git a/docs/assets/cloud/login.png b/docs/assets/cloud/login.png deleted file mode 100644 index bdd8c788657..00000000000 Binary files a/docs/assets/cloud/login.png and /dev/null differ diff --git a/docs/assets/cloud/plan_table.png b/docs/assets/cloud/plan_table.png deleted file mode 100644 index a5b2db3206c..00000000000 Binary files a/docs/assets/cloud/plan_table.png and /dev/null differ diff --git a/docs/assets/connect_compass_cloud.png b/docs/assets/connect_compass_cloud.png deleted file mode 100644 index 7cd7304ac6f..00000000000 Binary files a/docs/assets/connect_compass_cloud.png and /dev/null differ diff --git a/docs/assets/connect_compass_srv.png b/docs/assets/connect_compass_srv.png deleted file mode 100644 index f1107f229b6..00000000000 Binary files a/docs/assets/connect_compass_srv.png and /dev/null differ diff --git a/docs/assets/connect_compassm.png b/docs/assets/connect_compassm.png deleted file mode 100644 index cb411e0bdde..00000000000 Binary files a/docs/assets/connect_compassm.png and /dev/null differ diff --git a/docs/assets/connect_mongo_compass.png b/docs/assets/connect_mongo_compass.png deleted file mode 100644 index 4871a35bd44..00000000000 Binary files a/docs/assets/connect_mongo_compass.png and /dev/null differ diff --git a/docs/assets/connect_mongo_compass_1.png b/docs/assets/connect_mongo_compass_1.png deleted file mode 100644 index 4d0bdfd61d0..00000000000 Binary files a/docs/assets/connect_mongo_compass_1.png and /dev/null differ diff --git a/docs/assets/connect_mongo_compass_2.png b/docs/assets/connect_mongo_compass_2.png deleted file mode 100644 index 3168594878f..00000000000 Binary files a/docs/assets/connect_mongo_compass_2.png and /dev/null differ diff --git a/docs/assets/connect_mongo_compass_3.png b/docs/assets/connect_mongo_compass_3.png deleted file mode 100644 index c2dbd33da42..00000000000 Binary files a/docs/assets/connect_mongo_compass_3.png and /dev/null differ diff --git a/docs/assets/connect_mongo_shell.png b/docs/assets/connect_mongo_shell.png deleted file mode 100644 index 3a75a60332e..00000000000 Binary files a/docs/assets/connect_mongo_shell.png and /dev/null differ diff --git a/docs/assets/connect_mongo_shell_1.png b/docs/assets/connect_mongo_shell_1.png deleted file mode 100644 index c954afc41eb..00000000000 Binary files a/docs/assets/connect_mongo_shell_1.png and /dev/null differ diff --git a/docs/assets/connect_mongo_shell_2.png b/docs/assets/connect_mongo_shell_2.png deleted file mode 100644 index 76908fa0a80..00000000000 Binary files a/docs/assets/connect_mongo_shell_2.png and /dev/null differ diff --git a/docs/assets/contribute.png b/docs/assets/contribute.png deleted file mode 100644 index adbceacb572..00000000000 Binary files a/docs/assets/contribute.png and /dev/null differ diff --git a/docs/assets/data/mssql-select.gif b/docs/assets/data/mssql-select.gif deleted file mode 100644 index 759755c4983..00000000000 Binary files a/docs/assets/data/mssql-select.gif and /dev/null differ diff --git a/docs/assets/databases/mdb-mysql.png b/docs/assets/databases/mdb-mysql.png deleted file mode 100644 index 640138effcf..00000000000 Binary files a/docs/assets/databases/mdb-mysql.png and /dev/null differ diff --git a/docs/assets/databases/mdb-postgres.png b/docs/assets/databases/mdb-postgres.png deleted file mode 100644 index 7bf055cb840..00000000000 Binary files a/docs/assets/databases/mdb-postgres.png and /dev/null differ diff --git a/docs/assets/databases/mongodb/mongo-mdb-code.png b/docs/assets/databases/mongodb/mongo-mdb-code.png deleted file mode 100644 index a3ec12afc94..00000000000 Binary files a/docs/assets/databases/mongodb/mongo-mdb-code.png and /dev/null differ diff --git a/docs/assets/databases/mongodb/mongo-mdb-current.png b/docs/assets/databases/mongodb/mongo-mdb-current.png deleted file mode 100644 index 6910119bb0b..00000000000 Binary files a/docs/assets/databases/mongodb/mongo-mdb-current.png and /dev/null differ diff --git a/docs/assets/databases/mongodb/mongo-mdb.png b/docs/assets/databases/mongodb/mongo-mdb.png deleted file mode 100644 index 4d3f6043800..00000000000 Binary files a/docs/assets/databases/mongodb/mongo-mdb.png and /dev/null differ diff --git a/docs/assets/dbeaver-check-predictor-status.png b/docs/assets/dbeaver-check-predictor-status.png deleted file mode 100644 index 6904f13dd14..00000000000 Binary files a/docs/assets/dbeaver-check-predictor-status.png and /dev/null differ diff --git a/docs/assets/dbeaver-configure-cloud-connection.png b/docs/assets/dbeaver-configure-cloud-connection.png deleted file mode 100644 index 51ad1f2c8b6..00000000000 Binary files a/docs/assets/dbeaver-configure-cloud-connection.png and /dev/null differ diff --git a/docs/assets/dbeaver-configure-docker-connection.png b/docs/assets/dbeaver-configure-docker-connection.png deleted file mode 100644 index 434da45f9d0..00000000000 Binary files a/docs/assets/dbeaver-configure-docker-connection.png and /dev/null differ diff --git a/docs/assets/dbeaver-create-connection.png b/docs/assets/dbeaver-create-connection.png deleted file mode 100644 index 6d22deca2c8..00000000000 Binary files a/docs/assets/dbeaver-create-connection.png and /dev/null differ diff --git a/docs/assets/dbeaver-create-database.png b/docs/assets/dbeaver-create-database.png deleted file mode 100644 index a742e832bad..00000000000 Binary files a/docs/assets/dbeaver-create-database.png and /dev/null differ diff --git a/docs/assets/dbeaver-create-predictor-simple.png b/docs/assets/dbeaver-create-predictor-simple.png deleted file mode 100644 index b17cdad392b..00000000000 Binary files a/docs/assets/dbeaver-create-predictor-simple.png and /dev/null differ diff --git a/docs/assets/dbeaver-create-script.png b/docs/assets/dbeaver-create-script.png deleted file mode 100644 index ea4f0447bcf..00000000000 Binary files a/docs/assets/dbeaver-create-script.png and /dev/null differ diff --git a/docs/assets/dbeaver-empty-script.png b/docs/assets/dbeaver-empty-script.png deleted file mode 100644 index bb984b71c1c..00000000000 Binary files a/docs/assets/dbeaver-empty-script.png and /dev/null differ diff --git a/docs/assets/dbeaver-home-rentals-prediction-results.png b/docs/assets/dbeaver-home-rentals-prediction-results.png deleted file mode 100644 index 5c0032063df..00000000000 Binary files a/docs/assets/dbeaver-home-rentals-prediction-results.png and /dev/null differ diff --git a/docs/assets/dbeaver-home-rentals-prediction.png b/docs/assets/dbeaver-home-rentals-prediction.png deleted file mode 100644 index 0f7215e9723..00000000000 Binary files a/docs/assets/dbeaver-home-rentals-prediction.png and /dev/null differ diff --git a/docs/assets/dbeaver-predict-home-rentals.png b/docs/assets/dbeaver-predict-home-rentals.png deleted file mode 100644 index d4d35eea7b5..00000000000 Binary files a/docs/assets/dbeaver-predict-home-rentals.png and /dev/null differ diff --git a/docs/assets/dbeaver-preview-data.png b/docs/assets/dbeaver-preview-data.png deleted file mode 100644 index 31794e39954..00000000000 Binary files a/docs/assets/dbeaver-preview-data.png and /dev/null differ diff --git a/docs/assets/getting-started.png b/docs/assets/getting-started.png deleted file mode 100644 index 168ff02e040..00000000000 Binary files a/docs/assets/getting-started.png and /dev/null differ diff --git a/docs/assets/icons/Cloud.svg b/docs/assets/icons/Cloud.svg deleted file mode 100644 index 082345353ab..00000000000 --- a/docs/assets/icons/Cloud.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - Cloud - - - - - - - \ No newline at end of file diff --git a/docs/assets/icons/Database.svg b/docs/assets/icons/Database.svg deleted file mode 100644 index 25326bf3c90..00000000000 --- a/docs/assets/icons/Database.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - Database - - - - - - - \ No newline at end of file diff --git a/docs/assets/icons/Explainable.svg b/docs/assets/icons/Explainable.svg deleted file mode 100644 index 96514672a8f..00000000000 --- a/docs/assets/icons/Explainable.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - Explainable - - - - - - - \ No newline at end of file diff --git a/docs/assets/icons/GUI.svg b/docs/assets/icons/GUI.svg deleted file mode 100644 index ea99f55e989..00000000000 --- a/docs/assets/icons/GUI.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - GUI - - - - - - - \ No newline at end of file diff --git a/docs/assets/icons/Python.svg b/docs/assets/icons/Python.svg deleted file mode 100644 index 6ac826a7654..00000000000 --- a/docs/assets/icons/Python.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - Python - - - - - - - \ No newline at end of file diff --git a/docs/assets/icons/Server.svg b/docs/assets/icons/Server.svg deleted file mode 100644 index 94f40f4adbd..00000000000 --- a/docs/assets/icons/Server.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - Server - - - - - - - \ No newline at end of file diff --git a/docs/assets/icons/sdk.svg b/docs/assets/icons/sdk.svg deleted file mode 100644 index 25cdebf4f30..00000000000 --- a/docs/assets/icons/sdk.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - Server Copy - - - - - - - \ No newline at end of file diff --git a/docs/assets/info/query.png b/docs/assets/info/query.png deleted file mode 100644 index 48301f93d97..00000000000 Binary files a/docs/assets/info/query.png and /dev/null differ diff --git a/docs/assets/info/select.png b/docs/assets/info/select.png deleted file mode 100644 index 731cf3e3257..00000000000 Binary files a/docs/assets/info/select.png and /dev/null differ diff --git a/docs/assets/install-dependencies-gui-x.png b/docs/assets/install-dependencies-gui-x.png deleted file mode 100644 index a4afc61191c..00000000000 Binary files a/docs/assets/install-dependencies-gui-x.png and /dev/null differ diff --git a/docs/assets/integration-image.png b/docs/assets/integration-image.png deleted file mode 100644 index 20a2feb7ff8..00000000000 Binary files a/docs/assets/integration-image.png and /dev/null differ diff --git a/docs/assets/lightwood.png b/docs/assets/lightwood.png deleted file mode 100644 index d55f5c7fe5e..00000000000 Binary files a/docs/assets/lightwood.png and /dev/null differ diff --git a/docs/assets/mdb_image.png b/docs/assets/mdb_image.png deleted file mode 100644 index 84a7720081f..00000000000 Binary files a/docs/assets/mdb_image.png and /dev/null differ diff --git a/docs/assets/mdb_logo.png b/docs/assets/mdb_logo.png deleted file mode 100755 index 7e1ee76aebd..00000000000 Binary files a/docs/assets/mdb_logo.png and /dev/null differ diff --git a/docs/assets/mdb_logo_name.png b/docs/assets/mdb_logo_name.png deleted file mode 100755 index 88374501af6..00000000000 Binary files a/docs/assets/mdb_logo_name.png and /dev/null differ diff --git a/docs/assets/mdb_logo_w.svg b/docs/assets/mdb_logo_w.svg deleted file mode 100644 index cd35d4eddab..00000000000 --- a/docs/assets/mdb_logo_w.svg +++ /dev/null @@ -1 +0,0 @@ -MindsDBPolarBear \ No newline at end of file diff --git a/docs/assets/mindsdb-local-editor.png b/docs/assets/mindsdb-local-editor.png deleted file mode 100644 index 0aa8d85e0f5..00000000000 Binary files a/docs/assets/mindsdb-local-editor.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/learning_hub.png b/docs/assets/mindsdb_gui_editor/learning_hub.png deleted file mode 100644 index ecb700afafd..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/learning_hub.png and /dev/null differ diff --git a/docs/assets/mindsdb_homepage_diagram.png b/docs/assets/mindsdb_homepage_diagram.png deleted file mode 100644 index c77fc5b623f..00000000000 Binary files a/docs/assets/mindsdb_homepage_diagram.png and /dev/null differ diff --git a/docs/assets/mindsdb_logo.png b/docs/assets/mindsdb_logo.png deleted file mode 100644 index 2dc5824d871..00000000000 Binary files a/docs/assets/mindsdb_logo.png and /dev/null differ diff --git a/docs/assets/predictors/clickhouse-insert.gif b/docs/assets/predictors/clickhouse-insert.gif deleted file mode 100644 index b1161582f69..00000000000 Binary files a/docs/assets/predictors/clickhouse-insert.gif and /dev/null differ diff --git a/docs/assets/predictors/clickhouse-query.gif b/docs/assets/predictors/clickhouse-query.gif deleted file mode 100644 index 76d231b4a2a..00000000000 Binary files a/docs/assets/predictors/clickhouse-query.gif and /dev/null differ diff --git a/docs/assets/predictors/column-importance.png b/docs/assets/predictors/column-importance.png deleted file mode 100644 index 565d0fde145..00000000000 Binary files a/docs/assets/predictors/column-importance.png and /dev/null differ diff --git a/docs/assets/predictors/mariadb-insert.gif b/docs/assets/predictors/mariadb-insert.gif deleted file mode 100644 index 795d61c6f61..00000000000 Binary files a/docs/assets/predictors/mariadb-insert.gif and /dev/null differ diff --git a/docs/assets/predictors/mariadb-status.gif b/docs/assets/predictors/mariadb-status.gif deleted file mode 100644 index 98be86a25c8..00000000000 Binary files a/docs/assets/predictors/mariadb-status.gif and /dev/null differ diff --git a/docs/assets/predictors/mongo/mongo-insert.gif b/docs/assets/predictors/mongo/mongo-insert.gif deleted file mode 100644 index 39d36ca5be3..00000000000 Binary files a/docs/assets/predictors/mongo/mongo-insert.gif and /dev/null differ diff --git a/docs/assets/predictors/mssql-status.gif b/docs/assets/predictors/mssql-status.gif deleted file mode 100644 index c7e617b7923..00000000000 Binary files a/docs/assets/predictors/mssql-status.gif and /dev/null differ diff --git a/docs/assets/predictors/mysql-insert.gif b/docs/assets/predictors/mysql-insert.gif deleted file mode 100644 index 2eeb0bd0f7e..00000000000 Binary files a/docs/assets/predictors/mysql-insert.gif and /dev/null differ diff --git a/docs/assets/predictors/mysql-query.gif b/docs/assets/predictors/mysql-query.gif deleted file mode 100644 index 611f2fd76b3..00000000000 Binary files a/docs/assets/predictors/mysql-query.gif and /dev/null differ diff --git a/docs/assets/predictors/mysql-status.gif b/docs/assets/predictors/mysql-status.gif deleted file mode 100644 index 96ee51fc6db..00000000000 Binary files a/docs/assets/predictors/mysql-status.gif and /dev/null differ diff --git a/docs/assets/predictors/postgresql-insert.gif b/docs/assets/predictors/postgresql-insert.gif deleted file mode 100644 index 08ed88afe93..00000000000 Binary files a/docs/assets/predictors/postgresql-insert.gif and /dev/null differ diff --git a/docs/assets/predictors/postgresql-status.gif b/docs/assets/predictors/postgresql-status.gif deleted file mode 100644 index 8fe657ac8f2..00000000000 Binary files a/docs/assets/predictors/postgresql-status.gif and /dev/null differ diff --git a/docs/assets/predictors/train-advanced.gif b/docs/assets/predictors/train-advanced.gif deleted file mode 100644 index 2bed2542a40..00000000000 Binary files a/docs/assets/predictors/train-advanced.gif and /dev/null differ diff --git a/docs/assets/predictors/train-timeseries.gif b/docs/assets/predictors/train-timeseries.gif deleted file mode 100644 index 891ccf8d944..00000000000 Binary files a/docs/assets/predictors/train-timeseries.gif and /dev/null differ diff --git a/docs/assets/report-issue.gif b/docs/assets/report-issue.gif deleted file mode 100644 index f2f770bfa5c..00000000000 Binary files a/docs/assets/report-issue.gif and /dev/null differ diff --git a/docs/assets/report_issues/1_issue_types.png b/docs/assets/report_issues/1_issue_types.png deleted file mode 100644 index 35f5b80f559..00000000000 Binary files a/docs/assets/report_issues/1_issue_types.png and /dev/null differ diff --git a/docs/assets/report_issues/1_reporting_new_issue.png b/docs/assets/report_issues/1_reporting_new_issue.png deleted file mode 100644 index 331a1ba3148..00000000000 Binary files a/docs/assets/report_issues/1_reporting_new_issue.png and /dev/null differ diff --git a/docs/assets/report_issues/2_bug_report.png b/docs/assets/report_issues/2_bug_report.png deleted file mode 100644 index 23b8ad0454c..00000000000 Binary files a/docs/assets/report_issues/2_bug_report.png and /dev/null differ diff --git a/docs/assets/report_issues/2_bug_report_form_1.png b/docs/assets/report_issues/2_bug_report_form_1.png deleted file mode 100644 index b5b9a21fa75..00000000000 Binary files a/docs/assets/report_issues/2_bug_report_form_1.png and /dev/null differ diff --git a/docs/assets/report_issues/2_bug_report_form_2.png b/docs/assets/report_issues/2_bug_report_form_2.png deleted file mode 100644 index ad9790049b2..00000000000 Binary files a/docs/assets/report_issues/2_bug_report_form_2.png and /dev/null differ diff --git a/docs/assets/report_issues/2_bug_report_form_3.png b/docs/assets/report_issues/2_bug_report_form_3.png deleted file mode 100644 index 866dab3ed89..00000000000 Binary files a/docs/assets/report_issues/2_bug_report_form_3.png and /dev/null differ diff --git a/docs/assets/report_issues/2_bug_report_form_4.png b/docs/assets/report_issues/2_bug_report_form_4.png deleted file mode 100644 index 090879b2724..00000000000 Binary files a/docs/assets/report_issues/2_bug_report_form_4.png and /dev/null differ diff --git a/docs/assets/report_issues/2_bug_report_form_5.png b/docs/assets/report_issues/2_bug_report_form_5.png deleted file mode 100644 index 9522dafaa46..00000000000 Binary files a/docs/assets/report_issues/2_bug_report_form_5.png and /dev/null differ diff --git a/docs/assets/report_issues/3_feature_request.png b/docs/assets/report_issues/3_feature_request.png deleted file mode 100644 index a40bf424f17..00000000000 Binary files a/docs/assets/report_issues/3_feature_request.png and /dev/null differ diff --git a/docs/assets/report_issues/3_feature_request_form_1.png b/docs/assets/report_issues/3_feature_request_form_1.png deleted file mode 100644 index 72b959d883c..00000000000 Binary files a/docs/assets/report_issues/3_feature_request_form_1.png and /dev/null differ diff --git a/docs/assets/report_issues/3_feature_request_form_2.png b/docs/assets/report_issues/3_feature_request_form_2.png deleted file mode 100644 index 7798e3113f7..00000000000 Binary files a/docs/assets/report_issues/3_feature_request_form_2.png and /dev/null differ diff --git a/docs/assets/report_issues/3_feature_request_form_3.png b/docs/assets/report_issues/3_feature_request_form_3.png deleted file mode 100644 index 0c328662c15..00000000000 Binary files a/docs/assets/report_issues/3_feature_request_form_3.png and /dev/null differ diff --git a/docs/assets/report_issues/3_feature_request_form_4.png b/docs/assets/report_issues/3_feature_request_form_4.png deleted file mode 100644 index dcf29dec23d..00000000000 Binary files a/docs/assets/report_issues/3_feature_request_form_4.png and /dev/null differ diff --git a/docs/assets/report_issues/4_improve_docs.png b/docs/assets/report_issues/4_improve_docs.png deleted file mode 100644 index 6d4c6bd2a4b..00000000000 Binary files a/docs/assets/report_issues/4_improve_docs.png and /dev/null differ diff --git a/docs/assets/report_issues/4_improve_docs_form_1.png b/docs/assets/report_issues/4_improve_docs_form_1.png deleted file mode 100644 index 34555ee09c4..00000000000 Binary files a/docs/assets/report_issues/4_improve_docs_form_1.png and /dev/null differ diff --git a/docs/assets/report_issues/4_improve_docs_form_2.png b/docs/assets/report_issues/4_improve_docs_form_2.png deleted file mode 100644 index 6c74fc78e26..00000000000 Binary files a/docs/assets/report_issues/4_improve_docs_form_2.png and /dev/null differ diff --git a/docs/assets/report_issues/4_improve_docs_form_3.png b/docs/assets/report_issues/4_improve_docs_form_3.png deleted file mode 100644 index 3ad7ad6bf45..00000000000 Binary files a/docs/assets/report_issues/4_improve_docs_form_3.png and /dev/null differ diff --git a/docs/assets/report_issues/5_new_integration.png b/docs/assets/report_issues/5_new_integration.png deleted file mode 100644 index 76d538124b4..00000000000 Binary files a/docs/assets/report_issues/5_new_integration.png and /dev/null differ diff --git a/docs/assets/report_issues/5_new_integration_form_1.png b/docs/assets/report_issues/5_new_integration_form_1.png deleted file mode 100644 index faff515679c..00000000000 Binary files a/docs/assets/report_issues/5_new_integration_form_1.png and /dev/null differ diff --git a/docs/assets/report_issues/5_new_integration_form_2.png b/docs/assets/report_issues/5_new_integration_form_2.png deleted file mode 100644 index 10b02645877..00000000000 Binary files a/docs/assets/report_issues/5_new_integration_form_2.png and /dev/null differ diff --git a/docs/assets/report_issues/5_new_integration_form_3.png b/docs/assets/report_issues/5_new_integration_form_3.png deleted file mode 100644 index 7ef021fb9cb..00000000000 Binary files a/docs/assets/report_issues/5_new_integration_form_3.png and /dev/null differ diff --git a/docs/assets/report_issues/5_new_integration_form_4.png b/docs/assets/report_issues/5_new_integration_form_4.png deleted file mode 100644 index 64afecdb6db..00000000000 Binary files a/docs/assets/report_issues/5_new_integration_form_4.png and /dev/null differ diff --git a/docs/assets/report_issues/5_new_integration_form_5.png b/docs/assets/report_issues/5_new_integration_form_5.png deleted file mode 100644 index e278788c71b..00000000000 Binary files a/docs/assets/report_issues/5_new_integration_form_5.png and /dev/null differ diff --git a/docs/assets/report_issues/5_new_integration_form_6.png b/docs/assets/report_issues/5_new_integration_form_6.png deleted file mode 100644 index a9bed7296e7..00000000000 Binary files a/docs/assets/report_issues/5_new_integration_form_6.png and /dev/null differ diff --git a/docs/assets/report_issues/6_security_vulnerability.png b/docs/assets/report_issues/6_security_vulnerability.png deleted file mode 100644 index 8b31fcba775..00000000000 Binary files a/docs/assets/report_issues/6_security_vulnerability.png and /dev/null differ diff --git a/docs/assets/report_issues/6_security_vulnerability_form_1.png b/docs/assets/report_issues/6_security_vulnerability_form_1.png deleted file mode 100644 index ffb45993174..00000000000 Binary files a/docs/assets/report_issues/6_security_vulnerability_form_1.png and /dev/null differ diff --git a/docs/assets/report_issues/6_security_vulnerability_form_2.png b/docs/assets/report_issues/6_security_vulnerability_form_2.png deleted file mode 100644 index e8305dd7a72..00000000000 Binary files a/docs/assets/report_issues/6_security_vulnerability_form_2.png and /dev/null differ diff --git a/docs/assets/report_issues/6_security_vulnerability_form_3.png b/docs/assets/report_issues/6_security_vulnerability_form_3.png deleted file mode 100644 index 1339a145e54..00000000000 Binary files a/docs/assets/report_issues/6_security_vulnerability_form_3.png and /dev/null differ diff --git a/docs/assets/report_issues/6_security_vulnerability_form_4.png b/docs/assets/report_issues/6_security_vulnerability_form_4.png deleted file mode 100644 index 45efe6f28be..00000000000 Binary files a/docs/assets/report_issues/6_security_vulnerability_form_4.png and /dev/null differ diff --git a/docs/assets/report_issues/6_security_vulnerability_form_5.png b/docs/assets/report_issues/6_security_vulnerability_form_5.png deleted file mode 100644 index c594a715d9c..00000000000 Binary files a/docs/assets/report_issues/6_security_vulnerability_form_5.png and /dev/null differ diff --git a/docs/assets/sentiment_analysis_diagram.png b/docs/assets/sentiment_analysis_diagram.png deleted file mode 100644 index d163a6843da..00000000000 Binary files a/docs/assets/sentiment_analysis_diagram.png and /dev/null differ diff --git a/docs/assets/sql/add-file-data.png b/docs/assets/sql/add-file-data.png deleted file mode 100644 index d5ff62b99c1..00000000000 Binary files a/docs/assets/sql/add-file-data.png and /dev/null differ diff --git a/docs/assets/sql/analytics_shift.png b/docs/assets/sql/analytics_shift.png deleted file mode 100644 index 8ee0d7526d2..00000000000 Binary files a/docs/assets/sql/analytics_shift.png and /dev/null differ diff --git a/docs/assets/sql/connectcloud.png b/docs/assets/sql/connectcloud.png deleted file mode 100644 index da88ef566ad..00000000000 Binary files a/docs/assets/sql/connectcloud.png and /dev/null differ diff --git a/docs/assets/sql/connectdb.png b/docs/assets/sql/connectdb.png deleted file mode 100644 index a76dd4cd10a..00000000000 Binary files a/docs/assets/sql/connectdb.png and /dev/null differ diff --git a/docs/assets/sql/datasource.gif b/docs/assets/sql/datasource.gif deleted file mode 100644 index fef984cadfa..00000000000 Binary files a/docs/assets/sql/datasource.gif and /dev/null differ diff --git a/docs/assets/sql/datasource_listing.png b/docs/assets/sql/datasource_listing.png deleted file mode 100644 index f54dbd61be6..00000000000 Binary files a/docs/assets/sql/datasource_listing.png and /dev/null differ diff --git a/docs/assets/sql/dbeaver-local.png b/docs/assets/sql/dbeaver-local.png deleted file mode 100644 index 1200e1586d7..00000000000 Binary files a/docs/assets/sql/dbeaver-local.png and /dev/null differ diff --git a/docs/assets/sql/dbeaver8.png b/docs/assets/sql/dbeaver8.png deleted file mode 100644 index ccc23fa0b37..00000000000 Binary files a/docs/assets/sql/dbeaver8.png and /dev/null differ diff --git a/docs/assets/sql/drop.png b/docs/assets/sql/drop.png deleted file mode 100644 index 2b9cbd213cb..00000000000 Binary files a/docs/assets/sql/drop.png and /dev/null differ diff --git a/docs/assets/sql/file.png b/docs/assets/sql/file.png deleted file mode 100644 index 2cbaa03f960..00000000000 Binary files a/docs/assets/sql/file.png and /dev/null differ diff --git a/docs/assets/sql/machine_learning_lifecycle.png b/docs/assets/sql/machine_learning_lifecycle.png deleted file mode 100644 index f52f642decf..00000000000 Binary files a/docs/assets/sql/machine_learning_lifecycle.png and /dev/null differ diff --git a/docs/assets/sql/mysql-client.gif b/docs/assets/sql/mysql-client.gif deleted file mode 100644 index 3bbd821221f..00000000000 Binary files a/docs/assets/sql/mysql-client.gif and /dev/null differ diff --git a/docs/assets/sql/select.png b/docs/assets/sql/select.png deleted file mode 100644 index e580d64c835..00000000000 Binary files a/docs/assets/sql/select.png and /dev/null differ diff --git a/docs/assets/sql/select_bulk.png b/docs/assets/sql/select_bulk.png deleted file mode 100644 index 0ba44b132f3..00000000000 Binary files a/docs/assets/sql/select_bulk.png and /dev/null differ diff --git a/docs/assets/sql/select_file.png b/docs/assets/sql/select_file.png deleted file mode 100644 index 306b84b67b2..00000000000 Binary files a/docs/assets/sql/select_file.png and /dev/null differ diff --git a/docs/assets/sql/select_hr.png b/docs/assets/sql/select_hr.png deleted file mode 100644 index febe5cf5249..00000000000 Binary files a/docs/assets/sql/select_hr.png and /dev/null differ diff --git a/docs/assets/sql/select_hra.png b/docs/assets/sql/select_hra.png deleted file mode 100644 index b3024153e21..00000000000 Binary files a/docs/assets/sql/select_hra.png and /dev/null differ diff --git a/docs/assets/sql/show.png b/docs/assets/sql/show.png deleted file mode 100644 index 9e3c9f089c8..00000000000 Binary files a/docs/assets/sql/show.png and /dev/null differ diff --git a/docs/assets/sql/status.png b/docs/assets/sql/status.png deleted file mode 100644 index e4a4e846d05..00000000000 Binary files a/docs/assets/sql/status.png and /dev/null differ diff --git a/docs/assets/sql/test_connection_dbeaver.png b/docs/assets/sql/test_connection_dbeaver.png deleted file mode 100644 index b53e1d25367..00000000000 Binary files a/docs/assets/sql/test_connection_dbeaver.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/Mushrooms/Selection_004.png b/docs/assets/sql/tutorials/Mushrooms/Selection_004.png deleted file mode 100644 index 60765f6734e..00000000000 Binary files a/docs/assets/sql/tutorials/Mushrooms/Selection_004.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/Mushrooms/create.png b/docs/assets/sql/tutorials/Mushrooms/create.png deleted file mode 100644 index a58fb5170fa..00000000000 Binary files a/docs/assets/sql/tutorials/Mushrooms/create.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/Mushrooms/database.png b/docs/assets/sql/tutorials/Mushrooms/database.png deleted file mode 100644 index da64041148c..00000000000 Binary files a/docs/assets/sql/tutorials/Mushrooms/database.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/Mushrooms/dbintegration.png b/docs/assets/sql/tutorials/Mushrooms/dbintegration.png deleted file mode 100644 index 5011c8b514b..00000000000 Binary files a/docs/assets/sql/tutorials/Mushrooms/dbintegration.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/Mushrooms/mushroomsselect.png b/docs/assets/sql/tutorials/Mushrooms/mushroomsselect.png deleted file mode 100644 index 63462cfd043..00000000000 Binary files a/docs/assets/sql/tutorials/Mushrooms/mushroomsselect.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/Mushrooms/prediction.png b/docs/assets/sql/tutorials/Mushrooms/prediction.png deleted file mode 100644 index 4077748ab11..00000000000 Binary files a/docs/assets/sql/tutorials/Mushrooms/prediction.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/Mushrooms/statuscheck.png b/docs/assets/sql/tutorials/Mushrooms/statuscheck.png deleted file mode 100644 index a9225ed4139..00000000000 Binary files a/docs/assets/sql/tutorials/Mushrooms/statuscheck.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/bodyfat/connect-database.png b/docs/assets/sql/tutorials/bodyfat/connect-database.png deleted file mode 100644 index 1f4bc04e0c7..00000000000 Binary files a/docs/assets/sql/tutorials/bodyfat/connect-database.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/bodyfat/create.png b/docs/assets/sql/tutorials/bodyfat/create.png deleted file mode 100644 index 1c29ba80331..00000000000 Binary files a/docs/assets/sql/tutorials/bodyfat/create.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/bodyfat/database-connected.png b/docs/assets/sql/tutorials/bodyfat/database-connected.png deleted file mode 100644 index 7de3cbfa5f0..00000000000 Binary files a/docs/assets/sql/tutorials/bodyfat/database-connected.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/bodyfat/join.png b/docs/assets/sql/tutorials/bodyfat/join.png deleted file mode 100644 index 34e271329ea..00000000000 Binary files a/docs/assets/sql/tutorials/bodyfat/join.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/bodyfat/prediction.png b/docs/assets/sql/tutorials/bodyfat/prediction.png deleted file mode 100644 index ccb29486ab5..00000000000 Binary files a/docs/assets/sql/tutorials/bodyfat/prediction.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/bodyfat/selectdata.png b/docs/assets/sql/tutorials/bodyfat/selectdata.png deleted file mode 100644 index 2dba31a42f5..00000000000 Binary files a/docs/assets/sql/tutorials/bodyfat/selectdata.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/bodyfat/status.png b/docs/assets/sql/tutorials/bodyfat/status.png deleted file mode 100644 index 205bdebb9c0..00000000000 Binary files a/docs/assets/sql/tutorials/bodyfat/status.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/bodyfat/upload_file.png b/docs/assets/sql/tutorials/bodyfat/upload_file.png deleted file mode 100644 index 971f921735f..00000000000 Binary files a/docs/assets/sql/tutorials/bodyfat/upload_file.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/connect.gif b/docs/assets/sql/tutorials/connect.gif deleted file mode 100644 index a9c96e0c84f..00000000000 Binary files a/docs/assets/sql/tutorials/connect.gif and /dev/null differ diff --git a/docs/assets/sql/tutorials/connect.png b/docs/assets/sql/tutorials/connect.png deleted file mode 100644 index 3a5f3e60c0b..00000000000 Binary files a/docs/assets/sql/tutorials/connect.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/create_ds.gif b/docs/assets/sql/tutorials/create_ds.gif deleted file mode 100644 index 6d8e61f9670..00000000000 Binary files a/docs/assets/sql/tutorials/create_ds.gif and /dev/null differ diff --git a/docs/assets/sql/tutorials/crop-prediction/database-integration-mariadb.png b/docs/assets/sql/tutorials/crop-prediction/database-integration-mariadb.png deleted file mode 100644 index 82c012e883c..00000000000 Binary files a/docs/assets/sql/tutorials/crop-prediction/database-integration-mariadb.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/customer_churn/create_churn.png b/docs/assets/sql/tutorials/customer_churn/create_churn.png deleted file mode 100644 index e8e09d8c433..00000000000 Binary files a/docs/assets/sql/tutorials/customer_churn/create_churn.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/customer_churn/customer_churn.png b/docs/assets/sql/tutorials/customer_churn/customer_churn.png deleted file mode 100644 index b512debb315..00000000000 Binary files a/docs/assets/sql/tutorials/customer_churn/customer_churn.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/customer_churn/customer_churn2.png b/docs/assets/sql/tutorials/customer_churn/customer_churn2.png deleted file mode 100644 index 5dd32ae5069..00000000000 Binary files a/docs/assets/sql/tutorials/customer_churn/customer_churn2.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/customer_churn/select.png b/docs/assets/sql/tutorials/customer_churn/select.png deleted file mode 100644 index 0431e61d605..00000000000 Binary files a/docs/assets/sql/tutorials/customer_churn/select.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/customer_churn/upload.png b/docs/assets/sql/tutorials/customer_churn/upload.png deleted file mode 100644 index 9cd89702011..00000000000 Binary files a/docs/assets/sql/tutorials/customer_churn/upload.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/1prediction.png b/docs/assets/sql/tutorials/heart-disease/1prediction.png deleted file mode 100644 index 386ee0d5986..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/1prediction.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/2.1prediction.png b/docs/assets/sql/tutorials/heart-disease/2.1prediction.png deleted file mode 100644 index 694db4a5ca0..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/2.1prediction.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/2ndprediction.png b/docs/assets/sql/tutorials/heart-disease/2ndprediction.png deleted file mode 100644 index 61eb5cb9652..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/2ndprediction.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/3rdprediction.png b/docs/assets/sql/tutorials/heart-disease/3rdprediction.png deleted file mode 100644 index 1d6e5e886fa..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/3rdprediction.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/connect_db.png b/docs/assets/sql/tutorials/heart-disease/connect_db.png deleted file mode 100644 index d2ff26c758b..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/connect_db.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/connect_mysql_client.png b/docs/assets/sql/tutorials/heart-disease/connect_mysql_client.png deleted file mode 100644 index 045900c4288..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/connect_mysql_client.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/create.png b/docs/assets/sql/tutorials/heart-disease/create.png deleted file mode 100644 index c2d19f2ce2c..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/create.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/create_predictor.png b/docs/assets/sql/tutorials/heart-disease/create_predictor.png deleted file mode 100644 index f0a87aac086..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/create_predictor.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/database.png b/docs/assets/sql/tutorials/heart-disease/database.png deleted file mode 100644 index d607c7a540c..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/database.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/dataselection.png b/docs/assets/sql/tutorials/heart-disease/dataselection.png deleted file mode 100644 index 400b1d0231a..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/dataselection.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/dbintegration.png b/docs/assets/sql/tutorials/heart-disease/dbintegration.png deleted file mode 100644 index 5011c8b514b..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/dbintegration.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/join_query.gif b/docs/assets/sql/tutorials/heart-disease/join_query.gif deleted file mode 100644 index 2b41dc45d7d..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/join_query.gif and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/predictor_status.png b/docs/assets/sql/tutorials/heart-disease/predictor_status.png deleted file mode 100644 index 7c86f66666a..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/predictor_status.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/select_prediction_query.png b/docs/assets/sql/tutorials/heart-disease/select_prediction_query.png deleted file mode 100644 index 1ae0639d1e4..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/select_prediction_query.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/selectpredictor.png b/docs/assets/sql/tutorials/heart-disease/selectpredictor.png deleted file mode 100644 index d28cc8575b4..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/selectpredictor.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/heart-disease/use_mindsdb.png b/docs/assets/sql/tutorials/heart-disease/use_mindsdb.png deleted file mode 100644 index e5aade0af41..00000000000 Binary files a/docs/assets/sql/tutorials/heart-disease/use_mindsdb.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insert.png b/docs/assets/sql/tutorials/insert.png deleted file mode 100644 index 8f2fa8df549..00000000000 Binary files a/docs/assets/sql/tutorials/insert.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/add-database-cloud-mindsdb-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/add-database-cloud-mindsdb-sql.png deleted file mode 100644 index 039e479fd66..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/add-database-cloud-mindsdb-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/connect-mindsdb-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/connect-mindsdb-sql.png deleted file mode 100644 index 28f4f5936d5..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/connect-mindsdb-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-bitcoin-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-bitcoin-sql.png deleted file mode 100644 index 1ba4621c05b..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-bitcoin-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-isurance-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-isurance-sql.png deleted file mode 100644 index dd71194df5e..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/create-prediction-isurance-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-bitcoin-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-bitcoin-sql.png deleted file mode 100644 index 0688bebeac3..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-bitcoin-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-insurance-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-insurance-sql.png deleted file mode 100644 index 9937b9e4a82..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/create-predictor-insurance-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-bitcoin-table.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-bitcoin-table.png deleted file mode 100644 index 1970b54c5a9..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-bitcoin-table.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-databases-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-databases-sql.png deleted file mode 100644 index 540e119131e..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-databases-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-insurance-table.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-insurance-table.png deleted file mode 100644 index 1006202b031..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-insurance-table.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-bitcoin-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-bitcoin-sql.png deleted file mode 100644 index b3302211ebf..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-bitcoin-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-isurance-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-isurance-sql.png deleted file mode 100644 index e3a4cf0832b..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-predictor-isurance-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql-2.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql-2.png deleted file mode 100644 index 6a695cbef93..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql-2.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql.png deleted file mode 100644 index 119fa2222ef..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/show-tables-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost-prediction/success-connect-sql.png b/docs/assets/sql/tutorials/insurance-cost-prediction/success-connect-sql.png deleted file mode 100644 index 21bcd93c307..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost-prediction/success-connect-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/add-database-cloud-mindsdb-sql.png b/docs/assets/sql/tutorials/insurance-cost/add-database-cloud-mindsdb-sql.png deleted file mode 100644 index 039e479fd66..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/add-database-cloud-mindsdb-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/connect-mindsdb-sql.png b/docs/assets/sql/tutorials/insurance-cost/connect-mindsdb-sql.png deleted file mode 100644 index 28f4f5936d5..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/connect-mindsdb-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/create-prediction-isurance-sql.png b/docs/assets/sql/tutorials/insurance-cost/create-prediction-isurance-sql.png deleted file mode 100644 index dd71194df5e..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/create-prediction-isurance-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/create-predictor-insurance-sql.png b/docs/assets/sql/tutorials/insurance-cost/create-predictor-insurance-sql.png deleted file mode 100644 index 9937b9e4a82..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/create-predictor-insurance-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/create_db.png b/docs/assets/sql/tutorials/insurance-cost/create_db.png deleted file mode 100644 index 0397217b658..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/create_db.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/insurance_predictor.png b/docs/assets/sql/tutorials/insurance-cost/insurance_predictor.png deleted file mode 100644 index f0eccd7686e..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/insurance_predictor.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/prediction_insurance.png b/docs/assets/sql/tutorials/insurance-cost/prediction_insurance.png deleted file mode 100644 index 484efef6678..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/prediction_insurance.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/select_insurance.png b/docs/assets/sql/tutorials/insurance-cost/select_insurance.png deleted file mode 100644 index fdf0ee65a9b..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/select_insurance.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/show-databases-sql.png b/docs/assets/sql/tutorials/insurance-cost/show-databases-sql.png deleted file mode 100644 index 540e119131e..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/show-databases-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/show-insurance-table.png b/docs/assets/sql/tutorials/insurance-cost/show-insurance-table.png deleted file mode 100644 index 1006202b031..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/show-insurance-table.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/show-predictor-isurance-sql.png b/docs/assets/sql/tutorials/insurance-cost/show-predictor-isurance-sql.png deleted file mode 100644 index e3a4cf0832b..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/show-predictor-isurance-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/show-tables-sql-2.png b/docs/assets/sql/tutorials/insurance-cost/show-tables-sql-2.png deleted file mode 100644 index 6a695cbef93..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/show-tables-sql-2.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/show-tables-sql.png b/docs/assets/sql/tutorials/insurance-cost/show-tables-sql.png deleted file mode 100644 index 119fa2222ef..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/show-tables-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/insurance-cost/success-connect-sql.png b/docs/assets/sql/tutorials/insurance-cost/success-connect-sql.png deleted file mode 100644 index 21bcd93c307..00000000000 Binary files a/docs/assets/sql/tutorials/insurance-cost/success-connect-sql.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/process-quality/database-integration.png b/docs/assets/sql/tutorials/process-quality/database-integration.png deleted file mode 100644 index fe96f4c119e..00000000000 Binary files a/docs/assets/sql/tutorials/process-quality/database-integration.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/process-quality/database.png b/docs/assets/sql/tutorials/process-quality/database.png deleted file mode 100644 index d607c7a540c..00000000000 Binary files a/docs/assets/sql/tutorials/process-quality/database.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/process-quality/dbintegration.png b/docs/assets/sql/tutorials/process-quality/dbintegration.png deleted file mode 100644 index 5011c8b514b..00000000000 Binary files a/docs/assets/sql/tutorials/process-quality/dbintegration.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/select.png b/docs/assets/sql/tutorials/select.png deleted file mode 100644 index 2fa767edccb..00000000000 Binary files a/docs/assets/sql/tutorials/select.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/selecti.png b/docs/assets/sql/tutorials/selecti.png deleted file mode 100644 index 4cce94e7e26..00000000000 Binary files a/docs/assets/sql/tutorials/selecti.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/1-ML_audience.png b/docs/assets/sql/tutorials/snowflake-superset/1-ML_audience.png deleted file mode 100644 index a9f65cb53e6..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/1-ML_audience.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/10-DBeaver connection.png b/docs/assets/sql/tutorials/snowflake-superset/10-DBeaver connection.png deleted file mode 100644 index 8722f1c41cd..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/10-DBeaver connection.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/11-Dbeaver2.png b/docs/assets/sql/tutorials/snowflake-superset/11-Dbeaver2.png deleted file mode 100644 index 42690cc8298..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/11-Dbeaver2.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/12-show_dtabases.png b/docs/assets/sql/tutorials/snowflake-superset/12-show_dtabases.png deleted file mode 100644 index 71ec0af4a88..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/12-show_dtabases.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/13-info_schema.png b/docs/assets/sql/tutorials/snowflake-superset/13-info_schema.png deleted file mode 100644 index 7d0ff3a0b90..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/13-info_schema.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/14-table.png b/docs/assets/sql/tutorials/snowflake-superset/14-table.png deleted file mode 100644 index 7a15d17fdad..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/14-table.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/15-query.png b/docs/assets/sql/tutorials/snowflake-superset/15-query.png deleted file mode 100644 index 3dd98bb2c6b..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/15-query.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/16-predictive_query.png b/docs/assets/sql/tutorials/snowflake-superset/16-predictive_query.png deleted file mode 100644 index 46164d46d65..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/16-predictive_query.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/17-stops_by_route_Superset.jpg b/docs/assets/sql/tutorials/snowflake-superset/17-stops_by_route_Superset.jpg deleted file mode 100644 index c131dce035b..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/17-stops_by_route_Superset.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/18-timeseries_chart.jpg b/docs/assets/sql/tutorials/snowflake-superset/18-timeseries_chart.jpg deleted file mode 100644 index bf4d473c0a6..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/18-timeseries_chart.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/19-timeseries2.jpg b/docs/assets/sql/tutorials/snowflake-superset/19-timeseries2.jpg deleted file mode 100644 index 2d44bec204c..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/19-timeseries2.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/2-ML_workflow.png b/docs/assets/sql/tutorials/snowflake-superset/2-ML_workflow.png deleted file mode 100644 index 7775334643e..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/2-ML_workflow.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/20-filters1.jpg b/docs/assets/sql/tutorials/snowflake-superset/20-filters1.jpg deleted file mode 100644 index 44adb88da32..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/20-filters1.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/20-filters2.jpg b/docs/assets/sql/tutorials/snowflake-superset/20-filters2.jpg deleted file mode 100644 index eebecd869f5..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/20-filters2.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/21-graph.jpg b/docs/assets/sql/tutorials/snowflake-superset/21-graph.jpg deleted file mode 100644 index 1a41b0e0541..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/21-graph.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/22-graph.jpg b/docs/assets/sql/tutorials/snowflake-superset/22-graph.jpg deleted file mode 100644 index 57cf5ae4bec..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/22-graph.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/23-graph.jpg b/docs/assets/sql/tutorials/snowflake-superset/23-graph.jpg deleted file mode 100644 index 7bf3035448c..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/23-graph.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/24-MindsDB_ML-Workflow.png b/docs/assets/sql/tutorials/snowflake-superset/24-MindsDB_ML-Workflow.png deleted file mode 100644 index ba0bc2c9c51..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/24-MindsDB_ML-Workflow.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income-debt.jpg b/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income-debt.jpg deleted file mode 100644 index c3a3c3d6001..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income-debt.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income_table.jpg b/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income_table.jpg deleted file mode 100644 index 71b8e3d8058..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/3-AI_Tables-income_table.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/4-AI_Tables-income-debt-query.jpg b/docs/assets/sql/tutorials/snowflake-superset/4-AI_Tables-income-debt-query.jpg deleted file mode 100644 index 19ce5b15f7d..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/4-AI_Tables-income-debt-query.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/4-debt_vs_income.png b/docs/assets/sql/tutorials/snowflake-superset/4-debt_vs_income.png deleted file mode 100644 index d1e5c60ab06..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/4-debt_vs_income.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query-table.jpg b/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query-table.jpg deleted file mode 100644 index 12966b717d6..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query-table.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query.jpg b/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query.jpg deleted file mode 100644 index 8fed6732107..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/5-debt-income-query.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null-table.jpg b/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null-table.jpg deleted file mode 100644 index 25b6415008b..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null-table.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null.jpg b/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null.jpg deleted file mode 100644 index 2122c847f60..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/6-debt-income-query-null.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml-table.jpg b/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml-table.jpg deleted file mode 100644 index eb4735d750f..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml-table.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml.jpg b/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml.jpg deleted file mode 100644 index 36c4225f717..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/7-debt-income-query-ml.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/8-multivariate_problem.jpg b/docs/assets/sql/tutorials/snowflake-superset/8-multivariate_problem.jpg deleted file mode 100644 index 8e437b53e23..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/8-multivariate_problem.jpg and /dev/null differ diff --git a/docs/assets/sql/tutorials/snowflake-superset/9-connect_to_MindsDB.png b/docs/assets/sql/tutorials/snowflake-superset/9-connect_to_MindsDB.png deleted file mode 100644 index 45e505128f4..00000000000 Binary files a/docs/assets/sql/tutorials/snowflake-superset/9-connect_to_MindsDB.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/status.png b/docs/assets/sql/tutorials/status.png deleted file mode 100644 index a60ad71df42..00000000000 Binary files a/docs/assets/sql/tutorials/status.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/use.png b/docs/assets/sql/tutorials/use.png deleted file mode 100644 index 25f47fbe9ec..00000000000 Binary files a/docs/assets/sql/tutorials/use.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-clickhouse/describe-table.png b/docs/assets/tutorials/aitables-clickhouse/describe-table.png deleted file mode 100644 index c10d33fc8d0..00000000000 Binary files a/docs/assets/tutorials/aitables-clickhouse/describe-table.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-clickhouse/select-info.png b/docs/assets/tutorials/aitables-clickhouse/select-info.png deleted file mode 100644 index fc9fee0a20d..00000000000 Binary files a/docs/assets/tutorials/aitables-clickhouse/select-info.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-clickhouse/select-info2.png b/docs/assets/tutorials/aitables-clickhouse/select-info2.png deleted file mode 100644 index 215fce4a09e..00000000000 Binary files a/docs/assets/tutorials/aitables-clickhouse/select-info2.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-clickhouse/show-tables.png b/docs/assets/tutorials/aitables-clickhouse/show-tables.png deleted file mode 100644 index d5081904f6c..00000000000 Binary files a/docs/assets/tutorials/aitables-clickhouse/show-tables.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/database.png b/docs/assets/tutorials/aitables-mariadb/database.png deleted file mode 100644 index a53ec9a4667..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/database.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/mdb-maria.png b/docs/assets/tutorials/aitables-mariadb/mdb-maria.png deleted file mode 100644 index d4293d55ba2..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/mdb-maria.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/mdb-ver.png b/docs/assets/tutorials/aitables-mariadb/mdb-ver.png deleted file mode 100644 index 0e741b06d95..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/mdb-ver.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/predicted-info.png b/docs/assets/tutorials/aitables-mariadb/predicted-info.png deleted file mode 100644 index 78c90805f88..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/predicted-info.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/predicted.png b/docs/assets/tutorials/aitables-mariadb/predicted.png deleted file mode 100644 index e004f66ee76..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/predicted.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/predicted1.png b/docs/assets/tutorials/aitables-mariadb/predicted1.png deleted file mode 100644 index aa15856adb0..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/predicted1.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/select-data.png b/docs/assets/tutorials/aitables-mariadb/select-data.png deleted file mode 100644 index edf1f79f0ca..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/select-data.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/training-finish.png b/docs/assets/tutorials/aitables-mariadb/training-finish.png deleted file mode 100644 index 90650d48663..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/training-finish.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/training-run.png b/docs/assets/tutorials/aitables-mariadb/training-run.png deleted file mode 100644 index 44c8736e2f2..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/training-run.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mariadb/training.png b/docs/assets/tutorials/aitables-mariadb/training.png deleted file mode 100644 index 658fa1dff8f..00000000000 Binary files a/docs/assets/tutorials/aitables-mariadb/training.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mssql/AI Tables.jpg b/docs/assets/tutorials/aitables-mssql/AI Tables.jpg deleted file mode 100644 index 12af031ca62..00000000000 Binary files a/docs/assets/tutorials/aitables-mssql/AI Tables.jpg and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mssql/train-model.png b/docs/assets/tutorials/aitables-mssql/train-model.png deleted file mode 100644 index c722f0c6ee3..00000000000 Binary files a/docs/assets/tutorials/aitables-mssql/train-model.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mysql/list_tables.png b/docs/assets/tutorials/aitables-mysql/list_tables.png deleted file mode 100644 index ce044c906b0..00000000000 Binary files a/docs/assets/tutorials/aitables-mysql/list_tables.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mysql/select_status.png b/docs/assets/tutorials/aitables-mysql/select_status.png deleted file mode 100644 index 4d40ac3fd5a..00000000000 Binary files a/docs/assets/tutorials/aitables-mysql/select_status.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-mysql/select_table.png b/docs/assets/tutorials/aitables-mysql/select_table.png deleted file mode 100644 index f7e9ed4b014..00000000000 Binary files a/docs/assets/tutorials/aitables-mysql/select_table.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-postgresql/list_schema.png b/docs/assets/tutorials/aitables-postgresql/list_schema.png deleted file mode 100644 index 3d982cf610a..00000000000 Binary files a/docs/assets/tutorials/aitables-postgresql/list_schema.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-postgresql/mindsdb_started.png b/docs/assets/tutorials/aitables-postgresql/mindsdb_started.png deleted file mode 100644 index 243c2d6c2bb..00000000000 Binary files a/docs/assets/tutorials/aitables-postgresql/mindsdb_started.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-postgresql/select_model.png b/docs/assets/tutorials/aitables-postgresql/select_model.png deleted file mode 100644 index 0c30c1a440a..00000000000 Binary files a/docs/assets/tutorials/aitables-postgresql/select_model.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-postgresql/select_status.png b/docs/assets/tutorials/aitables-postgresql/select_status.png deleted file mode 100644 index 08eaea986a1..00000000000 Binary files a/docs/assets/tutorials/aitables-postgresql/select_status.png and /dev/null differ diff --git a/docs/assets/tutorials/aitables-postgresql/select_table.png b/docs/assets/tutorials/aitables-postgresql/select_table.png deleted file mode 100644 index c5b4f6b07eb..00000000000 Binary files a/docs/assets/tutorials/aitables-postgresql/select_table.png and /dev/null differ diff --git a/docs/assets/tutorials/crops/2ndprediction.png b/docs/assets/tutorials/crops/2ndprediction.png deleted file mode 100644 index 208516c42c8..00000000000 Binary files a/docs/assets/tutorials/crops/2ndprediction.png and /dev/null differ diff --git a/docs/assets/tutorials/crops/createcropspredictor.png b/docs/assets/tutorials/crops/createcropspredictor.png deleted file mode 100644 index bdaf210462f..00000000000 Binary files a/docs/assets/tutorials/crops/createcropspredictor.png and /dev/null differ diff --git a/docs/assets/tutorials/crops/cropprediction.png b/docs/assets/tutorials/crops/cropprediction.png deleted file mode 100644 index f70b75753e8..00000000000 Binary files a/docs/assets/tutorials/crops/cropprediction.png and /dev/null differ diff --git a/docs/assets/tutorials/crops/database.png b/docs/assets/tutorials/crops/database.png deleted file mode 100644 index d607c7a540c..00000000000 Binary files a/docs/assets/tutorials/crops/database.png and /dev/null differ diff --git a/docs/assets/tutorials/crops/select_datasource.png b/docs/assets/tutorials/crops/select_datasource.png deleted file mode 100644 index ace649d18cf..00000000000 Binary files a/docs/assets/tutorials/crops/select_datasource.png and /dev/null differ diff --git a/docs/assets/tutorials/crops/selectfromfiles.png b/docs/assets/tutorials/crops/selectfromfiles.png deleted file mode 100644 index f0cb9fa4ebf..00000000000 Binary files a/docs/assets/tutorials/crops/selectfromfiles.png and /dev/null differ diff --git a/docs/assets/tutorials/crops/statuscheck.png b/docs/assets/tutorials/crops/statuscheck.png deleted file mode 100644 index 49f8b56a13c..00000000000 Binary files a/docs/assets/tutorials/crops/statuscheck.png and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/Connecting_database_to_MindsdbCloud.gif b/docs/assets/tutorials/diabetes/Connecting_database_to_MindsdbCloud.gif deleted file mode 100644 index ec838c4aeb3..00000000000 Binary files a/docs/assets/tutorials/diabetes/Connecting_database_to_MindsdbCloud.gif and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/DBdiabetes.png b/docs/assets/tutorials/diabetes/DBdiabetes.png deleted file mode 100644 index 6eac862fc64..00000000000 Binary files a/docs/assets/tutorials/diabetes/DBdiabetes.png and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/connecting_mysql_client.gif b/docs/assets/tutorials/diabetes/connecting_mysql_client.gif deleted file mode 100644 index 07534bfad86..00000000000 Binary files a/docs/assets/tutorials/diabetes/connecting_mysql_client.gif and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/create_predictor.png b/docs/assets/tutorials/diabetes/create_predictor.png deleted file mode 100644 index 0f0288411b1..00000000000 Binary files a/docs/assets/tutorials/diabetes/create_predictor.png and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/database.png b/docs/assets/tutorials/diabetes/database.png deleted file mode 100644 index d607c7a540c..00000000000 Binary files a/docs/assets/tutorials/diabetes/database.png and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/pg4admin/diabetes_logo.png b/docs/assets/tutorials/diabetes/pg4admin/diabetes_logo.png deleted file mode 100644 index 29b54b8f4bd..00000000000 Binary files a/docs/assets/tutorials/diabetes/pg4admin/diabetes_logo.png and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/pg4admin/images.png b/docs/assets/tutorials/diabetes/pg4admin/images.png deleted file mode 100644 index 7aa8fa107b6..00000000000 Binary files a/docs/assets/tutorials/diabetes/pg4admin/images.png and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/pg4admin/index.jpeg b/docs/assets/tutorials/diabetes/pg4admin/index.jpeg deleted file mode 100644 index b4488fe8a58..00000000000 Binary files a/docs/assets/tutorials/diabetes/pg4admin/index.jpeg and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/prediction.png b/docs/assets/tutorials/diabetes/prediction.png deleted file mode 100644 index 2f1badedd68..00000000000 Binary files a/docs/assets/tutorials/diabetes/prediction.png and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/predictor.png b/docs/assets/tutorials/diabetes/predictor.png deleted file mode 100644 index dc446528989..00000000000 Binary files a/docs/assets/tutorials/diabetes/predictor.png and /dev/null differ diff --git a/docs/assets/tutorials/diabetes/select_predictor.png b/docs/assets/tutorials/diabetes/select_predictor.png deleted file mode 100644 index 472452bb893..00000000000 Binary files a/docs/assets/tutorials/diabetes/select_predictor.png and /dev/null differ diff --git a/docs/assets/tutorials/llamaindex/1.ml_engine.png b/docs/assets/tutorials/llamaindex/1.ml_engine.png deleted file mode 100644 index 7425e7e13f2..00000000000 Binary files a/docs/assets/tutorials/llamaindex/1.ml_engine.png and /dev/null differ diff --git a/docs/assets/tutorials/llamaindex/2.create_model.png b/docs/assets/tutorials/llamaindex/2.create_model.png deleted file mode 100644 index a118a16f6d0..00000000000 Binary files a/docs/assets/tutorials/llamaindex/2.create_model.png and /dev/null differ diff --git a/docs/assets/tutorials/llamaindex/3.describe.png b/docs/assets/tutorials/llamaindex/3.describe.png deleted file mode 100644 index b0f2546d04a..00000000000 Binary files a/docs/assets/tutorials/llamaindex/3.describe.png and /dev/null differ diff --git a/docs/assets/tutorials/llamaindex/4.select_model.png b/docs/assets/tutorials/llamaindex/4.select_model.png deleted file mode 100644 index b99e27a57b3..00000000000 Binary files a/docs/assets/tutorials/llamaindex/4.select_model.png and /dev/null differ diff --git a/docs/assets/tutorials/llamaindex/5.batch.png b/docs/assets/tutorials/llamaindex/5.batch.png deleted file mode 100644 index 6a49c9ccd14..00000000000 Binary files a/docs/assets/tutorials/llamaindex/5.batch.png and /dev/null differ diff --git a/docs/assets/tutorials/monkeylearn/model3.png b/docs/assets/tutorials/monkeylearn/model3.png deleted file mode 100644 index 6581255841a..00000000000 Binary files a/docs/assets/tutorials/monkeylearn/model3.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-launch-skysql.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-launch-skysql.png deleted file mode 100644 index c830cf0ac14..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-launch-skysql.png and /dev/null differ diff --git a/docs/assets/tutorials/zero-shot-classification-postgresql-pg-admin.png b/docs/assets/tutorials/zero-shot-classification-postgresql-pg-admin.png deleted file mode 100644 index 1f9f4379d53..00000000000 Binary files a/docs/assets/tutorials/zero-shot-classification-postgresql-pg-admin.png and /dev/null differ diff --git a/docs/assets/tutorials/zero-shot-classification-postgresql.png b/docs/assets/tutorials/zero-shot-classification-postgresql.png deleted file mode 100644 index 27e1a750d08..00000000000 Binary files a/docs/assets/tutorials/zero-shot-classification-postgresql.png and /dev/null differ diff --git a/docs/assets/what_is_mindsdb.png b/docs/assets/what_is_mindsdb.png deleted file mode 100644 index 6b54159e543..00000000000 Binary files a/docs/assets/what_is_mindsdb.png and /dev/null differ diff --git a/docs/assets/what_is_mindsdb2.png b/docs/assets/what_is_mindsdb2.png deleted file mode 100644 index 3d1508da6f1..00000000000 Binary files a/docs/assets/what_is_mindsdb2.png and /dev/null differ diff --git a/docs/dark_mode.html b/docs/dark_mode.html new file mode 100644 index 00000000000..1b588574922 --- /dev/null +++ b/docs/dark_mode.html @@ -0,0 +1,187 @@ + + + + + Type & Buttons (dark) β€” Anton Cowork + + + + +
+ + + Aa + + +
+
Unpacking...
+ + + + + + + + + + diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 00000000000..ee144ec6d21 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,1985 @@ + + + + + + +MindsDB Query Engine β€” SQL Reference + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ SQL Reference +
+ + +
+
+ +
+ + + + + +
+
+ + +
+
+
SQL Reference
+

MindsDB Query Engine β€” Semantic Search via SQL

+

MindsDB lets you build semantic search pipelines entirely in SQL. Connect your data sources, shape them with views and tables, index unstructured content into knowledge bases, and expose everything through a natural-language agent β€” all without leaving your SQL client.

+
+ +
+
πŸ—„οΈ Connect Data
+
──→
+
🧠 Index in KB
+
──→
+
πŸ€– Query via Agent
+
+ +
+
+
+
+ optional +
πŸ”§ Views
+ + +
πŸ“ Projects
+
+
+
+
+
+
+ + +
+

Quickstart

+

Five SQL statements to go from zero to a working semantic search agent:

+
SQL
+
-- 1. Connect your data source
+CREATE DATABASE my_pg WITH ENGINE = 'postgres',
+PARAMETERS = {"host":"localhost","port":5432,"user":"user","password":"pass","database":"mydb"};
+
+-- 2. Create a project to organize work
+CREATE PROJECT search_project;
+
+-- 3. Create a knowledge base (semantic index)
+CREATE KNOWLEDGE_BASE search_project.docs_kb
+USING
+  embedding_model = {"provider":"openai","model_name":"text-embedding-3-large","api_key":"sk-..."},
+  content_columns = ['body'],
+  metadata_columns = ['title','category'],
+  id_column = 'doc_id';
+
+-- 4. Index your data
+INSERT INTO search_project.docs_kb
+  SELECT doc_id, title, category, body FROM my_pg.documents;
+
+-- 5. Build the agent
+CREATE AGENT search_project.my_agent
+USING
+  model = {"provider":"openai","model_name":"gpt-4o","api_key":"sk-..."},
+  data = {"knowledge_bases":["search_project.docs_kb"]},
+  prompt_template = 'docs_kb contains product documentation. Answer user questions using it.';
+
+-- Query it
+SELECT answer FROM search_project.my_agent
+WHERE question = 'How do I reset my password?';
+ +
+
+ + +
+

Setup

+

# Docker

+

The fastest way to run MindsDB locally. Choose the image that fits your integrations:

+
+ + + + + +
ImageIncludes
mindsdb/mindsdb:latestmysql, postgresql, snowflake, bigquery, mssql, salesforce
mindsdb/mindsdb:huggingfaceAll of the above + HuggingFace
+
BASH
+
docker run --name mindsdb_container \
+  -e MINDSDB_APIS=http,mysql \
+  -p 47334:47334 -p 47335:47335 \
+  mindsdb/mindsdb
+

Access the MindsDB editor at http://127.0.0.1:47334. The MySQL API is available at port 47335.

+ +
πŸ’‘

Persist data: add -v $(pwd)/mdb_data:/root/mdb_storage to keep models and configs across restarts.

+
βœ“

With auth: pass -e MINDSDB_USERNAME=admin -e MINDSDB_PASSWORD=password to enable login.

+
⚠

Local databases: use http://host.docker.internal instead of localhost when the target DB runs in another container.

+ +

Container management

+
BASH
+
docker stop mindsdb_container    # stop
+docker start mindsdb_container   # restart (preserves state)
+docker logs -f mindsdb_container # follow logs
+ +

Install integration dependencies

+

Once the container is running, install extra integrations directly from the editor (Settings β†’ Manage Integrations), or via shell:

+
BASH
+
docker exec mindsdb_container pip install mindsdb[pgvector]
+
+ +
+

# pip install

+
BASH
+
pip install mindsdb
+
+# with extras
+pip install mindsdb[pgvector,openai,postgres]
+
BASH β€” start
+
python -m mindsdb
+

Editor: http://127.0.0.1:47334  Β·  MySQL API: port 47335  Β·  PostgreSQL API: port 47336

+
+ +
+

# Connect Clients

+

MindsDB exposes a MySQL-compatible wire protocol. Any MySQL client can connect:

+
+ + + + + + +
ClientHostPortUserPassword
MySQL CLI / DBeaver / TablePlus127.0.0.147335mindsdb(empty)
MindsDB Editorhttp://127.0.0.1:47334
SQLAlchemymysql+pymysql://mindsdb@127.0.0.1:47335/mindsdb
+
BASH β€” MySQL CLI
+
mysql -h 127.0.0.1 --port 47335 -u mindsdb -p
+
PYTHON β€” SQLAlchemy
+
from sqlalchemy import create_engine
+engine = create_engine("mysql+pymysql://mindsdb@127.0.0.1:47335/mindsdb")
+with engine.connect() as conn:
+    result = conn.execute("SELECT answer FROM my_agent WHERE question = 'hello'")
+
+
+ + +
+

Databases

+

Databases are connections to external data sources β€” your Postgres, MySQL, S3, Snowflake, MongoDB, etc. MindsDB never copies your data; it queries it live each time.

+ +

# CREATE DATABASE

+
+
CREATE DATABASEConnect an external data source
+
CREATE DATABASE [IF NOT EXISTS] datasource_name
+[WITH] [ENGINE [=] engine_name] [,]
+[PARAMETERS [=] {
+  "key": "value",
+  ...
+}];
+
+ +
+
datasource_nameUnique identifier for this connection within MindsDB.
+
engine_nameThe handler to use (e.g. 'postgres', 'mysql', 'mongodb', 'snowflake', 's3').
+
PARAMETERSJSON object of connection parameters specific to the engine (host, port, user, password, database, etc.).
+
+ +

Examples

+
PostgreSQL
+
CREATE DATABASE my_postgres
+WITH ENGINE = 'postgres',
+PARAMETERS = {
+  "host": "127.0.0.1",
+  "port": 5432,
+  "user": "postgres",
+  "password": "password",
+  "database": "mydb"
+};
+ +
MySQL
+
CREATE DATABASE my_mysql
+WITH ENGINE = 'mysql',
+PARAMETERS = {
+  "host": "127.0.0.1",
+  "port": 3306,
+  "user": "root",
+  "password": "password",
+  "database": "mydb"
+};
+ +
MongoDB
+
CREATE DATABASE my_mongo
+WITH ENGINE = 'mongodb',
+PARAMETERS = {
+  "host": "mongodb+srv://user:pass@cluster.mongodb.net",
+  "database": "mydb"
+};
+ +
πŸ’‘

Query all available data handlers: SELECT * FROM information_schema.handlers WHERE type = 'data';

+
+ +
+

# DROP DATABASE

+
+
DROP DATABASERemove a connected data source
+
DROP DATABASE [IF EXISTS] database_name;
+
+
⚠

Dropping a database removes the connection only. No data in the external source is affected.

+
+ +
+

# SHOW DATABASES

+
SQL
+
-- List all databases and projects
+SHOW DATABASES;
+SHOW FULL DATABASES;
+
+-- Filter to data sources only
+SHOW FULL DATABASES WHERE type = 'data';
+
+-- Via information_schema
+SELECT * FROM information_schema.databases;
+
+-- List tables in a database
+SHOW TABLES FROM datasource_name;
+
+ +
+

# USE

+

Switch context to a database or project. Subsequent queries can omit the database prefix.

+
SQL
+
USE datasource_name;
+
+-- Now you can query directly
+SELECT * FROM table_name LIMIT 10;
+
+-- Switch to a project
+USE my_project;
+
+ +
+

# Native Queries

+

Pass database-native syntax through MindsDB without translation. Useful for database-specific functions, MongoDB-QL, Snowflake SQL extensions, etc.

+
SQL β€” PostgreSQL native query
+
SELECT * FROM my_postgres (
+  SELECT
+    model,
+    year,
+    price,
+    ROUND(CAST((mpg / 2.3521458) AS numeric), 1) AS kml,
+    COUNT(*) OVER (PARTITION BY model, year) AS units_to_sell
+  FROM demo_data.used_cars
+);
+ +
SQL β€” MongoDB-QL native query
+
SELECT * FROM my_mongo (
+  db.products.find({"category": "electronics"}).limit(50)
+);
+ +
SQL β€” create view from native query
+
CREATE VIEW enriched_cars FROM my_postgres (
+  SELECT *, ROUND(CAST((mpg / 2.3521458) AS numeric), 1) AS kml
+  FROM demo_data.used_cars
+);
+
+
+ + +
+

Projects

+

Projects are namespaces that hold knowledge bases, agents, views, and jobs. They let you organize your AI pipeline by use case (e.g. search_prod, search_staging).

+ +

# CREATE PROJECT

+
+
CREATE PROJECT
+
CREATE PROJECT [IF NOT EXISTS] project_name;
+
+
SQL
+
CREATE PROJECT search_project;
+CREATE PROJECT IF NOT EXISTS analytics;
+
+ +
+

# DROP PROJECT

+
+
DROP PROJECT
+
DROP PROJECT [IF EXISTS] project_name;
+
+
+ +
+

# SHOW PROJECTS

+
SQL
+
-- List all projects
+SHOW DATABASES WHERE type = 'project';
+SHOW FULL DATABASES WHERE type = 'project';
+
+-- List objects in a project
+SHOW TABLES FROM project_name;
+SHOW VIEWS FROM project_name;
+SHOW KNOWLEDGE_BASES FROM project_name;
+
+
+ + +
+

Tables & Views

+

Use tables and views to shape and materialize data before indexing it into a knowledge base. Views are saved SELECT statements (virtual); tables are materialized results (physical).

+ +

# CREATE TABLE

+

Create an empty table or materialize a query result into an integration.

+
+
CREATE TABLEEmpty table or filled from query
+
-- Empty table
+CREATE TABLE integration_name.table_name (
+  column_name data_type,
+  ...
+);
+
+-- Filled from query
+CREATE TABLE integration_name.table_name (SELECT ...);
+
+-- Replace if exists
+CREATE OR REPLACE TABLE integration_name.table_name (SELECT ...);
+
+ +
SQL β€” materialize cleaned data
+
-- Clean and materialize support tickets for KB ingestion
+CREATE OR REPLACE TABLE my_pg.clean_tickets (
+  SELECT
+    id,
+    category,
+    TRIM(LOWER(subject))  AS subject,
+    body,
+    created_at
+  FROM my_pg.raw_support_tickets
+  WHERE body IS NOT NULL
+    AND LENGTH(body) > 20
+);
+
+ +
+

# DROP TABLE

+
+
DROP TABLE
+
DROP TABLE table_name;
+DROP TABLE files.uploaded_file;
+
+
+ +
+

# CREATE VIEW

+

A view is a saved SELECT that executes on every access. Perfect for data preparation before sending to a knowledge base.

+
+
CREATE VIEW
+
CREATE VIEW [IF NOT EXISTS] [project_name.]view_name AS (
+  SELECT columns
+  FROM integration_name.table_name
+  WHERE ...
+);
+
+ +
SQL β€” create filtered view for KB ingestion
+
-- View joining tickets with product metadata for richer indexing
+CREATE VIEW search_project.enriched_tickets AS (
+  SELECT
+    t.id,
+    t.body,
+    t.created_at,
+    p.name    AS product_name,
+    p.category AS product_category
+  FROM my_pg.support_tickets t
+  JOIN my_pg.products p ON t.product_id = p.id
+  WHERE t.status = 'closed'
+);
+ +
SQL β€” list views
+
SHOW VIEWS;
+SHOW FULL VIEWS;
+
+ +
+

# ALTER VIEW

+
+
ALTER VIEW
+
-- Standard syntax
+ALTER VIEW view_name [AS] (
+  SELECT * FROM integration_name.table_name
+);
+
+-- With explicit source
+ALTER VIEW view_name
+FROM integration_name (
+  SELECT * FROM table_name
+);
+
+
+ +
+

# DROP VIEW

+
+
DROP VIEW
+
DROP VIEW [IF EXISTS] view_name;
+
+
+
+ + +
+

SQL Reference

+ +

# SELECT

+

MindsDB supports standard SELECT with all standard clauses. Queries that reference one integration are pushed down to that engine. Cross-integration queries are executed in MindsDB's DuckDB-backed engine.

+ +
SQL β€” basic select
+
SELECT location, MAX(sqft)
+FROM my_pg.home_rentals
+GROUP BY location
+LIMIT 5;
+ +
SQL β€” subquery on integration data
+
-- Wrap in subquery when integration doesn't support GROUP BY
+SELECT type, MAX(bedrooms), LAST(price)
+FROM my_mongo (
+  db.house_sales.find().limit(300)
+) GROUP BY 1;
+ +
SQL β€” UNION ALL
+
SELECT id, content, 'tickets' AS source FROM my_pg.support_tickets
+UNION ALL
+SELECT id, body  AS content, 'forums'  AS source FROM my_pg.forum_posts;
+
+ +
+

# INSERT INTO

+

Insert rows into an integration table from a subquery. The destination table must already exist.

+
+
INSERT INTO
+
INSERT INTO integration_name.table_name
+  (SELECT ...);
+
+ +
SQL
+
-- Archive processed tickets into a separate table
+INSERT INTO my_pg.archived_tickets (
+  SELECT * FROM my_pg.support_tickets
+  WHERE resolved_at < '2024-01-01'
+);
+
+ +
+

# UPDATE

+
SQL β€” standard update
+
UPDATE my_pg.support_tickets
+SET status = 'archived'
+WHERE resolved_at < '2024-01-01';
+ +
SQL β€” update from select
+
UPDATE my_pg.products
+ON product_id
+FROM (
+  SELECT product_id, new_description AS description
+  FROM my_pg.product_updates
+);
+
+ +
+

# DELETE

+
SQL
+
-- Simple delete
+DELETE FROM my_pg.table_name
+WHERE column_name = 'value';
+
+-- Delete with subquery
+DELETE FROM my_pg.support_tickets
+WHERE id IN (
+  SELECT id FROM my_pg.resolved_tickets
+  WHERE resolved_at < '2023-01-01'
+);
+
+ +
+

# JOIN

+

Standard SQL JOINs work across tables within the same integration or after bridging with a view/subquery. Use JOINs to denormalize and enrich data before indexing.

+
SQL β€” cross-table join for data prep
+
-- Enrich tickets with user and product info before KB insert
+SELECT
+  t.id,
+  t.body,
+  u.name       AS user_name,
+  u.plan        AS user_plan,
+  p.name        AS product_name,
+  p.category    AS product_category
+FROM my_pg.support_tickets t
+JOIN my_pg.users    u ON t.user_id    = u.id
+JOIN my_pg.products p ON t.product_id = p.id
+WHERE t.body IS NOT NULL;
+ +
SQL β€” multi-source join via subqueries
+
SELECT pg_data.id, mongo_data.tags
+FROM (SELECT id, title FROM my_pg.articles) AS pg_data
+JOIN (SELECT article_id, tags FROM my_mongo.article_tags) AS mongo_data
+  ON pg_data.id = mongo_data.article_id;
+
+ +
+

# CASE WHEN

+

Standard conditional logic in SELECT, WHERE, and other clauses.

+
SQL
+
SELECT
+  id,
+  body,
+  CASE
+    WHEN priority = 1              THEN 'critical'
+    WHEN priority BETWEEN 2 AND 3 THEN 'high'
+    WHEN priority = 4              THEN 'medium'
+    ELSE                               'low'
+  END AS priority_label
+FROM my_pg.support_tickets;
+
+ +
+

# CTEs (WITH)

+

Common Table Expressions create named temporary result sets for modular, readable queries.

+
SQL
+
WITH
+-- Step 1: Get recent tickets
+recent AS (
+  SELECT id, user_id, product_id, body
+  FROM my_pg.support_tickets
+  WHERE created_at > '2024-01-01'
+),
+-- Step 2: Join product info
+enriched AS (
+  SELECT
+    r.id,
+    r.body,
+    p.name     AS product_name,
+    p.category AS category
+  FROM recent r
+  JOIN my_pg.products p ON r.product_id = p.id
+)
+-- Final: Insert into KB
+INSERT INTO search_project.tickets_kb
+  SELECT * FROM enriched;
+
+
+ + +
+

Knowledge Bases

+

A knowledge base is the semantic index at the heart of MindsDB's search capabilities. It combines an embedding model, an optional reranking model, and a vector store to enable context-aware retrieval over any data you load into it.

+ +
🧠

Knowledge bases match content by meaning, not keywords. "reset credentials" and "forgot password" return the same document even though no words overlap.

+ +

How it works

+
+
1

Create

Register the KB with an embedding model, optional reranking model, storage backend, and column mapping.

+
2

Insert

Feed rows from any table or view. Each row is chunked, embedded, and written to the vector store.

+
3

Query

Use WHERE content = '...' for semantic search, metadata columns for filtering, and relevance to threshold results.

+
4

Connect to Agent

Reference the KB in a CREATE AGENT statement β€” the agent reasons over it automatically.

+
+
+ +
+

# CREATE KNOWLEDGE_BASE

+
+
CREATE KNOWLEDGE_BASE
+
CREATE KNOWLEDGE_BASE [project_name.]kb_name
+USING
+  embedding_model  = { "provider": "...", "model_name": "...", "api_key": "..." },
+  reranking_model  = { "provider": "...", "model_name": "...", "api_key": "..." },
+  storage          = vector_db_conn.storage_table,
+  metadata_columns = ['col1', 'col2', ...],
+  content_columns  = ['col_a', 'col_b', ...],
+  id_column        = 'id_col';
+
+ +
SQL β€” full example
+
-- First connect PGVector as the vector store
+CREATE DATABASE my_pgvector
+WITH ENGINE = 'pgvector',
+PARAMETERS = {
+  "host": "127.0.0.1",
+  "port": 5432,
+  "database": "postgres",
+  "user": "user",
+  "password": "password",
+  "distance": "cosine"
+};
+
+-- Create the knowledge base
+CREATE KNOWLEDGE_BASE search_project.support_kb
+USING
+  embedding_model = {
+    "provider":   "openai",
+    "model_name": "text-embedding-3-large",
+    "api_key":    "sk-..."
+  },
+  reranking_model = {
+    "provider":   "openai",
+    "model_name": "gpt-4o",
+    "api_key":    "sk-...",
+    "method":     "multi-class"
+  },
+  storage          = my_pgvector.support_kb_store,
+  metadata_columns = ['product_name', 'priority', 'created_at'],
+  content_columns  = ['subject', 'body'],
+  id_column        = 'ticket_id';
+
+ +
+

# Parameters

+ +

embedding_model

+

Required. Converts text into vector representations for similarity search.

+
+ + + + + + + + + +
ProviderRequired paramsOptional params
openaimodel_name, api_keybase_url, api_version
openai_azuremodel_name, api_key, base_url, api_versionβ€”
googlemodel_name, api_keyβ€”
bedrockmodel_name, aws_access_key_id, aws_region_name, aws_secret_access_keyaws_session_token
snowflakemodel_name, api_key, account_idβ€”
ollamamodel_name, base_urlβ€”
+
πŸ’‘

Define default_embedding_model in config.json to avoid specifying it on every CREATE KNOWLEDGE_BASE.

+ +

reranking_model

+

Optional. Scores result chunks for relevance using an LLM. Supports same providers as embedding_model. Set to false to disable.

+
SQL β€” disable reranking
+
CREATE KNOWLEDGE_BASE my_kb
+USING
+  embedding_model  = { ... },
+  reranking_model  = false,
+  ...;
+ +
+ + + + + +
methodDescription
multi-class (default)Classifies each chunk into 4 relevance levels (0.25 / 0.5 / 0.75 / 1.0). Relevance = weighted sum of class probabilities.
binaryRelevant / not relevant. Uses log probability of the positive class.
+ +

storage

+

The vector database to store embeddings in. Connect it first with CREATE DATABASE.

+
βœ“

Recommended: PGVector β‰₯ 0.8.0 for best performance and hybrid search support.

+
πŸ’‘

MindsDB Docker Desktop Extension includes a built-in PGVector β€” storage is optional when using it.

+ +

metadata_columns

+

Array of column names used as metadata. Metadata enables fast pre-filtering before or alongside semantic search.

+
⚠

A column cannot be in both metadata_columns and content_columns.

+ +

content_columns

+

Array of column names whose text gets chunked and embedded. Multiple columns are concatenated. Defaults to a column named content if not specified.

+ +

id_column

+

Column that uniquely identifies each source row. Optional β€” defaults to the MD5 hash of content columns. Used for upsert logic when re-inserting data.

+
SQL β€” auto-generate ID when none exists
+
INSERT INTO my_kb (
+  SELECT ROW_NUMBER() OVER (ORDER BY created_at) AS id, *
+  FROM my_pg.raw_documents
+);
+
+ +
+

# ALTER KNOWLEDGE_BASE

+

Modify an existing KB configuration. The storage backend and embedding model type cannot be changed (would break existing embeddings), but you can rotate API keys, swap reranking models, and update column mappings.

+
+
ALTER KNOWLEDGE_BASE
+
ALTER KNOWLEDGE_BASE kb_name
+USING
+  param_name = value,
+  ...;
+
+ +
SQL β€” rotate API key
+
ALTER KNOWLEDGE_BASE support_kb
+USING
+  embedding_model = { "api_key": "sk-new-key" };
+ +
SQL β€” swap reranking model
+
ALTER KNOWLEDGE_BASE support_kb
+USING
+  reranking_model = {
+    "provider": "google",
+    "model_name": "gemini-2.0-flash",
+    "api_key": "AIza..."
+  };
+ +
SQL β€” update metadata columns
+
-- Add new fields while keeping existing ones filterable
+ALTER KNOWLEDGE_BASE support_kb
+USING
+  metadata_columns = ['product_name', 'priority', 'created_at', 'region'];
+ +
⚠

Changing metadata_columns doesn't remove old stored metadata. Only the columns listed in the most recent ALTER can be used in WHERE filters going forward.

+
+ +
+

# INSERT INTO Knowledge Base

+

Feed data into the KB. Rows are chunked, embedded, and stored in the vector database.

+
SQL β€” insert from table
+
INSERT INTO search_project.support_kb
+  SELECT ticket_id, subject, body, product_name, priority, created_at
+  FROM my_pg.support_tickets;
+ +
SQL β€” insert from view (enriched data)
+
INSERT INTO search_project.support_kb
+  SELECT * FROM search_project.enriched_tickets;
+ +
SQL β€” incremental insert (new rows only)
+
INSERT INTO search_project.support_kb
+  SELECT ticket_id, subject, body, product_name, priority, created_at
+  FROM my_pg.support_tickets
+  WHERE created_at > (
+    SELECT MAX(created_at) FROM search_project.support_kb
+  );
+ +

Performance options

+
SQL β€” skip duplicate check for bulk loads
+
-- Faster inserts when you know there are no duplicates
+INSERT INTO my_kb
+  SELECT * FROM my_pg.documents
+USING kb_no_upsert = true;
+ +
πŸ’‘

Track insert progress: SELECT * FROM information_schema.queries;

+
+ +
+

# Query Knowledge Base / Semantic Search

+

Query a knowledge base using the content pseudo-column for semantic search, metadata columns for structured filtering, and relevance for scoring.

+ +

Output columns

+
+ + + + + + + + + + +
ColumnDescription
idSource row identifier (from id_column)
chunk_idChunk identifier: <id>:<n>of<total>:<start>to<end>
chunk_contentThe actual text of this chunk
metadataJSON blob with all stored metadata fields
metadata colsIndividual metadata columns (e.g. product_name, priority)
distanceRaw vector distance (lower = more similar)
relevanceScore 0–1 from reranking model (or 1/(1+distance) if no reranker)
+ +

Semantic search

+
SQL β€” basic semantic search
+
SELECT *
+FROM search_project.support_kb
+WHERE content = 'how do I reset my password'
+LIMIT 5;
+ +
SQL β€” with relevance threshold
+
SELECT id, chunk_content, product_name, relevance
+FROM search_project.support_kb
+WHERE content = 'billing issue'
+  AND relevance >= 0.6
+LIMIT 10;
+ +

Metadata filtering

+
SQL β€” combine semantic + metadata filter
+
-- Only search within a specific product and priority
+SELECT id, chunk_content, relevance
+FROM search_project.support_kb
+WHERE content = 'cannot connect'
+  AND product_name = 'DataSync Pro'
+  AND priority <= 2
+  AND relevance >= 0.5;
+ +
SQL β€” metadata-only filter (no vector search)
+
SELECT *
+FROM search_project.support_kb
+WHERE product_name = 'DataSync Pro'
+  AND created_at BETWEEN '2024-01-01' AND '2024-12-31';
+ +

Supported filtering operators

+
+ + + + + + +
TypeOperators
Semantic (content col)= 'query', LIKE 'query', NOT LIKE, IN ('q1','q2'), NOT IN, OR, AND (intersection)
Metadata=, !=, <>, >, <, >=, <=, BETWEEN, LIKE, IN, NOT IN, AND, OR, NOT
Exclusionid != x, id NOT IN (SELECT id FROM kb WHERE content = '...')
+ +
πŸ’‘

Default LIMIT is 10. Default relevance threshold is β‰₯ 0 (no filtering). Specify both to control result count and quality independently.

+
+ +
+

# Hybrid Search

+

Hybrid search combines semantic similarity (vector embeddings) with exact keyword matching (BM25 full-text index). Use it when your queries include specific identifiers, acronyms, product codes, or technical terms that embeddings might miss.

+ +
⚠

Hybrid search requires PGVector as the knowledge base storage backend.

+ +
SQL β€” enable hybrid search (default alpha)
+
SELECT *
+FROM search_project.support_kb
+WHERE content = 'ACME-213 error'
+  AND hybrid_search = true;  -- alpha defaults to 0.5
+ +
SQL β€” tune the semantic/keyword balance
+
-- hybrid_search_alpha: 0 = pure keyword, 1 = pure semantic
+SELECT *
+FROM search_project.support_kb
+WHERE content = 'ticket ERR-4421'
+  AND hybrid_search_alpha = 0.3;  -- lean toward exact keyword match
+ +
SQL β€” disable reranker for hybrid search
+
-- Uses alpha-weighted average of BM25 + embedding scores instead
+SELECT *
+FROM search_project.support_kb
+WHERE content = 'ERR-4421'
+  AND hybrid_search_alpha = 0.2
+  AND reranking = false;
+ +

How it works

+

When you trigger hybrid search, both paths run in parallel:

+
+ + + + + +
PathMethodBest for
SemanticEmbedding vector similarityConceptual queries, natural language, paraphrases
KeywordBM25 full-text indexExact terms, product codes, acronyms, ticket IDs
+

Results from both paths are merged and reranked (via the KB's reranking model if available, or via alpha-weighted averaging if not).

+ +
βœ“

When to use hybrid search: any time users search for specific identifiers, technical terms, model numbers, or internal terminology alongside natural language queries.

+
+ +
+

# DESCRIBE KNOWLEDGE_BASE

+
SQL
+
DESCRIBE KNOWLEDGE_BASE support_kb;
+
+-- List all knowledge bases
+SHOW KNOWLEDGE_BASES;
+SHOW KNOWLEDGE_BASES WHERE project = 'search_project';
+ +

The DESCRIBE output includes: NAME, PROJECT, STORAGE, PARAMS (embedding/reranking config), INSERT_STARTED_AT, INSERT_FINISHED_AT, PROCESSED_ROWS, ERROR.

+
+ +
+

# DROP KNOWLEDGE_BASE

+
+
DROP KNOWLEDGE_BASERemoves the KB and all stored embeddings
+
DROP KNOWLEDGE_BASE [IF EXISTS] kb_name;
+DROP KNOWLEDGE_BASE [IF EXISTS] project_name.kb_name;
+
+
⚠

This permanently removes all embeddings and metadata from the vector store. Cannot be undone.

+ +
+
+ + +
+

Agents

+

An agent is the conversational interface over your data. It combines an LLM with access to knowledge bases and database tables, enabling natural language queries over structured and unstructured data alike. This is the final piece of the semantic search pipeline.

+ +

How agents work

+
+
1

Input Processing

Builds a real-time data catalog from 5-row samples of each connected object. Extracts the question and structures LLM input.

+
2

Planning

Determines which knowledge bases and tables are relevant. Prepares SQL queries as needed.

+
3

Exploration Loop

Executes queries, collects results, adjusts if needed. Up to 20 queries per request.

+
4

Synthesis

Aggregates results and synthesizes a natural language or structured response.

+
+ +
πŸ’‘

Performance tip: keep connected objects to ≀ 10. Create views to pre-aggregate and simplify data before connecting to the agent. The clearer your prompt_template, the more accurate the responses.

+
+ +
+

# CREATE AGENT

+
+
CREATE AGENT
+
CREATE AGENT [project_name.]agent_name
+USING
+  model = {
+    "provider":    "openai",
+    "model_name":  "gpt-4o",
+    "api_key":     "sk-...",
+    "base_url":    "https://...",   -- optional
+    "api_version": "2024-02-01"    -- optional, required for Azure
+  },
+  data = {
+    "knowledge_bases": ["project.kb_name", ...],
+    "tables":          ["datasource.table_name", ...]
+  },
+  prompt_template = 'describe your data here',
+  timeout = 30,
+  mode    = 'text';  -- 'text' or 'sql'
+
+ +
SQL β€” semantic search agent
+
CREATE AGENT search_project.support_agent
+USING
+  model = {
+    "provider":   "openai",
+    "model_name": "gpt-4o",
+    "api_key":    "sk-..."
+  },
+  data = {
+    "knowledge_bases": ["search_project.support_kb"],
+    "tables": [
+      "my_pg.products",
+      "my_pg.users"
+    ]
+  },
+  prompt_template = '
+    search_project.support_kb contains customer support tickets with fields:
+      - chunk_content: ticket text
+      - product_name:  product the ticket is about
+      - priority:      1 (critical) to 4 (low)
+      - created_at:    submission date
+    my_pg.products contains product catalog.
+    my_pg.users contains user account data.
+    Answer questions accurately and cite which tickets are relevant.
+  ',
+  timeout = 60;
+ +

Supported LLM providers

+
+ + + + + + + + + + +
ProviderNotable models
openaigpt-4o, gpt-4.1, gpt-4.1-mini, o3-mini, o4-mini
anthropicclaude-3-opus-20240229, claude-3-sonnet-20240229, claude-3-haiku-20240307
googlegemini-2.5-pro-preview-03-25, gemini-2.0-flash, gemini-1.5-pro
bedrockAll AWS Bedrock models (requires aws_region_name, aws_access_key_id, aws_secret_access_key)
ollamallama2, mistral, mixtral, codellama, gemma, phi, qwen, and more
nvidia_nimmeta/llama-3_1-70b-instruct, mistralai/mistral-large, and more
writerpalmyra-x5, palmyra-x4
+ +

Connect all objects from a source at once

+
SQL β€” wildcard data connection
+
CREATE AGENT my_agent
+USING
+  model = { ... },
+  data = {
+    "knowledge_bases": ["search_project.*"],  -- all KBs in project
+    "tables":          ["my_pg.*"]            -- all tables in datasource
+  },
+  prompt_template = '...';
+ +

Use default model from config

+
JSON β€” config.json
+
{
+  "default_llm": {
+    "provider":   "openai",
+    "model_name": "gpt-4o",
+    "api_key":    "sk-..."
+  }
+}
+

When default_llm is set, omit the model parameter from CREATE AGENT.

+
+ +
+

# SELECT FROM AGENT

+

Query the agent with a natural language question. The agent returns either a free-text answer or structured columns depending on how you write the SELECT.

+ +
SQL β€” natural language answer
+
SELECT answer
+FROM search_project.support_agent
+WHERE question = 'What are the most common issues with DataSync Pro?';
+ +
SQL β€” structured output
+
-- Agent formats its response to match the requested columns
+SELECT issue_type, ticket_count, example_ticket_id
+FROM search_project.support_agent
+WHERE question = 'What are the top 5 issue types for DataSync Pro this month?';
+ +
SQL β€” override params at query time
+
-- Test with a different model without changing the agent definition
+SELECT answer
+FROM search_project.support_agent
+WHERE question = 'Summarize open critical tickets'
+USING
+  model = {
+    "provider":   "anthropic",
+    "model_name": "claude-3-5-sonnet-20241022",
+    "api_key":    "sk-ant-..."
+  };
+
+ +
+

# ALTER AGENT

+
+
ALTER AGENTUpdate model, data, or prompt β€” any combination
+
ALTER AGENT agent_name
+USING
+  model           = { ... },
+  data            = { "knowledge_bases": [...], "tables": [...] },
+  prompt_template = '...';
+
+ +
SQL β€” add a new KB to an agent
+
ALTER AGENT search_project.support_agent
+USING
+  data = {
+    "knowledge_bases": [
+      "search_project.support_kb",
+      "search_project.docs_kb"     -- new KB added
+    ],
+    "tables": ["my_pg.products"]
+  };
+ +
SQL β€” upgrade the model
+
ALTER AGENT search_project.support_agent
+USING
+  model = {
+    "provider":   "openai",
+    "model_name": "gpt-4.1",
+    "api_key":    "sk-..."
+  };
+
+ +
+

# DROP / SHOW AGENTS

+
SQL
+
-- Drop an agent
+DROP AGENT agent_name;
+DROP AGENT project_name.agent_name;
+
+-- List agents
+SHOW AGENTS;
+SHOW AGENTS WHERE project = 'search_project';
+SHOW AGENTS WHERE name = 'support_agent';
+
+
+ + +
+

Jobs

+

Jobs schedule any SQL statement (or sequence of statements) to run automatically β€” once at a future time, or repeatedly on an interval. Use jobs to keep your knowledge bases up to date as new data arrives.

+ +

# CREATE JOB

+
+
CREATE JOB
+
CREATE JOB [IF NOT EXISTS] [project_name.]job_name [AS] (
+  <statement_1>[; <statement_2>][; ...]
+)
+[START <date>]
+[END   <date>]
+[EVERY [number] <period>]
+[IF (<condition_statement>)];
+
+ +
+ + + + + + + +
ClauseDescription
START <date>When to begin. Defaults to now if omitted.
END <date>When to stop recurring. Omit for indefinite repetition.
EVERY [n] <period>Repetition frequency. Omit to run once. Period values: minute, hour, day, week, month.
IF (...)Only execute if the condition query returns rows.
+ +

Date format: 'YYYY-MM-DD HH:MM:SS' or 'YYYY-MM-DD'. Timezone: UTC.

+ +
SQL β€” refresh KB every hour
+
CREATE JOB search_project.refresh_support_kb (
+  INSERT INTO search_project.support_kb
+    SELECT ticket_id, subject, body, product_name, priority, created_at
+    FROM my_pg.support_tickets
+    WHERE created_at > LAST
+)
+EVERY hour;
+ +
SQL β€” conditional job (only if new data exists)
+
CREATE JOB search_project.conditional_kb_refresh (
+  INSERT INTO search_project.support_kb
+    SELECT ticket_id, subject, body, product_name, priority, created_at
+    FROM my_pg.support_tickets
+    WHERE created_at > LAST
+)
+EVERY 30 minutes
+IF (
+  SELECT * FROM my_pg.support_tickets
+  WHERE created_at > LAST
+);
+ +
SQL β€” multi-step job with dynamic table name
+
CREATE JOB search_project.daily_snapshot (
+  CREATE TABLE my_pg.`kb_snapshot_{{START_DATE}}` (
+    SELECT id, chunk_content, product_name, relevance
+    FROM search_project.support_kb
+    WHERE content = 'critical errors'
+      AND relevance >= 0.7
+  )
+)
+EVERY day;
+

Available template variables: {{START_DATETIME}}, {{START_DATE}}, {{PREVIOUS_START_DATETIME}}.

+
+ +
+

# LAST keyword

+

LAST stores the maximum value seen in the previous run. Use it to process only new rows on each execution β€” turning any data source into a stream.

+ +
SQL β€” basic LAST usage
+
-- First run: returns nothing (no prior state)
+-- Second run: returns rows inserted since the first run
+SELECT id, body
+FROM my_pg.support_tickets
+WHERE id > LAST;
+ +
SQL β€” LAST with seed value for first run
+
-- First run uses 1000 as the seed; subsequent runs use LAST
+SELECT id, body
+FROM my_pg.support_tickets
+WHERE id > COALESCE(LAST, 1000);
+ +
πŸ’‘

To reset the LAST context in the editor: SET context = 0; or SET context = null;

+
+ +
+

# DROP JOB

+
+
DROP JOB
+
DROP JOB [IF EXISTS] [project_name.]job_name;
+
+
+ +
+

# SHOW / Query JOBS

+
SQL
+
-- All jobs
+SHOW JOBS;
+
+-- Jobs in a project
+SHOW JOBS WHERE project = 'search_project';
+
+-- Full details
+SELECT * FROM search_project.jobs;
+SELECT * FROM information_schema.jobs;
+
+-- Execution history (includes errors)
+SELECT * FROM log.jobs_history
+WHERE project = 'search_project'
+ORDER BY run_start DESC
+LIMIT 20;
+ +

The jobs table has columns: NAME, PROJECT, RUN_START, RUN_END, NEXT_RUN_AT, SCHEDULE_STR, QUERY. The history table adds ERROR.

+
+
+ + +
+

Functions

+ +

# Standard Functions

+

MindsDB runs queries on DuckDB internally β€” all DuckDB functions are available. MySQL-style functions are adapted automatically.

+ +

DuckDB function categories

+
+ + + + + + + + + + +
CategoryExamples
AggregateSUM, AVG, COUNT, MIN, MAX, LAST, LIST
TextLOWER, UPPER, TRIM, REPLACE, REGEXP_MATCHES, STRING_SPLIT
Date / TimeNOW, DATE_TRUNC, DATE_DIFF, STRFTIME, EPOCH
NumericROUND, FLOOR, CEIL, ABS, RANDOM, POW
JSONJSON_EXTRACT, JSON_OBJECT, JSON_ARRAY
WindowROW_NUMBER, RANK, LAG, LEAD, FIRST_VALUE
List / ArrayLIST_AGG, UNNEST, ARRAY_LENGTH, LIST_DISTINCT
+ +

MySQL-compatible functions

+
+ + + + + + + + + +
FunctionDescription
CHAR(), FORMAT(), INSTR(), LOCATE()String manipulation
LENGTH(), SUBSTRING_INDEX(), UNHEX()String utilities
ADDDATE(), DATE_ADD(), DATE_SUB(), DATEDIFF()Date arithmetic
DATE_FORMAT(), FROM_UNIXTIME(), CURDATE(), CURTIME()Date formatting
TIMESTAMPDIFF(), CONVERT_TZ()Timezone & diff
REGEXP_SUBSTR(), SHA2()Regex & crypto
+
+ +
+

# Variables

+

Store API keys and reusable values as session variables. Reference them in CREATE KNOWLEDGE_BASE, CREATE AGENT, and other statements.

+ +
SQL β€” set variables
+
-- From an environment variable (must start with MDB_)
+SET @openai_key = from_env('MDB_OPENAI_API_KEY');
+
+-- Directly
+SET @pgvector_host = '127.0.0.1';
+
+-- Use in CREATE statements
+CREATE KNOWLEDGE_BASE my_kb
+USING
+  embedding_model = {
+    "provider":   "openai",
+    "model_name": "text-embedding-3-large",
+    "api_key":    @openai_key
+  },
+  ...;
+
+ +
+

# FROM_ENV()

+

Pull values directly from environment variables into SQL statements. Only variables prefixed with MDB_ can be accessed.

+ +
SQL
+
-- Inline in a CREATE statement
+CREATE KNOWLEDGE_BASE my_kb
+USING
+  embedding_model = {
+    "provider":   "openai",
+    "model_name": "text-embedding-3-large",
+    "api_key":    from_env('MDB_OPENAI_API_KEY')
+  },
+  ...;
+
+CREATE AGENT my_agent
+USING
+  model = {
+    "provider":   "openai",
+    "model_name": "gpt-4o",
+    "api_key":    from_env('MDB_OPENAI_API_KEY')
+  },
+  ...;
+ +
⚠

Only environment variables with names starting with MDB_ are accessible via from_env().

+
+ +

MindsDB SQL Reference Β· Built with the Anton dark design system

+
+ +
+
+ + + + + + + + + diff --git a/docs/integrations/ai-engines/neuralforecast.mdx b/docs/integrations/ai-engines/neuralforecast.mdx deleted file mode 100644 index 619578e8552..00000000000 --- a/docs/integrations/ai-engines/neuralforecast.mdx +++ /dev/null @@ -1,200 +0,0 @@ ---- -title: Nixtla's NeuralForecast Integration with MindsDB -sidebarTitle: NeuralForecast ---- - -Nixtla’s NeuralForecast provides a diverse array of neural forecasting models, prioritizing their ease of use and resilience. These models encompass a spectrum of options, including traditional networks like MLP and RNNs, as well as cutting-edge innovations such as NBEATS, NHITS, TFT, and various other architectural approaches. - -You can learn more about its features [here](https://nixtla.github.io/neuralforecast/). - -## How to bring NeuralForecast Models to MindsDB - -Before creating a model, you will need to create an ML engine for NeuralForecast using the `CREATE ML_ENGINE` statement: - -```sql -CREATE ML_ENGINE neuralforecast -FROM neuralforecast; -``` - -Once the ML engine is created, we use the `CREATE MODEL` statement to create the NeuralForecast model in MindsDB. - -```sql -CREATE MODEL model_name -FROM data_source - (SELECT * FROM table_name) -PREDICT column_to_be_predicted -GROUP BY column_name, column_name, ... -ORDER BY date_column -WINDOW 12 -- model looks back at sets of 12 rows each -HORIZON 3 -- model forecasts the next 3 rows -USING - engine = 'neuralforecast' - frequency = 'Q', - train_time = 0.01, - exogenous_vars = ['var_1', 'var_2']; -``` - -To ensure that the model is created based on the NeuralForecast engine, include the `USING` clause at the end. - -The `frequency` parameter informs the model about the expected time difference between each measurement ([supported values here](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)). And the `train_time` parameter defines the training time - it defaults to 1, and lower values will reduce training time linearly by reducing the number of searches allowed for the best configuration by AutoNHITS. You can also define `exogenous_vars` as a parameter in the `USING` clause - these are complementary variables in the table that may improve forecast accuracy. - -## Example - -Let's go through an example of how to use Nixtla's NeuralForecast with MindsDB to forecast monthly expenditures based on historical data. - -Please note that before using the NeuralForecast engine, you should create it from the MindsDB editor, or other clients through which you interact with MindsDB, with the below command: - -```sql -CREATE ML_ENGINE neuralforecast -FROM neuralforecast; -``` - -You can check the available engines with this command: - -```sql -SHOW ML_ENGINES; -``` - -If you see the NeuralForecast engine on the list, you are ready to follow the tutorials. - -We use a table from our MySQL public demo database, so let’s start by connecting MindsDB to it: - -```sql -CREATE DATABASE mysql_demo_db -WITH ENGINE = 'mysql', -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" -}; -``` - -Now that we’ve connected our database to MindsDB, let’s query the data to be used in the example: - -```sql -SELECT * -FROM mysql_demo_db.historical_expenditures -LIMIT 3; -``` - -Here is the output: - -```sql -+------------+----------+-------------+ -| month | category | expenditure | -+------------+----------+-------------+ -| 1982-04-01 | clothing | 359.9 | -| 1982-05-01 | clothing | 386.6 | -| 1982-06-01 | clothing | 350.5 | -+------------+----------+-------------+ -``` - -The `historical_expenditures` table stores monthly expenditure data for various categories, such as `food`, `clothing`, `industry`, and more. - -Let's create a model table to predict the expenditures: - -```sql -CREATE MODEL quarterly_expenditure_forecaster -FROM mysql_demo_db - (SELECT * FROM historical_expenditures) -PREDICT expenditure -GROUP BY category -ORDER BY month -WINDOW 12 -HORIZON 3 -USING ENGINE = 'neuralforecast'; -``` - -The `CREATE MODEL` statement creates, trains, and deploys the model. Here, we predict the `expenditure` column values. As it is a time series model, we order the data by the `month` column. Additionally, we group data by the `category` column - the predictions are made for each group independently (here, for each category). - -Next, we define the `WINDOW` and `HORIZON` clauses. The `WINDOW` clause specifies the number of rows we look back at (here, we look back at sets of 12 rows). And the `HORIZON` clause defines for how many rows the predictions are made (here, for the next 3 rows). - - -Please visit our docs on the [`CREATE MODEL`](/sql/create/model) statement to learn more. - - -The `ENGINE` parameter in the `USING` clause specifies the ML engine used to make predictions. - -We can check the training status with the following query: - -```sql -DESCRIBE quarterly_expenditure_forecaster; -``` - -Once the model status is `complete`, the behavior is the same as with any other AI table – you can query for batch predictions by joining it with a data table: - -```sql -SELECT m.month as month, m.expenditure as forecasted -FROM mindsdb.quarterly_expenditure_forecaster as m -JOIN mysql_demo_db.historical_expenditures as t -WHERE t.month > LATEST -AND t.category = 'clothing'; -``` - -Here is the output data: - -```sql -+----------------------------+------------------+ -| month | forecasted | -+----------------------------+------------------+ -| 2017-10-01 00:00:00.000000 | 10802.2109375 | -| 2017-11-01 00:00:00.000000 | 10749.2041015625 | -| 2017-12-01 00:00:00.000000 | 12423.849609375 | -+----------------------------+------------------+ -``` - -The `historical_expenditures` table is used to make batch predictions. Upon joining the `quarterly_expenditure_forecaster` model with the `historical_expenditures` table, we get predictions for the next quarter as defined by the `HORIZON 3` clause. - -Please note that the output `month` column contains both the date and timestamp. This format is used by default, as the timestamp is required when dealing with the hourly frequency of data. - -MindsDB provides the `LATEST` keyword that marks the latest training data point. In the `WHERE` clause, we specify the `month > LATEST` condition to ensure the predictions are made for data after the latest training data point. - -Let’s consider our `quarterly_expenditure_forecaster` model. We train the model using data until the third quarter of 2017, and the predictions come for the fourth quarter of 2017 (as defined by `HORIZON 3`). - -## NeuralForecast + HierarchicalForecast - -The NeuralForecast handler also supports hierarchical reconciliation via Nixtla’s [HierarchicalForecast package](https://nixtla.github.io/hierarchicalforecast/). Hierarchical reconciliation may improve prediction accuracy when the data has a hierarchical structure. - -In this example, there may be a hierarchy as total expenditure is comprised of 7 different categories. - -```sql -SELECT DISTINCT category -FROM mysql_demo_db.historical_expenditures; -``` - -Here are the available categories: - -```sql -+-------------------+ -| category | -+-------------------+ -| food | -| household_goods | -| clothing | -| department_stores | -| other | -| cafes | -| industry | -+-------------------+ -``` - -Spending in each category may be related over time. For example, if spending on `food` rises in October 2017, it may be more likely that spending on `cafes` also rises in October 2017. Hierarchical reconciliation can account for this shared information. - -Here is how we can create a model: - -```sql -CREATE MODEL hierarchical_expenditure_forecaster -FROM mysql_demo_db - (SELECT * FROM historical_expenditures) -PREDICT expenditure -GROUP BY category -ORDER BY month -HORIZON 3 -USING - ENGINE = 'neuralforecast', - HIERARCHY = [β€˜category’]; -``` - -Predictions with this model account for the hierarchical structure. The output may differ from the default model, which does not assume any hierarchy. diff --git a/docs/integrations/ai-engines/statsforecast.mdx b/docs/integrations/ai-engines/statsforecast.mdx deleted file mode 100644 index 483b2c9c327..00000000000 --- a/docs/integrations/ai-engines/statsforecast.mdx +++ /dev/null @@ -1,359 +0,0 @@ ---- -title: Nixtla's StatsForecast Integration with MindsDB -sidebarTitle: StatsForecast ---- - -Nixtla’s StatsForecast integration offers univariate time series forecasting models. StatsForecast uses classical methods such as ARIMA, rather than deep learning. Models train very quickly and generalize well, so are unlikely to overfit. Models also perform well on short time series, where deep learning models may be more likely to overfit. - -You can learn more about its features [here](https://nixtla.github.io/statsforecast/). - -## How to bring StatsForecast Models to MindsDB - -Before creating a model, you will need to create an ML engine for StatsForecast using the `CREATE ML_ENGINE` statement: - -```sql -CREATE ML_ENGINE statsforecast -FROM statsforecast; -``` - -Once the ML engine is created, we use the `CREATE MODEL` statement to create the StatsForecast model in MindsDB. - -```sql -CREATE MODEL model_name -FROM data_source - (SELECT * FROM table_name) -PREDICT column_to_be_predicted -GROUP BY column_name, column_name, ... -ORDER BY date_column -WINDOW 12 -- model looks back at sets of 12 rows each -HORIZON 3 -- model forecasts the next 3 rows -USING - engine = 'statsforecast', - model_name = 'model', - frequency = 'X', - season_length = 1, - hierarchy = ['column']; -``` - -The following parameters can be used while creating the StatsForecast model: - -- `model_name` is an optional parameter that lets users specify one of the models from [this list](https://github.com/Nixtla/statsforecast?tab=readme-ov-file#models), which otherwise is chosen automatically. -- `frequency` is an optional parameter that defines the frequency of data such as daily, weekly, monthly, etc. Available values include "H", "M", "MS", "Q", "SM", "BM", "BMS", "BQ", "BH". -- `season_length` is an optional parameter that defines the length of the season depending on frequency. For instance, `season_length` defaults to `12` if `frequency` is set to `M` (months). -- `hierarchy` is an optional parameter that may improve prediction accuracy when the data has a hierarchical structure. [See more here](/integrations/ai-engines/statsforecast#statsforecast-hierarchicalforecast). - -To ensure that the model is created based on the StatsForecast engine, include the `USING` clause at the end. - -## Example - -Let's go through an example of how to use Nixtla's StatsForecast with MindsDB to forecast monthly expenditures. - -Please note that before using the StatsForecast engine, you should create it from the MindsDB editor, or other clients through which you interact with MindsDB, with the below command: - -```sql -CREATE ML_ENGINE statsforecast -FROM statsforecast; -``` - -You can check the available engines with this command: - -```sql -SHOW ML_ENGINES; -``` - -If you see the StatsForecast engine on the list, you are ready to follow the tutorials. - -### Tutorial using SQL - -In this tutorial, we create a model to predict expenditures based on historical data using the StatsForecast engine. - -We use a table from our MySQL public demo database, so let’s start by connecting MindsDB to it: - -```sql -CREATE DATABASE mysql_demo_db -WITH ENGINE = 'mysql', -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" -}; -``` - -Now that we’ve connected our database to MindsDB, let’s query the data to be used in the example: - -```sql -SELECT * -FROM mysql_demo_db.historical_expenditures -LIMIT 3; -``` - -Here is the output: - -```sql -+------------+----------+-------------+ -| month | category | expenditure | -+------------+----------+-------------+ -| 1982-04-01 | food | 1162.6 | -| 1982-05-01 | food | 1150.9 | -| 1982-06-01 | food | 1160 | -+------------+----------+-------------+ -``` - -The `historical_expenditures` table stores monthly expenditure data for various categories, such as `food`, `clothing`, `industry`, and more. - -Let's create a model table to predict the expenditures: - -```sql -CREATE MODEL quarterly_expenditure_forecaster -FROM mysql_demo_db - (SELECT * FROM historical_expenditures) -PREDICT expenditure -GROUP BY category -ORDER BY month -HORIZON 3 -USING ENGINE = 'statsforecast'; -``` - - -Please visit our docs on the [`CREATE MODEL`](/sql/create/model) statement to learn more. - - -Please note that the `WINDOW` clause is not required because StatsForecast automatically calculates the best window as part of hyperparameter tuning. - -The `ENGINE` parameter in the `USING` clause specifies the ML engine used to make predictions. - -We can check the training status with the following query: - -```sql -DESCRIBE quarterly_expenditure_forecaster; -``` - -One of the pros of using the StatsForecast engine is that it is fast - it doesn’t take long until the model completes the training process. - -Once the model status is `complete`, the behavior is the same as with any other AI table – you can query for batch predictions by joining it with a data table: - -```sql -SELECT m.month as month, m.expenditure as forecasted -FROM mindsdb.quarterly_expenditure_forecaster as m -JOIN mysql_demo_db.historical_expenditures as t -WHERE t.month > LATEST -AND t.category = 'food'; -``` - -Here is the output data: - -```sql -+----------------------------+-----------------+ -| month | forecasted | -+----------------------------+-----------------+ -| 2017-10-01 00:00:00.000000 | 10256.251953125 | -| 2017-11-01 00:00:00.000000 | 10182.58984375 | -| 2017-12-01 00:00:00.000000 | 10316.259765625 | -+----------------------------+-----------------+ -``` - -The `historical_expenditures` table is used to make batch predictions. Upon joining the `quarterly_expenditure_forecaster` model with the `historical_expenditures` table, we get predictions for the next quarter as defined by the `HORIZON 3` clause. - -Please note that the output `month` column contains both the date and timestamp. This format is used by default, as the timestamp is required when dealing with the hourly frequency of data. - -MindsDB provides the `LATEST` keyword that marks the latest training data point. In the `WHERE` clause, we specify the `month > LATEST` condition to ensure the predictions are made for data after the latest training data point. - -Let’s consider our `quarterly_expenditure_forecaster` model. We train the model using data until the third quarter of 2017, and the predictions come for the fourth quarter of 2017 (as defined by `HORIZON 3`). - -### Tutorial using MQL - -In this tutorial, we create a model to predict expenditures based on historical data using the StatsForecast engine. - -Before we start, visit our docs to learn how to connect [Mongo Compass](https://docs.mindsdb.com/connect/mongo-compass) and [Mongo Shell](https://docs.mindsdb.com/connect/mongo-shell) to MindsDB. - -We use a collection from our Mongo public demo database, so let’s start by connecting MindsDB to it from Mongo Compass or Mongo Shell: - -```bash -> use mindsdb -> db.databases.insertOne({ - 'name': 'mongo_demo_db', - 'engine': 'mongodb', - 'connection_args': { - "host": "mongodb+srv://user:MindsDBUser123!@demo-data-mdb.trzfwvb.mongodb.net/", - "database": "public" - } - }) -``` - -Now that we’ve connected our database to MindsDB, let’s query the data to be used in the example. - -```bash -> use mongo_demo_db -> db.historical_expenditures.find({}).limit(3) -``` - -Here is the output: - -```bash -{ - _id: '63fd2388bee7187f230f56fc', - month: '1982-04-01', - category: 'food', - expenditure: '1162.6' -} -{ - _id: '63fd2388bee7187f230f56fd', - month: '1982-05-01', - category: 'food', - expenditure: '1150.9' -} -{ - _id: '63fd2388bee7187f230f56fe', - month: '1982-06-01', - category: 'food', - expenditure: '1160' -} -``` - -The `historical_expenditures` collection stores monthly expenditure data for various categories, such as `food`, `clothing`, `industry`, and more. - -Let's create a model to predict the expenditures: - -```bash -> use mindsdb -> db.predictors.insertOne({ - name: 'quarterly_expenditure_forecaster', - predict: 'expenditure', - connection: 'mongo_demo_db', - select_data_query: 'db.historical_expenditures.find({})', - training_options: { - timeseries_settings: { - order_by: ['month'], - group_by: ['category'], - horizon: 3 - }, - engine: 'statsforecast' - } - }) -``` - - -Please visit our docs on the [`insertOne`](/mongo/insert) statement to learn more. - - -Please note that the `window` clause is not required because StatsForecast automatically calculates the best window as part of hyperparameter tuning. - -The `engine` parameter in the `training_options` clause specifies the ML engine used to make predictions. - -We can check the training status with the following query: - -```bash -> db.models.find({ - name: 'quarterly_expenditure_forecaster' - }) -``` - -One of the pros of using the StatsForecast engine is that it is fast - it doesn’t take long until the model completes the training process. - -Once the model status is `complete`, the behavior is the same as with any other AI collection – you can query for batch predictions by joining it with a data collection: - -```bash -> db.quarterly_expenditure_forecaster.find({ - "collection": "mongo_pred_01.historical_expenditures", - "query": {"category": "food"} - }).limit(3) -``` - -By default the forecasts are made for `month > LATEST`. - -Here is the output data: - -```bash -{ - _id: '63fd2388bee7187f230f58a5', - month: 2017-10-01T00:00:00.000Z, - category: 'food', - expenditure: 10256.251953125 -} -{ - _id: '63fd2388bee7187f230f58a4', - month: 2017-11-01T00:00:00.000Z, - category: 'food', - expenditure: 10182.58984375 -} -{ - _id: '63fd2388bee7187f230f58a3', - month: 2017-12-01T00:00:00.000Z, - category: 'food', - expenditure: 10316.259765625 -} -``` - -The `historical_expenditures` collection is used to make batch predictions. Upon joining the `quarterly_expenditure_forecaster` model with the `historical_expenditures` collection, we get predictions for the next quarter as defined by the `horizon: 3` clause. - -Please note that the output `month` column contains both the date and timestamp. This format is used by default, as the timestamp is required when dealing with the hourly frequency of data. - -MindsDB provides the `latest` keyword that marks the latest training data point. In the `where` clause, we specify the `month > latest` condition to ensure the predictions are made for data after the latest training data point. - -Let’s consider our `quarterly_expenditure_forecaster` model. We train the model using data until the third quarter of 2017, and the predictions come for the fourth quarter of 2017 (as defined by `horizon: 3`). - -## StatsForecast + HierarchicalForecast - -The StatsForecast handler also supports hierarchical reconciliation via Nixtla’s [HierarchicalForecast package](https://nixtla.github.io/hierarchicalforecast/). Hierarchical reconciliation may improve prediction accuracy when the data has a hierarchical structure. - -In this example, there may be a hierarchy as total expenditure is comprised of 7 different categories. - -```sql -SELECT DISTINCT category -FROM mysql_demo_db.historical_expenditures; -``` - -Here are the available categories: - -```sql -+-------------------+ -| category | -+-------------------+ -| food | -| household_goods | -| clothing | -| department_stores | -| other | -| cafes | -| industry | -+-------------------+ -``` - -Spending in each category may be related over time. For example, if spending on `food` rises in October 2017, it may be more likely that spending on `cafes` also rises in October 2017. Hierarchical reconciliation can account for this shared information. - -Here is how we can create a model: - -```sql -CREATE MODEL hierarchical_expenditure_forecaster -FROM mysql_demo_db - (SELECT * FROM historical_expenditures) -PREDICT expenditure -GROUP BY category -ORDER BY month -HORIZON 3 -USING - ENGINE = 'statsforecast', - HIERARCHY = [β€˜category’]; -``` - -The `CREATE MODEL` statement creates, trains, and deploys the model. Here, we predict the `expenditure` column values. As it is a time series model, we order the data by the `month` column. Additionally, we group data by the `category` column - the predictions are made for each group independently (here, for each category). The `HORIZON` clause defines for how many rows the predictions are made (here, for the next 3 rows). - -You can use the `DESCRIBE [MODEL]` command to check for details: - -```sql -DESCRIBE hierarchical_expenditure_forecaster.model; -``` - -On execution, we get: - -```sql -+------------+-----------+---------------+--------------+ -| model_name | frequency | season_length | hierarchy | -+------------+-----------+---------------+--------------+ -| AutoARIMA | MS | 1 | ["category"] | -+------------+-----------+---------------+--------------+ -``` - -Predictions with this model account for the hierarchical structure. The output may differ from the default model, which does not assume any hierarchy. diff --git a/docs/integrations/ai-engines/timegpt.mdx b/docs/integrations/ai-engines/timegpt.mdx deleted file mode 100644 index 4a0471066fa..00000000000 --- a/docs/integrations/ai-engines/timegpt.mdx +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: Nixtla's TimeGPT Integration with MindsDB -sidebarTitle: TimeGPT ---- - -TimeGPT by Nixtla is a generative pre-trained model specifically designed for predicting time series data. TimeGPT takes time series data as input and produces forecasted outputs. TimeGPT can be effectively employed in various applications, including demand forecasting, anomaly detection, financial prediction, and more. - -You can learn more about its features [here](https://nixtla.github.io/nixtla/). - -## How to bring TimeGPT Models to MindsDB - -Before creating a model, you will need to create an ML engine for TimeGPT using the `CREATE ML_ENGINE` statement and providing the TimeGPT API key: - -```sql -CREATE ML_ENGINE timegpt -FROM timegpt -USING - timegpt_api_key = 'timegpt_api_key'; -``` - -Once the ML engine is created, we use the `CREATE MODEL` statement to create the TimeGPT model in MindsDB. - -```sql -CREATE MODEL model_name -FROM data_source - (SELECT * FROM table_name) -PREDICT column_to_be_predicted -GROUP BY column_name, column_name, ... -ORDER BY date_column -HORIZON 3 -- model forecasts the next 3 rows -USING ENGINE = 'timegpt'; -``` - -To ensure that the model is created based on the TimeGPT engine, include the `USING` clause at the end, which defines the `engine` and lists all parameters used with time-series models, including `GROUP BY`, `ORDER BY`, `HORIZON`. - -What's different about the TimeGPT engine is that it does not expose the `WINDOW` parameter in its API, so as a user you need to send a payload with at least N rows, where N depends on the model and the frequency of the series. This is automatically handled by MindsDB in the [TimeGPT handler code](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/timegpt_handler). - -## Example - -Nixtla's TimeGPT model can be used to obtain real-time forecasts of the trading data from Binance. - - -Follow [this link](https://www.youtube.com/watch?v=8LfpFocdyEo&list=PLq3sJIV6w5BoHJ9gFSedwtb_pqk--4K89&index=3) to watch a video on integrating TimeGPT model with Binance data. - - -First, connect to Binance from MindsDB executing this command: - -```sql -CREATE DATABASE my_binance -WITH ENGINE = 'binance'; -``` - -Please note that before using the TimeGPT engine, you should create it from the MindsDB editor, or other clients through which you interact with MindsDB, with the below command: - -```sql -CREATE ML_ENGINE timegpt -FROM timegpt -USING - timegpt_api_key = 'timegpt_api_key'; -``` - -You can check the available engines with this command: - -```sql -SHOW ML_ENGINES; -``` - -If you see the TimeGPT engine on the list, you are ready to follow the tutorials. - -Now let's create a TimeGPT model and train it with data from Binance. - -```sql -CREATE MODEL cryptocurrency_forecast_model -FROM my_binance - ( - SELECT * - FROM aggregated_trade_data - WHERE symbol = 'BTCUSDT' - ) -PREDICT open_price -ORDER BY open_time -HORIZON 10 -USING ENGINE = 'timegpt'; -``` - -Use the `CREATE MODEL` statement to create, train, and deploy a model. The `FROM` clause defines the training data used to train the model - here, the latest Binance data is used. The `PREDICT` clause specifies the column to be predicted - here, the open price of the BTC/USDT trading pair is to be forecasted. - -As it is a time-series model, you should order the data by a date column - here, it is the open time when the open price takes effect. Finally, the `HORIZON` clause defines how many rows into the future the model will forecast - here, it forecasts the next 10 rows (the next 10 minutes, as the interval between Binance data rows is one minute). - - -Please note that the TimeGPT engine is sensitive to inconsistent intervals between data rows. Please check your data for missing, duplicated or irregular timestamps to mitigate errors that may arise if the intervals between data rows are inconsistent. - -In this example, the intervals between Binance data rows are consistently equal to one minute. - - -Before proceeding, make sure that the model status reads `complete`. - -```sql -DESCRIBE cryptocurrency_forecast_model; -``` - -To make forecasts, you must save the Binance data into a view: - -```sql -CREATE VIEW btcusdt_recent AS ( - SELECT * - FROM my_binance.aggregated_trade_data - WHERE symbol = 'BTCUSDT' -); -``` - -This view is going to be joined with the model to get forecasts: - -```sql -SELECT m.open_time , - m.open_price -FROM btcusdt_recent AS d -JOIN cryptocurrency_forecast_model AS m -WHERE d.open_time > LATEST; -``` diff --git a/docs/integrations/app-integrations/jira.mdx b/docs/integrations/app-integrations/jira.mdx deleted file mode 100644 index f4a3e8a4a42..00000000000 --- a/docs/integrations/app-integrations/jira.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Jira -sidebarTitle: Jira ---- - -This documentation describes the integration of MindsDB with [Jira](https://www.atlassian.com/software/jira/guides/getting-started/introduction), the #1 agile project management tool used by teams to plan, track, release and support world-class software with confidence. -The integration allows MindsDB to access data from Jira and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Jira to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Jira from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/jira_handler) as an engine. - -```sql -CREATE DATABASE jira_datasource -WITH - ENGINE = 'jira', - PARAMETERS = { - "url": "https://example.atlassian.net", - "username": "john.doe@example.com", - "api_token": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" - }; -``` - -Required connection parameters include the following: - -* `url`: The base URL for your Jira instance/server. -* `username`: The email address associated with your Jira account. -* `api_token`: The API token generated for your Jira account. -* `cloud`: (Optional) Set to `true` for Jira Cloud or `false` for Jira Server. Defaults to `true`. - - -Refer this [guide](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/) for instructions on how to create API tokens for your account. - - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM jira_datasource.table_name -LIMIT 10; -``` - - -The above example utilize `jira_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - \ No newline at end of file diff --git a/docs/model-context-protocol/usage.mdx b/docs/model-context-protocol/usage.mdx deleted file mode 100644 index 5f18ac91937..00000000000 --- a/docs/model-context-protocol/usage.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: MindsDB's MCP Server Usage and Tools -sidebarTitle: Usage ---- - -**MindsDB** is an MCP server that enables your MCP applications to answer questions over large-scale federated data spanning databases, data warehouses, and SaaS applications. - -## Start MindsDB as an MCP Server - -Follow the steps below to use MindsDB as an MCP server. - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). - -2. [Connect your data source](/mindsdb_sql/sql/create/database) and/or [upload files](/mindsdb_sql/sql/create/file) to MindsDB in order to ask questions over your data. - - - You can use our sample dataset that stores the sales manager data. - - ```sql - CREATE DATABASE sales_manager_data - WITH ENGINE = "postgres", - PARAMETERS = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "sales_manager_data" - }; - ``` - - -3. Start MindsDB MCP server, either with or without authentication. - - * Start MindsDB MCP server without authentication to connect it to [Cursor](/mcp/cursor_usage). - - ```bash - docker run --name mindsdb_container -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - * Start MindsDB MCP server with authentication to connect it to [OpenAI](/mcp/openai) or [Anthropic](/mcp/anthropic). - - ```bash - docker run --name mindsdb_container -p 47334:47334 -p 47335:47335 -e MINDSDB_USERNAME=admin -e MINDSDB_PASSWORD=password123 mindsdb/mindsdb - ``` - - Then get an auth token from MindsDB: - ```bash - curl -X POST -d '{"username":"admin","password":"password123"}' -H "Content-Type: application/json" http://localhost:47334/api/login - ``` - This will return a token that you can use in your MCP client. - -4. To confirm the MindsDB MCP server is running use `http://127.0.0.1:47334/mcp/status`. A successful response means your MCP environment is ready. - - -## MCP Tools - -MindsDB MCP API exposes a set of tools that enable users to interact with their data and extract valuable insights. - -**1. List Databases** - -The `list_databases` tool lists all data sources connected to MindsDB. - -**2. Query** - -The `query` tool executes queries on the federated data to extract data relevant to answering a given question. diff --git a/docs/og-image.png b/docs/og-image.png new file mode 100644 index 00000000000..0fa78280b10 Binary files /dev/null and b/docs/og-image.png differ diff --git a/docs/use-cases/in-database_ml/mindsdb-superset-snowflake.mdx b/docs/use-cases/in-database_ml/mindsdb-superset-snowflake.mdx deleted file mode 100644 index 08461e8a42b..00000000000 --- a/docs/use-cases/in-database_ml/mindsdb-superset-snowflake.mdx +++ /dev/null @@ -1,188 +0,0 @@ -# Using MindsDB Machine Learning to Solve a Real-World **time series** Problem - -Let’s use these powerful AI tables in a real-world scenario. (if you are not familiar with AI-Tables, you can learn about them in [here](/sql/tutorials/ai-tables/). - -Imagine that you are a data analyst at the Chicago Transit Authority. Every day, you need to optimize the number of buses per route to avoid overcrowded or empty buses. You need machine learning to forecast the number of rides per bus, per route, and by time of day. The data you have looks like the table below with route_id, timestamp, number of rides, and day-type (W = weekend) - -![Income vs Debt model](/assets/sql/tutorials/snowflake-superset/8-multivariate_problem.jpg) - -This is a difficult machine learning problem that is common in databases. A timestamp indicates that we are dealing with the time-series problem. The data is further complicated by the type of day (day-type) the row contains and this is called multivariate. Additionally, there is high-cardinality as each route will have multiple row entries each with different timestamps, rides, and day types. - -Let’s see how we can use machine learning with MindsDB to optimize the number of buses per route and visualize the results. - -## Set Up MindsDB - -First things first! You need to connect your database to MindsDB. One of the easy ways to do so is to create a [MindsDB cloud](/setup/cloud/) account. If you prefer to deploy MindsDB locally, please refer to installation instructions via [Docker](/setup/self-hosted/docker/) or [PyPI](/setup/self-hosted/pip/windows/). - -Once an account is created you can connect to Snowflake using standard parameters like database name (in this case the Chicago Transit Authority), host, port, username, password, etc. - -![mindsdb connect](/assets/sql/tutorials/snowflake-superset/9-connect_to_MindsDB.png) - - -## Connect MindsDB to the Data for model training - -MindsDB works through a MySQL Wire protocol. Therefore, you can connect to it using any MySQL client. Here, we’ll use the DBeaver database client and can see the Snowflake databases we are connected to. - -![Dbeaver connect](/assets/sql/tutorials/snowflake-superset/10-DBeaver connection.png) - -### Step 1: Getting the Training Data - -We start by getting the training data from the database that we connected to our MindsDB cloud account. It is always good to first make sure that all the databases are present and the connections correct. - - -```sql -show databases; -``` - -![show dbs](/assets/sql/tutorials/snowflake-superset/12-show_dtabases.png) - -MindsDB comes with some built-in databases as follows: - -* INFORMATION_SCHEMA stores information about MindsDB, -* MINDSDB stores metadata about the predictors and allows access to the created predictors as tables, -* DATASOURCE for connecting to data or uploading files. - -The SNF database is the database of the Chicago Transit Authority that we connected. It provides us with the training data. Let’s check it. - -```sql -SELECT * -FROM CHICAGO_TRANSIT_AUTHORITY.PUBLIC.CTA_BUS_RIDES_LATEST -LIMIT 100; -``` - -![show dbs](/assets/sql/tutorials/snowflake-superset/13-info_schema.png) - -The training data consists of the number of rides per bus route and day. For example, on 2001-07-03, there were 7354 rides on bus route 3. - -You can download the dataset [here](https://github.com/mindsdb/benchmarks/blob/main/benchmarks/datasets/chicago_transit_ts/CTA_2019_2020.csv) and execute the SQL commands along with the tutorial! - -### Step 2: Training the Predictive Model - -Let’s move on to the next step, which is training the predictive model. For that, we’ll use the MINDSDB database. - -```sql -use mindsdb; -show tables -``` -![show dbs](/assets/sql/tutorials/snowflake-superset/14-table.png) - -MINDSDB database comes with the predictors and commands tables. The predictors table lets us see the status of our predictive models. For example, assuming that we have already trained our predictive model for forecasting the number of rides, we’ll see the following. - -```sql -SELECT name, status FROM MINDSDB.PREDICTORS; -``` - -![show status](/assets/sql/tutorials/snowflake-superset/15-query.png) - -The process of training a predictive model using MindsDB is as simple as creating a view or a table. - -```sql -CREATE MODEL mindsdb.rides_forecaster_demo FROM snf ( -SELECT ROUTE, RIDES, DATE -FROM CHICAGO_TRANSIT_AUTHORITY.PUBLIC.CTA_BUS_RIDES_LATEST WHERE DATE > '2020-01-01') -PREDICT RIDES ORDER BY DATE GROUP BY ROUTE -WINDOW 10 HORIZON 7; -``` - -Let’s discuss the statement above. We create a predictor table using the `CREATE MODEL` statement and specifying the database from which the training data comes. The code in `yellow` selects the filtered training data. After that, we use the `PREDICT` keyword to define the column whose data we want to forecast. -Next, there are standard SQL clauses, such as `ORDER BY, GROUP BY, WINDOW, and HORIZON`. We use the `ORDER BY` clause and the DATE column as its argument. By doing so, we emphasize that we deal with a time-series problem. We order the rows by date. The `GROUP BY` clause divides the data into partitions. Here, each of them relates to a particular bus route. We take into account just the last ten rows for every given prediction. Hence, we use `WINDOW` 10. To prepare the forecast of the number of bus rides for the next week, we define `HORIZON` 7. -Now, you can execute the CREATE MODEL statement and wait until your predictive model is complete. The MINDSDB.PREDICTORS table stores its name as rides_forecaster_demo and its status as training. Once your predictive model is ready, the status changes to complete. - -## Step 3: Getting the Forecasts - -We are ready to go to the last step, i.e., using the predictive model to get future data. One way is to query the rides_forecaster_demo predictive model directly. Another way is to join this predictive model table to the table with historical data before querying it. - -We consider a time-series problem. Therefore, it is better to join our predictive model table to the table with historical data. - -```sql -SELECT tb.ROUTE, tb.RIDES AS PREDICTED_RIDES -FROM snf.PUBLIC.CTA_BUS_RIDES_LATEST AS ta -JOIN mindsdb.rides_forecaster_demo AS tb -WHERE ta.ROUTE = "171" AND ta.DATE > LATEST -LIMIT 7; -``` - -Let’s analyze it. We join the table that stores historical data (i.e., snf.PUBLIC.CTA_BUS_RIDES_LATEST) to our predictive model table (i.e., mindsdb.rides_forecaster_demo). The queried information is the route and the predicted number of rides per route. And the usage of the condition ta.DATE > LATEST (provided by MindsDB) ensures that we get the future number of rides per route. -Let’s run the query above to forecast the number of rides for route 171 in the next seven days. - -![Predictive query](/assets/sql/tutorials/snowflake-superset/16-predictive_query.png) - -Now we know the number of rides for route 171 in the next seven days. We could do it in the same way for all the other routes. - -Thanks to the special SQL syntax that includes CREATE MODEL, PREDICT, and > LATEST, MindsDB makes it straightforward to run predictors on our chosen data. -Now, let’s visualize our predictions. - -## Visualizing the Results using Apache Superset - -Apache Superset is a modern, open-source data exploration and visualization platform designed for all data personas in an organization. Superset ships with a powerful SQL editor and a no-code chart builder experience. Superset ships with support for most SQL databases out of the box and over 50 visualization types. - -You can connect to the Snowflake database or your MindsDB database that has a Snowflake connection within. Upon starting up your Superset workspace, your earlier defined database connection is ready to use! So you have access to the Chicago Transit Authority data, as well as to the predictions made by MindsDB. - - -### Visualizing Data - -The two data sets that we are relevant for visualization are the stops_by_route and forecasts data sets. The stops_by_route data set contains the exact location of each bus stop for each bus route. And the forecasts data set stores the actual and predicted number of rides, confidence interval, and lower and upper bounds of prediction, per route and timestamp. - -Superset lets us visualize the stops_by_route data set as follows. - -![Visualize query](/assets/sql/tutorials/snowflake-superset/17-stops_by_route_Superset.jpg) - -Every bus route has a different color. Also, there is volatility associated with each bus route. Let’s publish this chart to a new dashboard by clicking the **+Save** button, then switch to the **Save as** tab, and then type in β€œRoutes Dashboard” in the **Add to Dashboard** field. - -Now, let’s craft a time-series line chart to visualize actual vs predicted riders. Let’s look at the chart that presents the actual number of bus riders (in blue) and the predicted number of bus rides (in purple). - -![Predictive query](/assets/sql/tutorials/snowflake-superset/18-timeseries_chart.jpg) - -Predictions made by MindsDB closely resemble the actual data, except for a short time during March 2020 when the large-scale lockdowns took place. There we see a sudden drop in the number of bus rides. But MindsDB took some time to cope with this new reality and adjust its predictions. - -Lastly, let’s add a data zoom to this chart for end-users to zoom in on specific date ranges. Click the **Customize** tab and then click **Data Zoom** to enable it. Then, click the **+ Save** button and publish to the same β€œRoutes Dashboard”. - -Let’s head over to the dashboard now and customize it to make it more dynamic and explorable. Click **Dashboards** in the top nav bar and then select β€œRoutes Dashboard” from the list of dashboards. You can rearrange the chart positions by clicking the pencil icon, dragging the corners of the chart objects, and then clicking **Save**. - -![Timeseries chart](/assets/sql/tutorials/snowflake-superset/19-timeseries2.jpg) - -Let’s add some dashboard filters to this dashboard so dashboard consumers can filter the charts down to specific bus routes and volatility values. Click the right arrow (->) to pop open the filter tray. Then select the pencil icon to start editing this dashboard’s filters. Create the following filters with appropriate filter names: - -* A **Value** filter on the **route** column from the **forecasts** table. -* A **Numerical range** filter on the **volatility** column from the **stops_by_route** table. - -Click Save to publish these filters. - -![Filters](/assets/sql/tutorials/snowflake-superset/20-filters1.jpg) - -![Filters](/assets/sql/tutorials/snowflake-superset/20-filters2.jpg) - -Let’s give these filters for a test ride! Use the routes filter to only show information for routes 1, 100, and 1001. - -![Timeseries chart](/assets/sql/tutorials/snowflake-superset/21-graph.jpg) - -We could zoom in to see the time during the first large-scale lockdowns in March 2020. For these particular routes, the predictions made by MindsDB are not so far off. - -![Timeseries chart](/assets/sql/tutorials/snowflake-superset/22-graph.jpg) - -Now, let’s use our volatility filter to view only the routes with volatility values greater than 55. - -![Timeseries chart](/assets/sql/tutorials/snowflake-superset/23-graph.jpg) - - -## Conclusions: Powerful forecasting with MindsDB, your database, and Superset - -The combination of MindsDB and your database covers all the phases of the ML lifecycle. And Superset helps you to visualize the data in any form of diagrams, charts, or dashboards. - - -![Timeseries chart](/assets/sql/tutorials/snowflake-superset/24-MindsDB_ML-Workflow.png) - - -MindsDB provides easy-to-use predictive models through AI Tables. You can create these predictive models using SQL statements and feeding the input data. Also, you can query them the same way you query a table. The easiest way to get started with Superset is with the free tier for [Preset Cloud](https://preset.io/product/), a hassle-free and fully hosted cloud service for Superset. - -We encourage you to try some predictions with your own data, so please sign up for a [free MindsDB cloud account](https://cloud.mindsdb.com/signup) and if you need any help with MindsDB, feel free to ask our [Slack](https://mindsdb.com/joincommunity) and [Github](https://github.com/mindsdb/mindsdb/discussions) communities. - -## What's Next? - -Have fun while trying it out yourself! - -* Bookmark [MindsDB repository on GitHub](https://github.com/mindsdb/mindsdb). -* Sign up for a free [MindsDB account](https://cloud.mindsdb.com/register). -* Engage with the MindsDB community on [Slack](https://mindsdb.com/joincommunity) or [GitHub](https://github.com/mindsdb/mindsdb/discussions) to ask questions and share your ideas and thoughts. - -If this tutorial was helpful, please give us a GitHub star [here](https://github.com/mindsdb/mindsdb). diff --git a/docs/use-cases/overview.mdx b/docs/use-cases/overview.mdx deleted file mode 100644 index 6a757cf6bfb..00000000000 --- a/docs/use-cases/overview.mdx +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Applications of MindsDB -sidebarTitle: Overview -icon: "flask-gear" ---- - -MindsDB integrates with numerous data sources and AI frameworks so you can easily bring data and AI together to create and automate custom workflows with MindsDB. - -Common use cases include fine-tuning models, chatbots, alert systems, content generation, natural language processing, classification, regressions, forecasting. - - -This section presents some of the common applications of MindsDB in form of tutorials. - - - - - - - - - - - diff --git a/docs/use-cases/predictive_analytics/expenditures-statsforecast.mdx b/docs/use-cases/predictive_analytics/expenditures-statsforecast.mdx deleted file mode 100644 index 26942cfc11e..00000000000 --- a/docs/use-cases/predictive_analytics/expenditures-statsforecast.mdx +++ /dev/null @@ -1,83 +0,0 @@ ---- -title: Forecast Monthly Expenditures with Nixtla's StatsForecast and MindsDB -sidebarTitle: Forecast Monthly Expenditures ---- - -In this tutorial, we'll create a model to forecast expenditures based on historical data using the Nixtla's StatsForecast engine. - -## Connect a database - -We use a table from our MySQL public demo database, so let’s start by connecting it to MindsDB. - -```sql -CREATE DATABASE mysql_historical -WITH ENGINE = 'mysql', -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" -}; -``` - -Now that we’ve connected our database to MindsDB, let’s query the data to be used in the example: - -```sql -SELECT * -FROM mysql_historical.historical_expenditures -LIMIT 3; -``` - -## Deploy a time-series model - -Please note that before using the StatsForecast engine, you should create it with the below command: - -```sql -CREATE ML_ENGINE statsforecast -FROM statsforecast; -``` - -You can check the available engines with this command: - -```sql -SHOW ML_ENGINES; -``` - -Let’s create a model table to forecast the expenditures: - -```sql -CREATE MODEL quarterly_expenditure_forecaster -FROM mysql_historical - (SELECT * FROM historical_expenditures) -PREDICT expenditure -ORDER BY month -GROUP BY category -WINDOW 12 -HORIZON 3 -USING ENGINE = 'statsforecast'; -``` - -We can check the training status with the following query: - -```sql -DESCRIBE quarterly_expenditure_forecaster; -``` - -## Make predictions - -Once the model status is complete, the behavior is the same as with any other AI table – you can query for batch predictions by joining it with a data table. - -```sql -SELECT m.month as month, m.expenditure as forecasted -FROM mindsdb.quarterly_expenditure_forecaster as m -JOIN mysql_historical.historical_expenditures as t -WHERE t.month > LATEST -AND t.category = 'food'; -``` - -The `historical_expenditures` table is used to make batch predictions. Upon joining the `quarterly_expenditure_forecaster` model with the `historical_expenditures` table, we get predictions for the next quarter as defined by the `HORIZON 3` clause. - -MindsDB provides the `LATEST` keyword that marks the latest training data point. In the `WHERE` clause, we specify the `month > LATEST` condition to ensure the predictions are made for data after the latest training data point. - -If we train the model using data from January 2020 until December 2020 (as defined by `WINDOW 12`), then the predictions come for the first quarter of 2021 (as defined by `HORIZON 3`). diff --git a/docs/use-cases/predictive_analytics/house-sales-statsforecast.mdx b/docs/use-cases/predictive_analytics/house-sales-statsforecast.mdx deleted file mode 100644 index dd9c625817a..00000000000 --- a/docs/use-cases/predictive_analytics/house-sales-statsforecast.mdx +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: Forecasting Quarterly House Sales with StatsForecast -sidebarTitle: House Sales with StatsForecast ---- - -## Introduction - -In this tutorial, we introduce Nixtla’s StatsForecast integration which offers numerous univariate time series forecasting models optimized for high performance and scalability. We’ll go through an example to predict the real estate sales. - -## Prerequisites - -### MindsDB Setup - -Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -### Creating an ML Engine - -Please note that before using the StatsForecast engine, you should create it with the below command: - -```sql -CREATE ML_ENGINE statsforecast -FROM statsforecast; -``` - -You can check the available engines with this command: - -```sql -SHOW ML_ENGINES; -``` - -If you see the StatsForecast engine on the list, you are ready to follow the tutorials. - -## Tutorial - -### Connecting the Data - -In this tutorial, we take our [House Sales tutorial](/sql/tutorials/house-sales-forecasting) and redo it using the StatsForecast engine. - -We use a table from our MySQL public demo database, so let’s start by connecting MindsDB to it: - -```sql -CREATE DATABASE mysql_demo_db -WITH ENGINE = 'mysql', -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" -}; -``` - -Now that we’ve connected our database to MindsDB, let’s query the data to be used in the example: - -```sql -SELECT * -FROM mysql_demo_db.house_sales -LIMIT 3; -``` - -Here is the output: - -```sql -+----------+--------------------------+-----+--------+ -|saledate |house_price_moving_average|type |bedrooms| -+----------+--------------------------+-----+--------+ -|30/09/2007|441854 |house|2 | -|31/12/2007|441854 |house|2 | -|31/03/2008|441854 |house|2 | -+----------+--------------------------+-----+--------+ -``` - -The `house_sales` table stores quarterly house price moving averages per property. - -### Creating a Model - -Let's create a model table to predict the house price moving average values: - -```sql -CREATE MODEL mindsdb.house_sales_predictor -FROM mysql_demo_db - (SELECT * FROM house_sales) -PREDICT house_price_moving_average -ORDER BY saledate -GROUP BY bedrooms, type -WINDOW 8 -HORIZON 4 -USING ENGINE = 'statsforecast'; -``` - -The sytax is the same as in original tutorial. But here, we add the `USING` clause that specifies the ML engine used to make predictions. - -We can check the training status with the following query: - -```sql -DESCRIBE house_sales_predictor; -``` - -### Making Predictions - -Once the model status is `complete`, the behavior is the same as with any other AI table – you can query for batch predictions by joining it with a data table: - -```sql -SELECT m.saledate AS date, m.house_price_moving_average AS forecast -FROM mindsdb.house_sales_predictor AS m -JOIN mysql_demo_db.house_sales AS t -WHERE t.saledate > LATEST -AND t.type = 'house' -AND t.bedrooms = 2 -LIMIT 3; -``` - -Here is the output data: - -```sql -+----------------------------+----------+ -| date | forecast | -+----------------------------+----------+ -| 2019-12-31 00:00:00.000000 | 510712 | -| 2020-03-31 00:00:00.000000 | 510712 | -| 2020-06-30 00:00:00.000000 | 510712 | -+----------------------------+----------+ -``` - -## What's Next? - -Have fun while trying it out yourself! - -- Bookmark [MindsDB repository on GitHub](https://github.com/mindsdb/mindsdb). -- Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -- Engage with the MindsDB community on - [Slack](https://mindsdb.com/joincommunity) or - [GitHub](https://github.com/mindsdb/mindsdb/discussions) to ask questions and - share your ideas and thoughts. - -If this tutorial was helpful, please give us a GitHub star -[here](https://github.com/mindsdb/mindsdb). diff --git a/docs/use-cases/predictive_analytics/house-sales-timegpt.mdx b/docs/use-cases/predictive_analytics/house-sales-timegpt.mdx deleted file mode 100644 index d0bb29e6d55..00000000000 --- a/docs/use-cases/predictive_analytics/house-sales-timegpt.mdx +++ /dev/null @@ -1,134 +0,0 @@ ---- -title: Forecasting Quarterly House Sales with TimeGPT -sidebarTitle: House Sales with TimeGPT ---- - -## Introduction - -In this tutorial, we introduce Nixtla’s TimeGPT integration which offers the first foundational model for time series forecasting. We’ll go through an example to predict the real estate sales. - -## Prerequisites - -### MindsDB Setup - -Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -### Creating an ML Engine - -You can check the available engines with this command: - -```sql -SHOW ML_ENGINES; -``` - -If you see the TimeGPT engine on the list, you are ready to follow the tutorials. If you do not see TimeGPT on the list, you will have to create an instance of the engine first with this command: - -```sql -CREATE ML_ENGINE timegpt FROM timegpt USING timegpt_api_key = '...' -``` - -Notice that the `USING` clause is optional, but you must pass an API key eventually (either at model creation, engine creation, model usage, or in the mindsdb configuration file). - -## Tutorial - -### Connecting the Data - -In this tutorial, we take our [House Sales tutorial](/sql/tutorials/house-sales-forecasting) and redo it using the StatsForecast engine. - -We use a table from our MySQL public demo database, so let’s start by connecting MindsDB to it: - -```sql -CREATE DATABASE mysql_demo_db_houses -WITH ENGINE = 'mysql', -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" -}; -``` - -Now that we’ve connected our database to MindsDB, let’s query the data to be used in the example: - -```sql -SELECT * -FROM mysql_demo_db.house_sales -LIMIT 3; -``` - -Here is the output: - -```sql -+----------+--------------------------+-----+--------+ -|saledate |house_price_moving_average|type |bedrooms| -+----------+--------------------------+-----+--------+ -|30/09/2007|441854 |house|2 | -|31/12/2007|441854 |house|2 | -|31/03/2008|441854 |house|2 | -+----------+--------------------------+-----+--------+ -``` - -The `house_sales` table stores quarterly house price moving averages per property. - -### Creating a Model - -Let's create a model table to predict the house price moving average values: - -```sql -CREATE MODEL nixtla_timegpt_house_sales_predictor -FROM mysql_demo_db - (SELECT * FROM house_sales) -PREDICT house_price_moving_average -ORDER BY saledate -GROUP BY bedrooms, type -WINDOW 8 -HORIZON 4 -USING ENGINE = 'timegpt'; -``` - -The syntax is the same as in the [original tutorial](/sql/tutorials/house-sales-forecasting). But here, we add the `USING` clause that specifies the ML engine used to make predictions. - -We can check the training status with the following query: - -```sql -DESCRIBE nixtla_timegpt_house_sales_predictor; -``` - -### Making Predictions - -Once the model status is `complete`, the behavior is the same as with any other AI table – you can query for batch predictions by joining it with a data table: - -```sql - -SELECT m.saledate AS date, m.house_price_moving_average AS forecast -FROM nixtla_timegpt_house_sales_predictor AS m -JOIN mysql_demo_db.house_sales AS t -LIMIT 3; -``` - -Here is the output data: - -```sql -+----------------------------+----------+ -| date | forecast | -+----------------------------+----------+ -| 2019-09-30 00:01:00.000000 | 335449.03125 | -| 2019-09-30 00:02:00.000000 | 335449.03125 | -| 2019-09-30 00:03:00.000000 | 335449.03125 | -+----------------------------+----------+ -``` - -## What's Next? - -Have fun while trying it out yourself! - -- Bookmark [MindsDB repository on GitHub](https://github.com/mindsdb/mindsdb). -- Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -- Engage with the MindsDB community on - [Slack](https://mindsdb.com/joincommunity) or - [GitHub](https://github.com/mindsdb/mindsdb/discussions) to ask questions and - share your ideas and thoughts. - -If this tutorial was helpful, please give us a GitHub star -[here](https://github.com/mindsdb/mindsdb). diff --git a/docs/use-cases/predictive_analytics/monthly-expediture-timegpt.mdx b/docs/use-cases/predictive_analytics/monthly-expediture-timegpt.mdx deleted file mode 100644 index 40ca8e6ed46..00000000000 --- a/docs/use-cases/predictive_analytics/monthly-expediture-timegpt.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Forecasting Monthly Expenditures with TimeGPT -sidebarTitle: House Sales with TimeGPT ---- - -## Introduction - -In this tutorial, we introduce Nixtla’s TimeGPT integration which offers the first foundational model for time series forecasting. Follow along to see how it works. - -## Prerequisites - -### MindsDB Setup - -Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -### Creating an ML Engine - -You can check the available engines with this command: - -```sql -SHOW ML_ENGINES; -``` - -If you see the TimeGPT engine on the list, you are ready to follow the tutorials. If you do not see TimeGPT on the list, you will have to create an instance of the engine first with this command: - -```sql -CREATE ML_ENGINE timegpt FROM timegpt USING timegpt_api_key = '...' -``` - -Notice that the `USING` clause is optional, but you must pass an API key eventually (either at model creation, engine creation, model usage, or in the mindsdb configuration file). - -## Tutorial - -### Connecting the Data - -In this tutorial, we take our the Monthly Expenditures dataset. - -We use a table from our MySQL public demo database, so let’s start by connecting MindsDB to it: - -```sql -CREATE DATABASE mysql_demo_db -WITH ENGINE = 'mysql', -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" -}; -``` - -Now that we’ve connected our database to MindsDB, let’s query the data to be used in the example: - -```sql -SELECT * -FROM mysql_demo_db.historical_expenditures -LIMIT 3; -``` - -Here is the output: - -```sql -| month | category | expenditure | -| ----- | -------- | ----------- | -| 1982-04-01 | food | 1162.6 | -| 1982-05-01 | food | 1150.9 | -| 1982-06-01 | food | 1160 | -``` - -### Creating a Model - -Let's create a model table to predict the expenditure values: - -```sql -CREATE MODEL nixtla_timegpt_quarterly_expenditure_forecaster -FROM mysql_demo_db - (SELECT * FROM historical_expenditures) -PREDICT expenditure -ORDER BY month -GROUP BY category -WINDOW 12 -HORIZON 3 -USING ENGINE = 'timegpt'; -``` - -We add the `USING` clause that specifies the ML engine used to make predictions. - -We can check the training status with the following query: - -```sql -DESCRIBE nixtla_timegpt_quarterly_expenditure_forecaster; -``` - -### Making Predictions - -Once the model status is `complete`, the behavior is the same as with any other AI table – you can query for batch predictions by joining it with a data table: - -```sql -SELECT m.month as month, m.expenditure as forecasted -FROM nixtla_timegpt_quarterly_expenditure_forecaster as m -JOIN mysql_demo_db.historical_expenditures as t -WHERE t.month > LATEST -AND t.category = 'food'; -``` - -Here is the output data: - -```sql -| month | forecasted | -| ----- | ---------- | -| 2017-09-01 00:01:00.000000 | 10307.9423828125 | -| 2017-09-01 00:02:00.000000 | 10307.931640625 | -| 2017-09-01 00:03:00.000000 | 10307.9384765625 | -``` - -## What's Next? - -Have fun while trying it out yourself! - -- Bookmark [MindsDB repository on GitHub](https://github.com/mindsdb/mindsdb). -- Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -- Engage with the MindsDB community on - [Slack](https://mindsdb.com/joincommunity) or - [GitHub](https://github.com/mindsdb/mindsdb/discussions) to ask questions and - share your ideas and thoughts. - -If this tutorial was helpful, please give us a GitHub star -[here](https://github.com/mindsdb/mindsdb). diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index 0a86dd86850..baaa11312b2 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = "MindsDB" __package_name__ = "mindsdb" -__version__ = "26.0.1" +__version__ = "26.2.0" __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = "MindsDB Inc" diff --git a/mindsdb/__main__.py b/mindsdb/__main__.py index f5d2a672e2d..3a8921e995c 100644 --- a/mindsdb/__main__.py +++ b/mindsdb/__main__.py @@ -34,7 +34,12 @@ ) from mindsdb.utilities.ps import is_pid_listen_port, get_child_pids import mindsdb.interfaces.storage.db as db -from mindsdb.utilities.fs import clean_process_marks, clean_unlinked_process_marks, create_pid_file, delete_pid_file +from mindsdb.utilities.fs import ( + clean_process_marks, + clean_unlinked_process_marks, + create_pid_file, + delete_pid_file, +) from mindsdb.utilities.context import context as ctx from mindsdb.utilities.auth import register_oauth_client, get_aws_meta_data from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 @@ -154,12 +159,25 @@ def close_api_gracefully(trunc_processes_struct): def clean_mindsdb_tmp_dir(): """Clean the MindsDB tmp dir at exit.""" - temp_dir = config["paths"]["tmp"] - for file in temp_dir.iterdir(): - if file.is_dir(): - shutil.rmtree(file) - else: - file.unlink() + try: + temp_dir = config["paths"]["tmp"] + if not temp_dir.exists(): + return + + for file in temp_dir.iterdir(): + try: + if file.is_dir(): + # https://docs.python.org/3/library/shutil.html#shutil.rmtree + shutil.rmtree(file) + else: + # https://docs.python.org/3/library/pathlib.html#pathlib.Path.unlink + file.unlink(missing_ok=True) + except PermissionError as e: + logger.error(f"Failed to clean %s: %s{file}: {e}") + except FileNotFoundError: + logger.error(f"File not found during cleanup: {file}") + except Exception as e: + logger.error(f"Failed to clean MindsDB tmp dir: {e}") def set_error_model_status_by_pids(unexisting_pids: List[int]): @@ -360,6 +378,15 @@ def start_process(trunc_process_data: TrunkProcessData) -> None: sys.exit(0) + if config.cmd_args.mcp_stdio: + # StreamHandler writes to stderr by default, which MCP treats as notification messages. + # Raise the log level to ERROR to suppress notification spam, and explicitly set the + # stream to stderr in case the user has overridden it in their config. + os.environ["MINDSDB_CONSOLE_LOG_LEVEL"] = "ERROR" + config["logging"]["handlers"]["console"]["level"] = "ERROR" + config["logging"]["handlers"]["console"]["stream"] = "ext://sys.stderr" + log.configure_logging() + config.raise_warnings(logger=logger) os.environ["MINDSDB_RUNTIME"] = "1" @@ -430,6 +457,12 @@ def start_process(trunc_process_data: TrunkProcessData) -> None: clean_process_marks() + if config.cmd_args.mcp_stdio: + from mindsdb.api.mcp.mcp_instance import mcp + + mcp.run() + sys.exit(0) + # Get config values for APIs http_api_config = config.get("api", {}).get("http", {}) mysql_api_config = config.get("api", {}).get("mysql", {}) @@ -443,7 +476,8 @@ def start_process(trunc_process_data: TrunkProcessData) -> None: restart_on_failure=http_api_config.get("restart_on_failure", False), max_restart_count=http_api_config.get("max_restart_count", TrunkProcessData.max_restart_count), max_restart_interval_seconds=http_api_config.get( - "max_restart_interval_seconds", TrunkProcessData.max_restart_interval_seconds + "max_restart_interval_seconds", + TrunkProcessData.max_restart_interval_seconds, ), ), TrunkProcessEnum.MYSQL: TrunkProcessData( @@ -454,17 +488,24 @@ def start_process(trunc_process_data: TrunkProcessData) -> None: restart_on_failure=mysql_api_config.get("restart_on_failure", False), max_restart_count=mysql_api_config.get("max_restart_count", TrunkProcessData.max_restart_count), max_restart_interval_seconds=mysql_api_config.get( - "max_restart_interval_seconds", TrunkProcessData.max_restart_interval_seconds + "max_restart_interval_seconds", + TrunkProcessData.max_restart_interval_seconds, ), ), TrunkProcessEnum.JOBS: TrunkProcessData( - name=TrunkProcessEnum.JOBS.value, entrypoint=start_scheduler, args=(config.cmd_args.verbose,) + name=TrunkProcessEnum.JOBS.value, + entrypoint=start_scheduler, + args=(config.cmd_args.verbose,), ), TrunkProcessEnum.TASKS: TrunkProcessData( - name=TrunkProcessEnum.TASKS.value, entrypoint=start_tasks, args=(config.cmd_args.verbose,) + name=TrunkProcessEnum.TASKS.value, + entrypoint=start_tasks, + args=(config.cmd_args.verbose,), ), TrunkProcessEnum.ML_TASK_QUEUE: TrunkProcessData( - name=TrunkProcessEnum.ML_TASK_QUEUE.value, entrypoint=start_ml_task_queue, args=(config.cmd_args.verbose,) + name=TrunkProcessEnum.ML_TASK_QUEUE.value, + entrypoint=start_ml_task_queue, + args=(config.cmd_args.verbose,), ), TrunkProcessEnum.LITELLM: TrunkProcessData( name=TrunkProcessEnum.LITELLM.value, @@ -474,7 +515,8 @@ def start_process(trunc_process_data: TrunkProcessData) -> None: restart_on_failure=litellm_api_config.get("restart_on_failure", False), max_restart_count=litellm_api_config.get("max_restart_count", TrunkProcessData.max_restart_count), max_restart_interval_seconds=litellm_api_config.get( - "max_restart_interval_seconds", TrunkProcessData.max_restart_interval_seconds + "max_restart_interval_seconds", + TrunkProcessData.max_restart_interval_seconds, ), ), } @@ -554,7 +596,11 @@ async def join_process(trunc_process_data: TrunkProcessData): trunc_process_data.process = None if trunc_process_data.name == TrunkProcessEnum.HTTP.value: # do not open GUI on HTTP API restart - trunc_process_data.args = (config.cmd_args.verbose, None, True) + trunc_process_data.args = ( + config.cmd_args.verbose, + None, + True, + ) start_process(trunc_process_data) api_name, port, started = await wait_api_start( trunc_process_data.name, diff --git a/mindsdb/api/a2a/README.md b/mindsdb/api/a2a/README.md index cddb2ccf8dd..787b2d8c409 100644 --- a/mindsdb/api/a2a/README.md +++ b/mindsdb/api/a2a/README.md @@ -14,7 +14,7 @@ The A2A API runs as part of the MindsDB HTTP API, allowing you to: ## Prerequisites - MindsDB running -- Python 3.10 or higher +- Python 3.10.20 or higher ## Running A2A API diff --git a/mindsdb/api/common/middleware.py b/mindsdb/api/common/middleware.py index 7730b178ad4..6fb93380191 100644 --- a/mindsdb/api/common/middleware.py +++ b/mindsdb/api/common/middleware.py @@ -1,13 +1,15 @@ import os +import time import hmac import secrets import hashlib +from collections import deque from http import HTTPStatus from typing import Optional -from starlette.middleware.base import BaseHTTPMiddleware from starlette.responses import JSONResponse from starlette.requests import Request +from starlette.types import ASGIApp, Receive, Scope, Send from mindsdb.utilities import log from mindsdb.utilities.config import config @@ -24,6 +26,10 @@ def get_pat_fingerprint(token: str) -> str: return hmac.new(SECRET_KEY.encode(), token.encode(), hashlib.sha256).hexdigest() +if config["auth"]["token"]: + TOKENS.append(get_pat_fingerprint(config["auth"]["token"])) + + def generate_pat() -> str: logger.debug("Generating new auth token") token = "pat_" + secrets.token_urlsafe(32) @@ -56,23 +62,106 @@ def revoke_pat(raw_token: str) -> bool: return False -class PATAuthMiddleware(BaseHTTPMiddleware): - def _extract_bearer(self, request: Request) -> Optional[str]: - h = request.headers.get("Authorization") +class PATAuthMiddleware: + """Pure ASGI middleware (compatible with SSE / streaming responses). + The class is not inherited from starlette.middleware.base.BaseHTTPMiddleware + bacause it collect responses to buffer, which is not good for streaming + """ + + def __init__(self, app: ASGIApp) -> None: + self.app = app + + @staticmethod + def _extract_bearer(headers: dict) -> Optional[str]: + h = headers.get("authorization") if not h or not h.startswith("Bearer "): return None return h.split(" ", 1)[1].strip() or None - async def dispatch(self, request: Request, call_next): + async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: + if scope["type"] != "http": + await self.app(scope, receive, send) + return + if config.get("auth", {}).get("http_auth_enabled", False) is False: - return await call_next(request) + await self.app(scope, receive, send) + return - token = self._extract_bearer(request) - if not token or not verify_pat(token): - return JSONResponse({"detail": "Unauthorized"}, status_code=HTTPStatus.UNAUTHORIZED) + if scope.get("method") == "OPTIONS": + await self.app(scope, receive, send) + return - request.state.user = config["auth"].get("username") - return await call_next(request) + request = Request(scope) + token = self._extract_bearer(dict(request.headers)) + if not token or not verify_pat(token): + response = JSONResponse({"detail": "Unauthorized"}, status_code=HTTPStatus.UNAUTHORIZED) + await response(scope, receive, send) + return + + scope.setdefault("state", {})["user"] = config["auth"].get("username") + await self.app(scope, receive, send) + + +class RateLimitMiddleware: + """Rate limiting middleware using a sliding window counter. Tracks requests per client IP.""" + + def __init__(self, app: ASGIApp, requests_per_minute: int) -> None: + self.app = app + self.requests_per_minute = requests_per_minute + self._window = 60.0 # seconds + self._counters: dict[str, deque] = {} + + def _get_client_key(self, scope: Scope) -> str: + client = scope.get("client") + if client: + return client[0] + return "unknown" + + async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: + if scope["type"] != "http": + await self.app(scope, receive, send) + return + + if scope.get("method") == "OPTIONS": + await self.app(scope, receive, send) + return + + # Clients usually repeat this request until + # the connection is established, so no rate limit it. + if scope.get("method") == "GET" and scope.get("path", "").endswith("/sse"): + await self.app(scope, receive, send) + return + + client_key = self._get_client_key(scope) + now = time.monotonic() + window_start = now - self._window + + timestamps = self._counters.setdefault(client_key, deque()) + + # Evict timestamps outside the current window + while timestamps and timestamps[0] <= window_start: + timestamps.popleft() + + if len(timestamps) >= self.requests_per_minute: + retry_after = int(self._window - (now - timestamps[0])) + 1 + else: + retry_after = None + timestamps.append(now) + + if retry_after is not None: + response = JSONResponse( + {"detail": f"Too Many Requests, retry after {retry_after} seconds"}, + status_code=HTTPStatus.TOO_MANY_REQUESTS, + headers={"Retry-After": str(retry_after)}, + ) + await response(scope, receive, send) + return + + stale_keys = [k for k, ts in self._counters.items() if not ts or ts[-1] <= window_start] + for k in stale_keys: + del self._counters[k] + + await self.app(scope, receive, send) # Used by mysql protocol diff --git a/mindsdb/api/executor/command_executor.py b/mindsdb/api/executor/command_executor.py index 3cc2e5f50b5..25d8858f458 100644 --- a/mindsdb/api/executor/command_executor.py +++ b/mindsdb/api/executor/command_executor.py @@ -75,7 +75,8 @@ import mindsdb.utilities.profiler as profiler -from mindsdb.api.executor.sql_query.result_set import Column, ResultSet +from mindsdb.api.executor.sql_query.result_set import ResultSet +from mindsdb.utilities.types.column import Column from mindsdb.api.executor.sql_query import SQLQuery from mindsdb.api.executor.data_types.answer import ExecuteAnswer from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ( @@ -1483,13 +1484,11 @@ def answer_drop_kb(self, statement: DropKnowledgeBase, database_name: str) -> Ex def answer_create_agent(self, statement, database_name): project_name, name = match_two_part_name(statement.name, default_db_name=database_name) - provider = statement.params.pop("provider", None) try: _ = self.session.agents_controller.add_agent( name=name, project_name=project_name, - model_name=statement.model, - provider=provider, + model=statement.model, params=variables_controller.fill_parameters(statement.params), ) except EntityExistsError as e: @@ -1520,7 +1519,7 @@ def answer_update_agent(self, statement: UpdateAgent, database_name: str): _ = self.session.agents_controller.update_agent( name, project_name=project_name, - model_name=model, + model=model, params=variables_controller.fill_parameters(statement.params), ) except (EntityExistsError, EntityNotExistsError, ValueError) as e: diff --git a/mindsdb/api/executor/data_types/sql_answer.py b/mindsdb/api/executor/data_types/sql_answer.py new file mode 100644 index 00000000000..0a8b6087dbf --- /dev/null +++ b/mindsdb/api/executor/data_types/sql_answer.py @@ -0,0 +1,129 @@ +from typing import Generator +from dataclasses import dataclass + +import orjson +import numpy as np +import pandas as pd + +from mindsdb.utilities.json_encoder import CustomJSONEncoder +from mindsdb.api.executor.sql_query.result_set import ResultSet +from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE +from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE + + +@dataclass +class SQLAnswer: + """Container for SQL query execution results and metadata. + + Attributes: + resp_type: Type of response (OK, ERROR, TABLE, COLUMNS_TABLE). + result_set: Query result data as a ResultSet object. + status: Status code for the response. + state_track: List of state tracking information. + error_code: Error code if query execution failed. + error_message: Human-readable error message if query failed. + affected_rows: Number of rows affected by the query (for DML operations). + mysql_types: List of MySQL data types for result columns. + """ + + resp_type: RESPONSE_TYPE = RESPONSE_TYPE.OK + result_set: ResultSet | None = None + status: int | None = None + state_track: list[list] | None = None + error_code: int | None = None + error_message: str | None = None + affected_rows: int | None = None + mysql_types: list[MYSQL_DATA_TYPE] | None = None + + @property + def type(self) -> RESPONSE_TYPE: + """Get the response type. + + Returns: + RESPONSE_TYPE: The type of this SQL response. + """ + return self.resp_type + + def stream_http_response_sse(self, context: dict | None) -> Generator[str, None, None]: + """Stream response in Server-Sent Events (SSE) format. + + Args: + context: Optional context information. + + Yields: + str: SSE-formatted data lines (prefixed with "data: "). + """ + for piece in self.stream_http_response_jsonlines(context=context): + yield f"data: {piece}\n" + + def stream_http_response_jsonlines(self, context: dict | None) -> Generator[str, None, None]: + """Stream response as newline-delimited JSON (JSONL). + + Args: + context: Optional context information. + + Yields: + str: JSON-encoded lines terminated with newline characters. + """ + _default_json = CustomJSONEncoder().default + + if self.resp_type in (RESPONSE_TYPE.OK, RESPONSE_TYPE.ERROR): + response = self.dump_http_response(context=context) + yield orjson.dumps(response).decode() + "\n" + return + + yield ( + orjson.dumps( + { + "type": RESPONSE_TYPE.TABLE, + "column_names": [column.alias or column.name for column in self.result_set.columns], + } + ).decode() + + "\n" + ) + + for el in self.result_set.stream_data(): + el.replace([np.nan, pd.NA, pd.NaT], None, inplace=True) + yield ( + orjson.dumps( + el.to_dict("split")["data"], + default=_default_json, + option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME, + ).decode() + + "\n" + ) + + def dump_http_response(self, context: dict | None = None) -> dict: + """Serialize the complete response as a single dictionary. + + Args: + context: Optional context information. + + Returns: + dict: Serialized response. + """ + if context is None: + context = {} + if self.resp_type == RESPONSE_TYPE.OK: + return { + "type": self.resp_type, + "affected_rows": self.affected_rows, + "context": context, + } + elif self.resp_type in (RESPONSE_TYPE.TABLE, RESPONSE_TYPE.COLUMNS_TABLE): + data = self.result_set.to_lists(json_types=True) + return { + "type": RESPONSE_TYPE.TABLE, + "data": data, + "column_names": [column.alias or column.name for column in self.result_set.columns], + "context": context, + } + elif self.resp_type == RESPONSE_TYPE.ERROR: + return { + "type": RESPONSE_TYPE.ERROR, + "error_code": self.error_code or 0, + "error_message": self.error_message, + "context": context, + } + else: + raise ValueError(f"Unsupported response type for dump HTTP response: {self.resp_type}") diff --git a/mindsdb/api/executor/datahub/classes/response.py b/mindsdb/api/executor/datahub/classes/response.py deleted file mode 100644 index cd0e990ed71..00000000000 --- a/mindsdb/api/executor/datahub/classes/response.py +++ /dev/null @@ -1,14 +0,0 @@ -from dataclasses import dataclass, field -from typing import List, Dict - -import pandas as pd - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -@dataclass -class DataHubResponse: - data_frame: pd.DataFrame = field(default_factory=pd.DataFrame) - columns: List[Dict] = field(default_factory=list) - affected_rows: int | None = None - mysql_types: list[MYSQL_DATA_TYPE] | None = None diff --git a/mindsdb/api/executor/datahub/datanodes/datanode.py b/mindsdb/api/executor/datahub/datanodes/datanode.py index 256760fc959..8be9e355949 100644 --- a/mindsdb/api/executor/datahub/datanodes/datanode.py +++ b/mindsdb/api/executor/datahub/datanodes/datanode.py @@ -1,10 +1,11 @@ from pandas import DataFrame -from mindsdb.api.executor.datahub.classes.response import DataHubResponse +from mindsdb.integrations.libs.response import DataHandlerResponse class DataNode: type = "meta" + has_support_stream = False def __init__(self): pass @@ -21,5 +22,5 @@ def get_table_columns_df(self, table_name: str, schema_name: str | None = None) def get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]: pass - def query(self, query=None, session=None) -> DataHubResponse: + def query(self, query=None, session=None) -> DataHandlerResponse: pass diff --git a/mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py b/mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py index 4eabef3d7d7..ac309f72e6d 100644 --- a/mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +++ b/mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py @@ -6,12 +6,13 @@ from mindsdb.api.executor.datahub.datanodes.datanode import DataNode from mindsdb.api.executor.datahub.datanodes.integration_datanode import IntegrationDataNode from mindsdb.api.executor.datahub.datanodes.project_datanode import ProjectDataNode -from mindsdb.api.executor import exceptions as exc +from mindsdb.api.executor.datahub.classes.tables_row import TablesRow from mindsdb.api.executor.utilities.sql import query_df from mindsdb.api.executor.utilities.sql import get_query_tables +from mindsdb.api.executor import exceptions as exc from mindsdb.interfaces.database.projects import ProjectController -from mindsdb.api.executor.datahub.classes.response import DataHubResponse -from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES +from mindsdb.integrations.libs.response import TableResponse, INF_SCHEMA_COLUMNS_NAMES +from mindsdb.utilities.types.column import Column from mindsdb.utilities import log from .system_tables import ( @@ -47,8 +48,6 @@ TriggersTable, ) -from mindsdb.api.executor.datahub.classes.tables_row import TablesRow - logger = log.getLogger(__name__) @@ -206,7 +205,7 @@ def get_tables(self): def get_tree_tables(self): return {name: table for name, table in self.tables.items() if table.visible} - def query(self, query: ASTNode, session=None) -> DataHubResponse: + def query(self, query: ASTNode, session=None) -> TableResponse: query_tables = [x[1] for x in get_query_tables(query)] if len(query_tables) != 1: @@ -225,9 +224,8 @@ def query(self, query: ASTNode, session=None) -> DataHubResponse: dataframe = self._get_empty_table(tbl) data = query_df(dataframe, query, session=self.session) - columns_info = [{"name": k, "type": v} for k, v in data.dtypes.items()] - - return DataHubResponse(data_frame=data, columns=columns_info, affected_rows=0) + columns = [Column(name=k, dtype=v) for k, v in data.dtypes.items()] + return TableResponse(data=data, columns=columns, affected_rows=0) def _get_empty_table(self, table): columns = table.columns diff --git a/mindsdb/api/executor/datahub/datanodes/integration_datanode.py b/mindsdb/api/executor/datahub/datanodes/integration_datanode.py index 228bd29468c..0bcaae6aad4 100644 --- a/mindsdb/api/executor/datahub/datanodes/integration_datanode.py +++ b/mindsdb/api/executor/datahub/datanodes/integration_datanode.py @@ -2,27 +2,24 @@ import inspect import functools from dataclasses import astuple -from typing import Iterable, List -import numpy as np import pandas as pd from sqlalchemy.types import Integer, Float from mindsdb_sql_parser.ast.base import ASTNode from mindsdb_sql_parser.ast import Insert, Identifier, CreateTable, TableColumn, DropTables -from mindsdb.api.executor.datahub.classes.response import DataHubResponse from mindsdb.api.executor.datahub.datanodes.datanode import DataNode +from mindsdb.api.executor.datahub.datanodes.system_tables import infer_mysql_type from mindsdb.api.executor.datahub.classes.tables_row import TablesRow from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.integrations.libs.response import HandlerResponse, INF_SCHEMA_COLUMNS_NAMES +from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES, DataHandlerResponse, ErrorResponse, OkResponse from mindsdb.integrations.utilities.utils import get_class_name from mindsdb.metrics import metrics from mindsdb.utilities import log from mindsdb.utilities.profiler import profiler from mindsdb.utilities.exception import QueryError -from mindsdb.api.executor.datahub.datanodes.system_tables import infer_mysql_type logger = log.getLogger(__name__) @@ -57,9 +54,11 @@ def wrapper(self, *args, **kwargs): query_time_with_labels = metrics.INTEGRATION_HANDLER_QUERY_TIME.labels(handler_class_name, result.type) query_time_with_labels.observe(elapsed_seconds) - num_rows = 0 - if result.data_frame is not None: - num_rows = len(result.data_frame.index) + num_rows = getattr(result, "affected_rows", None) + if num_rows is None: + num_rows = getattr(result, "rows_fetched", -1) + if num_rows is None: + num_rows = -1 response_size_with_labels = metrics.INTEGRATION_HANDLER_RESPONSE_SIZE.labels( handler_class_name, result.type ) @@ -164,12 +163,12 @@ def create_table( self, table_name: Identifier, result_set: ResultSet = None, - columns: List[TableColumn] = None, + columns: list[TableColumn] = None, is_replace: bool = False, is_create: bool = False, raise_if_exists: bool = True, **kwargs, - ) -> DataHubResponse: + ) -> OkResponse: # is_create - create table # if !raise_if_exists: error will be skipped # is_replace - drop table if exists @@ -197,18 +196,18 @@ def create_table( if result_set is None: # it is just a 'create table' - return DataHubResponse() + return OkResponse() # native insert if hasattr(self.integration_handler, "insert"): df = result_set.to_df() - result: HandlerResponse = self.integration_handler.insert(table_name.parts[-1], df) + result: DataHandlerResponse = self.integration_handler.insert(table_name.parts[-1], df) if result is not None: affected_rows = result.affected_rows else: affected_rows = None - return DataHubResponse(affected_rows=affected_rows) + return OkResponse(affected_rows=affected_rows) insert_columns = [Identifier(parts=[x.alias]) for x in result_set.columns] @@ -232,29 +231,28 @@ def create_table( if len(values) == 0: # not need to insert - return DataHubResponse() + return OkResponse() insert_ast = Insert(table=table_name, columns=insert_columns, values=values, is_plain=True) try: - result: DataHubResponse = self.query(insert_ast) + result: DataHandlerResponse = self.query(insert_ast) except Exception as e: msg = f"[{self.ds_type}/{self.integration_name}]: {str(e)}" raise DBHandlerException(msg) from e - return DataHubResponse(affected_rows=result.affected_rows) + return OkResponse(affected_rows=result.affected_rows) def has_support_stream(self) -> bool: - # checks if data handler has query_stream method - return hasattr(self.integration_handler, "query_stream") and callable(self.integration_handler.query_stream) + """Check if the integration handler supports streaming responses. - @profiler.profile() - def query_stream(self, query: ASTNode, fetch_size: int = None) -> Iterable: - # returns generator of results from handler (split by chunks) - return self.integration_handler.query_stream(query, fetch_size=fetch_size) + Returns: + bool: True if the integration handler supports streaming responses, False otherwise. + """ + return getattr(self.integration_handler, "stream_response", False) @profiler.profile() - def query(self, query: ASTNode | str = None, session=None) -> DataHubResponse: + def query(self, query: ASTNode | str = None, session=None) -> DataHandlerResponse: """Execute a query against the integration data source. This method processes SQL queries either as ASTNode objects or raw SQL strings @@ -266,20 +264,20 @@ def query(self, query: ASTNode | str = None, session=None) -> DataHubResponse: session: Session object (currently unused but kept for compatibility) Returns: - DataHubResponse: Response object + DataHandlerResponse: Response object Raises: NotImplementedError: If query is not ASTNode or str type Exception: If the query execution fails with an error response """ if isinstance(query, ASTNode): - result: HandlerResponse = self.query_integration_handler(query=query) + result: DataHandlerResponse = self.query_integration_handler(query=query) elif isinstance(query, str): - result: HandlerResponse = self.native_query_integration(query=query) + result: DataHandlerResponse = self.native_query_integration(query=query) else: raise NotImplementedError("Thew query argument must be ASTNode or string type") - if result.type == RESPONSE_TYPE.ERROR: + if type(result) is ErrorResponse: if isinstance(query, ASTNode): try: query_str = query.to_string() @@ -302,32 +300,12 @@ def query(self, query: ASTNode | str = None, session=None) -> DataHubResponse: else: raise exception from result.exception - if result.type == RESPONSE_TYPE.OK: - return DataHubResponse(affected_rows=result.affected_rows) - - df = result.data_frame - # region clearing df from NaN values - # recursion error appears in pandas 1.5.3 https://github.com/pandas-dev/pandas/pull/45749 - if isinstance(df, pd.Series): - df = df.to_frame() - - columns_info = [{"name": k, "type": v} for k, v in df.dtypes.items()] - try: - # replace python's Nan, np.nan and pd.NA to None - # TODO keep all NAN to the end of processing, bacause replacing also changes dtypes - df.replace([np.nan, pd.NA, pd.NaT], None, inplace=True) - except Exception: - logger.exception("Issue with clearing DF from NaN values:") - # endregion - - return DataHubResponse( - data_frame=df, columns=columns_info, affected_rows=result.affected_rows, mysql_types=result.mysql_types - ) + return result @collect_metrics - def query_integration_handler(self, query: ASTNode) -> HandlerResponse: + def query_integration_handler(self, query: ASTNode) -> DataHandlerResponse: return self.integration_handler.query(query) @collect_metrics - def native_query_integration(self, query: str) -> HandlerResponse: + def native_query_integration(self, query: str) -> DataHandlerResponse: return self.integration_handler.native_query(query) diff --git a/mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py b/mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py index 8c274873465..b7fd38e3b3a 100644 --- a/mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +++ b/mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py @@ -381,7 +381,7 @@ def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): class AgentsTable(MdbTable): name = "AGENTS" - columns = ["NAME", "PROJECT", "MODEL_NAME", "PARAMS"] + columns = ["NAME", "PROJECT", "MODEL", "PARAMS"] @classmethod def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): @@ -394,15 +394,18 @@ def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): project_names = {i.id: i.name for i in project_controller.get_list()} # NAME, PROJECT, MODEL, PARAMS (skills removed) - data = [ - ( - a.name, - project_names[a.project_id], - a.model_name, - to_json(a.params), + data = [] + for a in all_agents: + params = a.params or {} + model = params.pop("model", {}) + data.append( + [ + a.name, + project_names[a.project_id], + to_json(model), + to_json(params), + ] ) - for a in all_agents - ] return pd.DataFrame(data, columns=cls.columns) diff --git a/mindsdb/api/executor/datahub/datanodes/project_datanode.py b/mindsdb/api/executor/datahub/datanodes/project_datanode.py index 767c88f9fbe..21e07d65d83 100644 --- a/mindsdb/api/executor/datahub/datanodes/project_datanode.py +++ b/mindsdb/api/executor/datahub/datanodes/project_datanode.py @@ -13,12 +13,12 @@ Delete, ) -from mindsdb.utilities.exception import EntityNotExistsError from mindsdb.api.executor.datahub.datanodes.datanode import DataNode from mindsdb.api.executor.datahub.classes.tables_row import TablesRow -from mindsdb.api.executor.datahub.classes.response import DataHubResponse +from mindsdb.utilities.exception import EntityNotExistsError +from mindsdb.utilities.types.column import Column from mindsdb.utilities.partitioning import process_dataframe_in_partitions -from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES +from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES, DataHandlerResponse, OkResponse, TableResponse class ProjectDataNode(DataNode): @@ -100,7 +100,7 @@ def callback(chunk): return ml_handler.predict(model_name, df, project_name=self.project.name, version=version, params=params) - def query(self, query: ASTNode | str = None, session=None) -> DataHubResponse: + def query(self, query: ASTNode | str = None, session=None) -> DataHandlerResponse: if isinstance(query, str): query = parse_sql(query) @@ -110,7 +110,7 @@ def query(self, query: ASTNode | str = None, session=None) -> DataHubResponse: if kb_table: # this is the knowledge db kb_table.update_query(query) - return DataHubResponse() + return OkResponse() raise NotImplementedError(f"Can't update object: {query_table}") @@ -120,7 +120,7 @@ def query(self, query: ASTNode | str = None, session=None) -> DataHubResponse: if kb_table: # this is the knowledge db kb_table.delete_query(query) - return DataHubResponse() + return OkResponse() raise NotImplementedError(f"Can't delete object: {query_table}") @@ -157,17 +157,15 @@ def query(self, query: ASTNode | str = None, session=None) -> DataHubResponse: # this is the view df = self.project.query_view(query, session) - columns_info = [{"name": k, "type": v} for k, v in df.dtypes.items()] - - return DataHubResponse(data_frame=df, columns=columns_info) + columns = [Column(name=k, dtype=v) for k, v in df.dtypes.items()] + return TableResponse(data=df, columns=columns) kb_table = session.kb_controller.get_table(query_table, self.project.id) if kb_table: # this is the knowledge db df = kb_table.select_query(query) - columns_info = [{"name": k, "type": v} for k, v in df.dtypes.items()] - - return DataHubResponse(data_frame=df, columns=columns_info) + columns = [Column(name=k, dtype=v) for k, v in df.dtypes.items()] + return TableResponse(data=df, columns=columns) raise EntityNotExistsError(f"Table '{query_table}' not found in database", self.project.name) else: @@ -175,7 +173,7 @@ def query(self, query: ASTNode | str = None, session=None) -> DataHubResponse: def create_table( self, table_name: Identifier, result_set=None, is_replace=False, params=None, is_create=None, **kwargs - ) -> DataHubResponse: + ) -> OkResponse: # is_create - create table # is_replace - drop table if exists # is_create==False and is_replace==False: just insert @@ -195,9 +193,6 @@ def create_table( kb_table.clear() df = result_set.to_df() - result_df = kb_table.insert(df, params=params) - if isinstance(result_df, pd.DataFrame): - return DataHubResponse(data_frame=result_df) - return DataHubResponse() - + kb_table.insert(df, params=params) + return OkResponse() raise ValueError(f"Table or Knowledge Base '{table_name}' doesn't exist") diff --git a/mindsdb/api/executor/planner/plan_join.py b/mindsdb/api/executor/planner/plan_join.py index 2174d6366ee..00d0942c0ef 100644 --- a/mindsdb/api/executor/planner/plan_join.py +++ b/mindsdb/api/executor/planner/plan_join.py @@ -20,6 +20,7 @@ from mindsdb.api.executor.planner.exceptions import PlanningException from mindsdb.api.executor.planner.steps import ( FetchDataframeStep, + FetchDataframeStepPartition, JoinStep, ApplyPredictorStep, SubSelectStep, @@ -276,21 +277,32 @@ def _check_node_condition(node, **kwargs): self.query_context["binary_ops"] = binary_ops def check_use_limit(self, query_in, join_sequence): - # use limit for first table? - # if only models + # if only models (predictors), not for regular table joins use_limit = False - if query_in.having is None or query_in.group_by is None and query_in.limit is not None: - join = None + optimize_inner_join = False + if query_in.having is None and query_in.group_by is None and query_in.limit is not None: use_limit = True + + # Check what we're joining + has_predictor = False + for item in join_sequence: if isinstance(item, TableInfo): - if item.predictor_info is None and item.sub_select is None: - if join is not None: - if join.join_type.upper() != "LEFT JOIN": - use_limit = False - elif isinstance(item, Join): - join = item + if item.predictor_info is not None: + has_predictor = True + elif isinstance(item, Join) and not has_predictor: + # LEFT JOIN preserves left table row count - LIMIT pushdown is safe + join_type = str(item.join_type).upper() if item.join_type else "" + if join_type in ("LEFT JOIN", "LEFT OUTER JOIN"): + continue + + if query_in.offset is None: + optimize_inner_join = True + continue + use_limit = False + self.query_context["use_limit"] = use_limit + self.query_context["optimize_inner_join"] = optimize_inner_join def plan_join_tables(self, query_in): # plan all nested selects in 'where' @@ -354,6 +366,7 @@ def _check_identifiers(node, is_table, **kwargs): col_parts.append(node.parts[-1]) node.parts = col_parts + query.cte = None # already used before query_traversal(query, _check_identifiers) self.check_query_conditions(query) @@ -367,6 +380,8 @@ def _check_identifiers(node, is_table, **kwargs): # create plan # TODO add optimization: one integration without predictor + planned_steps_before_join = len(self.planner.plan.steps) + self.step_stack = [] for item in join_sequence: if isinstance(item, TableInfo): @@ -395,9 +410,51 @@ def _check_identifiers(node, is_table, **kwargs): query_in.where = query.where + if self.query_context["optimize_inner_join"]: + self.planner.plan.steps = self.optimize_inner_join(self.planner.plan.steps, planned_steps_before_join) + self.close_partition() return self.step_stack.pop() + def optimize_inner_join(self, steps_in, min_step_num): + steps_out = [] + + partition_step = None + partition_used = False + + for i, step in enumerate(steps_in): + if partition_step is None: + if ( + i >= min_step_num + and isinstance(step, FetchDataframeStep) + and not partition_used + and step.query.limit is not None + ): + limit = step.query.limit.value + step.query.limit = None + partition_used = True + + partition_step = FetchDataframeStepPartition( + step_num=step.step_num, + integration=step.integration, + query=step.query, + raw_query=step.raw_query, + params=step.params, + condition={"limit": limit}, + ) + steps_out.append(partition_step) + continue + + elif isinstance(step, (JoinStep, FetchDataframeStep, SubSelectStep)): + partition_step.steps.append(step) + continue + else: + partition_step = None + + steps_out.append(step) + + return steps_out + def process_subselect(self, item): # is sub select item.sub_select.alias = None diff --git a/mindsdb/api/executor/sql_query/result_set.py b/mindsdb/api/executor/sql_query/result_set.py index 4d037af7bff..f3b22e13e63 100644 --- a/mindsdb/api/executor/sql_query/result_set.py +++ b/mindsdb/api/executor/sql_query/result_set.py @@ -1,7 +1,6 @@ import copy from array import array -from typing import Any -from dataclasses import dataclass, field, MISSING +from typing import Any, Generator import numpy as np import pandas as pd @@ -11,8 +10,10 @@ from mindsdb_sql_parser.ast import TableColumn from mindsdb.utilities import log +from mindsdb.utilities.types.column import Column from mindsdb.api.executor.exceptions import WrongArgumentError from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE +from mindsdb.integrations.libs.response import TableResponse logger = log.getLogger(__name__) @@ -57,31 +58,6 @@ def _dump_vector(value: Any) -> Any: return value -@dataclass(kw_only=True, slots=True) -class Column: - name: str = field(default=MISSING) - alias: str | None = None - table_name: str | None = None - table_alias: str | None = None - type: MYSQL_DATA_TYPE | None = None - database: str | None = None - flags: dict = None - charset: str | None = None - - def __post_init__(self): - if self.alias is None: - self.alias = self.name - if self.table_alias is None: - self.table_alias = self.table_name - - def get_hash_name(self, prefix): - table_name = self.table_name if self.table_alias is None else self.table_alias - name = self.name if self.alias is None else self.alias - - name = f"{prefix}_{table_name}_{name}" - return name - - def rename_df_columns(df: pd.DataFrame, names: list | None = None) -> None: """Inplace rename of dataframe columns @@ -104,6 +80,7 @@ def __init__( affected_rows: int | None = None, is_prediction: bool = False, mysql_types: list[MYSQL_DATA_TYPE] | None = None, + table_response: TableResponse = None, ): """ Args: @@ -112,9 +89,13 @@ def __init__( df (pd.DataFrame): injected dataframe, have to have enumerated columns and length equal to columns affected_rows (int): number of affected rows """ - if columns is None: - columns = [] - self._columns = columns + self._table_response: TableResponse = table_response + if table_response: + self._columns = table_response.columns + elif columns is None: + self._columns = [] + else: + self._columns = columns if df is None: if values is None: @@ -132,15 +113,19 @@ def __init__( def __repr__(self): col_names = ", ".join([col.name for col in self._columns]) + if self._table_response is not None: + return f"{self.__class__.__name__}(table response, cols: {col_names})" return f"{self.__class__.__name__}({self.length()} rows, cols: {col_names})" def __len__(self) -> int: + self._load_table_response() if self._df is None: return 0 return len(self._df) def __getitem__(self, slice_val): # return resultSet with sliced dataframe + self._load_table_response() df = self._df[slice_val] return ResultSet(columns=self.columns, df=df) @@ -170,6 +155,10 @@ def from_df( rename_df_columns(df) return cls(df=df, columns=columns, is_prediction=is_prediction, mysql_types=mysql_types) + @classmethod + def from_table_response(cls, table_response): + return cls(table_response=table_response) + @classmethod def from_df_cols(cls, df: pd.DataFrame, columns_dict: dict[str, Column], strict: bool = True) -> "ResultSet": """Create ResultSet from dataframe and dictionary of columns @@ -251,6 +240,7 @@ def get_col_index(self, col): return col_idx def add_column(self, col, values=None): + self._load_table_response() self._columns.append(col) col_idx = len(self._columns) - 1 @@ -259,6 +249,7 @@ def add_column(self, col, values=None): return col_idx def del_column(self, col): + self._load_table_response() idx = self.get_col_index(col) self._columns.pop(idx) @@ -296,27 +287,56 @@ def copy_column_to(self, col, result_set2): return col2 def set_col_type(self, col_idx, type_name): + self._load_table_response() self.columns[col_idx].type = type_name if self._df is not None: self._df[col_idx] = self._df[col_idx].astype(type_name) # --- records --- + def _load_table_response(self): + """Fully load the table response by fetching all data from the table response and storing it in the _df attribute.""" + if self._table_response is None: + return + + self._table_response.fetchall() + if self._df is None: + self._df = self._table_response._data + else: + self._df = pd.concat([self._df, self._table_response._data]) + self._table_response = None + + def stream_data(self) -> Generator[pd.DataFrame, None, None]: + """Stream data from the result set. + + Yields: + pd.DataFrame: Data frame. + """ + if self._df is not None: + yield self._df + else: + for el in self._table_response.iterate_no_save(): + yield el + def get_raw_df(self): + self._load_table_response() + names = range(len(self._columns)) if self._df is None: - names = range(len(self._columns)) return pd.DataFrame([], columns=names) + self._df.columns = names return self._df def add_raw_df(self, df): if len(df.columns) != len(self._columns): raise WrongArgumentError(f"Record length mismatch columns length: {len(df.columns)} != {len(self.columns)}") + self._load_table_response() rename_df_columns(df) if self._df is None: self._df = df else: + rename_df_columns(self._df) self._df = pd.concat([self._df, df], ignore_index=True) def add_raw_values(self, values): @@ -341,6 +361,7 @@ def get_ast_columns(self) -> list[TableColumn]: list[TableColumn]: A list of TableColumn objects with properly mapped SQLAlchemy types """ columns: list[TableColumn] = [] + self._load_table_response() type_mapping = { MYSQL_DATA_TYPE.TINYINT: sqlalchemy_types.INTEGER, @@ -382,6 +403,7 @@ def to_lists(self, json_types=False): array->list, datetime64->str :return: list of lists """ + self._load_table_response() if len(self.get_raw_df()) == 0: return [] @@ -408,6 +430,7 @@ def get_column_values(self, col_idx): def set_column_values(self, col_name, values): # values is one value or list of values + self._load_table_response() cols = self.find_columns(col_name) if len(cols) == 0: col_idx = self.add_column(Column(name=col_name)) @@ -424,7 +447,7 @@ def add_from_result_set(self, rs): for name in self.get_column_names(): col_sequence.append(source_names.index(name)) - raw_df = rs.get_raw_df()[col_sequence] + raw_df = rs.get_raw_df().iloc[:, col_sequence] self.add_raw_df(raw_df) diff --git a/mindsdb/api/executor/sql_query/sql_query.py b/mindsdb/api/executor/sql_query/sql_query.py index 7adecf15a86..0ec9e58a872 100644 --- a/mindsdb/api/executor/sql_query/sql_query.py +++ b/mindsdb/api/executor/sql_query/sql_query.py @@ -20,6 +20,8 @@ ApplyTimeseriesPredictorStep, ApplyPredictorRowStep, ApplyPredictorStep, + InsertToTable, + FetchDataframeStepPartition, ) from mindsdb.api.executor.planner.exceptions import PlanningException @@ -33,15 +35,15 @@ UnknownError, LogicError, ) +from mindsdb.interfaces.query_context.context_controller import query_context_controller import mindsdb.utilities.profiler as profiler from mindsdb.utilities.fs import create_process_mark, delete_process_mark from mindsdb.utilities.exception import EntityNotExistsError -from mindsdb.interfaces.query_context.context_controller import query_context_controller from mindsdb.utilities.context import context as ctx - +from mindsdb.utilities.types.column import Column from . import steps -from .result_set import ResultSet, Column +from .result_set import ResultSet from .steps.base import BaseStepCall @@ -276,6 +278,16 @@ def execute_query(self): ) if self.planner.plan.is_async and ctx.task_id is None: + # release KB locks before inserting in background + db_released, partition_params = self.release_kb_lock(steps) + if db_released: + # faiss db is used as a table to insert + if partition_params.get("threads", 1) > 1: + raise ValueError( + "It is not possible to use threads for FAISS knowledge base, " + f"please remove `threads={partition_params['threads']}` parameter" + ) + # add to task self.run_query.add_to_task() # return query info @@ -288,7 +300,7 @@ def execute_query(self): ctx.run_query_id = self.run_query.record.id - step_result = None + step_result: list[ResultSet] = None process_mark = None try: steps_classes = (x.__class__ for x in steps) @@ -302,7 +314,7 @@ def execute_query(self): except Exception as e: if self.run_query is not None: # set error and place where it stopped - self.run_query.on_error(e, step.step_num, self.steps_data) + self.run_query.on_error(e, step.step_num if "step" in locals() else -1, self.steps_data) raise e else: # mark running query as completed @@ -323,10 +335,6 @@ def execute_query(self): self.fetched_data = step_result try: - if hasattr(self, "columns_list") is False: - # how it becomes False? - self.columns_list = self.fetched_data.columns - if self.columns_list is None: self.columns_list = self.fetched_data.columns @@ -344,5 +352,21 @@ def execute_step(self, step, steps_data=None): return handler(self, steps_data=steps_data).call(step) + def release_kb_lock(self, steps): + # find knowledge bases that are used as tables to insert. + # then release locks of vector for these knowledge bases + # return partition step params and databases names that were unlocked + db_released, partition_params = [], {} + for step in steps: + if isinstance(step, InsertToTable): + db_name = self.session.kb_controller.release_lock(step.table, project_name=self.database) + if db_name: + db_released.append(db_name) + if isinstance(step, FetchDataframeStepPartition): + dbs, _ = self.release_kb_lock(step.steps) + db_released.extend(dbs) + partition_params.update(step.params) + return db_released, partition_params + SQLQuery.register_steps() diff --git a/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py b/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py index 50a0c646e41..a12e56f80fb 100644 --- a/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +++ b/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py @@ -19,7 +19,8 @@ ApplyPredictorStep, ) -from mindsdb.api.executor.sql_query.result_set import ResultSet, Column +from mindsdb.api.executor.sql_query.result_set import ResultSet +from mindsdb.utilities.types.column import Column from mindsdb.utilities.cache import get_cache, dataframe_checksum from .base import BaseStepCall diff --git a/mindsdb/api/executor/sql_query/steps/fetch_dataframe.py b/mindsdb/api/executor/sql_query/steps/fetch_dataframe.py index b81215b01cb..d73666e49e3 100644 --- a/mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +++ b/mindsdb/api/executor/sql_query/steps/fetch_dataframe.py @@ -11,12 +11,12 @@ ) from mindsdb.api.executor.planner.steps import FetchDataframeStep -from mindsdb.api.executor.datahub.classes.response import DataHubResponse from mindsdb.api.executor.sql_query.result_set import ResultSet from mindsdb.api.executor.planner.step_result import Result from mindsdb.api.executor.exceptions import UnknownError -from mindsdb.integrations.utilities.query_traversal import query_traversal from mindsdb.interfaces.query_context.context_controller import query_context_controller +from mindsdb.integrations.utilities.query_traversal import query_traversal +from mindsdb.integrations.libs.response import TableResponse from .base import BaseStepCall @@ -92,7 +92,7 @@ def call(self, step): if query is None: table_alias = (self.context.get("database"), "result", "result") - response: DataHubResponse = dn.query(step.raw_query, session=self.session) + response: TableResponse = dn.query(step.raw_query, session=self.session) df = response.data_frame else: if isinstance(step.query, (Union, Intersect)): @@ -108,11 +108,15 @@ def call(self, step): query, context_callback = query_context_controller.handle_db_context_vars(query, dn, self.session) - response: DataHubResponse = dn.query(query=query, session=self.session) - df = response.data_frame - + response: TableResponse = dn.query(query=query, session=self.session) + response.set_columns_attrs( + table_name=table_alias[1], + table_alias=table_alias[2], + database=table_alias[0], + ) if context_callback: - context_callback(df, response.columns) + context_callback(response.data_frame, response.columns) + return ResultSet.from_table_response(response) # if query registered, set progress if self.sql_query.run_query is not None: @@ -122,5 +126,5 @@ def call(self, step): table_name=table_alias[1], table_alias=table_alias[2], database=table_alias[0], - mysql_types=response.mysql_types, + mysql_types=[column.type for column in response.columns], ) diff --git a/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py b/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py index 30de48b9442..9775a2867e9 100644 --- a/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +++ b/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py @@ -1,3 +1,4 @@ +import time import copy import pandas as pd from typing import List @@ -90,10 +91,14 @@ def call(self, step: FetchDataframeStepPartition) -> ResultSet: use_threads = False on_error = step.params.get("error", "raise") - if use_threads: - return self.fetch_threads(run_query, query, thread_count=thread_count, on_error=on_error) - else: - return self.fetch_iterate(run_query, query, on_error=on_error) + try: + if use_threads: + return self.fetch_threads(run_query, query, thread_count=thread_count, on_error=on_error) + else: + return self.fetch_iterate(run_query, query, on_error=on_error) + finally: + # release KB locks after inserting in background + self.sql_query.release_kb_lock(self.substeps) def repeat_till_reach_limit(self, step, limit): first_table_limit = limit * 2 @@ -105,6 +110,7 @@ def repeat_till_reach_limit(self, step, limit): query, context_callback = query_context_controller.handle_db_context_vars(query, dn, self.session) try_num = 1 + started_at = time.time() while True: self.substeps = copy.deepcopy(step.steps) query2 = copy.deepcopy(query) @@ -126,7 +132,8 @@ def repeat_till_reach_limit(self, step, limit): result = result[:limit] break - if try_num > 3: + # break if process is too long or to many tries + if try_num > 3 or time.time() - started_at > 5: # the last try without the limit first_table_limit = None continue diff --git a/mindsdb/api/executor/sql_query/steps/insert_step.py b/mindsdb/api/executor/sql_query/steps/insert_step.py index e27994f9123..843345042a3 100644 --- a/mindsdb/api/executor/sql_query/steps/insert_step.py +++ b/mindsdb/api/executor/sql_query/steps/insert_step.py @@ -1,7 +1,8 @@ from mindsdb_sql_parser.ast import Identifier, Function from mindsdb.api.executor.planner.steps import SaveToTable, InsertToTable, CreateTableStep -from mindsdb.api.executor.sql_query.result_set import ResultSet, Column +from mindsdb.api.executor.sql_query.result_set import ResultSet +from mindsdb.utilities.types.column import Column from mindsdb.utilities.exception import EntityNotExistsError from mindsdb.api.executor.exceptions import NotSupportedYet, LogicError from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES @@ -100,10 +101,6 @@ def call(self, step): response = dn.create_table( table_name=table_name, result_set=data, is_replace=is_replace, is_create=is_create, params=step.params ) - if response.data_frame is not None: - cols = [Column(name=col_name) for col_name in response.data_frame.columns] - values = response.data_frame.values.tolist() - return ResultSet(affected_rows=response.affected_rows, columns=cols, values=values) return ResultSet(affected_rows=response.affected_rows) diff --git a/mindsdb/api/executor/sql_query/steps/prepare_steps.py b/mindsdb/api/executor/sql_query/steps/prepare_steps.py index b846d4f66b2..7b2950a8e5f 100644 --- a/mindsdb/api/executor/sql_query/steps/prepare_steps.py +++ b/mindsdb/api/executor/sql_query/steps/prepare_steps.py @@ -9,18 +9,18 @@ GetTableColumns, ) -from mindsdb.api.executor.sql_query.result_set import ResultSet, Column +from mindsdb.api.executor.sql_query.result_set import ResultSet +from mindsdb.utilities.types.column import Column from mindsdb.utilities.config import config from .base import BaseStepCall class GetPredictorColumnsCall(BaseStepCall): - bind = GetPredictorColumns def call(self, step): - mindsdb_database_name = config.get('default_project') + mindsdb_database_name = config.get("default_project") predictor_name = step.predictor.parts[-1] dn = self.session.datahub.get(mindsdb_database_name) @@ -28,20 +28,14 @@ def call(self, step): data = ResultSet() for column_name in columns_names: - data.add_column(Column( - name=column_name, - table_name=predictor_name, - database=mindsdb_database_name - )) + data.add_column(Column(name=column_name, table_name=predictor_name, database=mindsdb_database_name)) return data class GetTableColumnsCall(BaseStepCall): - bind = GetTableColumns def call(self, step): - table = step.table dn = self.session.datahub.get(step.namespace) ds_query = Select(from_table=Identifier(table), targets=[Star()], limit=Constant(0)) @@ -50,10 +44,12 @@ def call(self, step): data = ResultSet() for column in response.columns: - data.add_column(Column( - name=column['name'], - type=column.get('type'), - table_name=table, - database=self.context.get('database') - )) + data.add_column( + Column( + name=column["name"], + type=column.get("type"), + table_name=table, + database=self.context.get("database"), + ) + ) return data diff --git a/mindsdb/api/executor/sql_query/steps/subselect_step.py b/mindsdb/api/executor/sql_query/steps/subselect_step.py index f99b625f7aa..06050f9b91d 100644 --- a/mindsdb/api/executor/sql_query/steps/subselect_step.py +++ b/mindsdb/api/executor/sql_query/steps/subselect_step.py @@ -16,7 +16,8 @@ from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import SERVER_VARIABLES from mindsdb.api.executor.planner.step_result import Result from mindsdb.api.executor.planner.steps import SubSelectStep, QueryStep -from mindsdb.api.executor.sql_query.result_set import ResultSet, Column +from mindsdb.api.executor.sql_query.result_set import ResultSet +from mindsdb.utilities.types.column import Column from mindsdb.api.executor.utilities.sql import query_df from mindsdb.api.executor.exceptions import KeyColumnDoesNotExist from mindsdb.integrations.utilities.query_traversal import query_traversal @@ -298,6 +299,8 @@ def check_fields(node, is_target=None, **kwargs): "version for the right syntax to use near '$$' at line 1" ) + key, column_quoted = (), False + match node.parts, node.is_quoted: case [column_name], [column_quoted]: if column_name in aliases: diff --git a/mindsdb/api/executor/utilities/sql.py b/mindsdb/api/executor/utilities/sql.py index 48091e5fff1..f02a9e02d67 100644 --- a/mindsdb/api/executor/utilities/sql.py +++ b/mindsdb/api/executor/utilities/sql.py @@ -245,12 +245,30 @@ def query_dfs(dataframes, query_ast, session=None): else: user_functions = None + # region collect table aliases. Strip schema/db prefix from column identifiers, but keep table aliases. + # Examples: + # files.col = 1 -> col = 1 (schema prefix stripped) + # files.a1.col = 1 -> a1.col = 1 (schema prefix stripped, alias kept) + # a1.col = a2.col -> a1.col = a2.col (aliases untouched, no schema prefix) + # "Custom SQL Query".col -> col (replaced subquery alias stripped) + known_aliases = set() + + def collect_aliases(node, is_table, **kwargs): + if not is_table or not isinstance(node, Identifier): + return + known_aliases.add(node.parts[-1].lower()) + if node.alias is not None: + known_aliases.add(node.alias.parts[-1].lower()) + + query_traversal(query_ast, collect_aliases) + # endregion + def adapt_query(node, is_table, **kwargs): if is_table: return if isinstance(node, Identifier): - if len(node.parts) > 1: - node.parts = [node.parts[-1]] + if len(node.parts) > 1 and node.parts[0].lower() not in known_aliases: + node.parts = node.parts[1:] return node if isinstance(node, Function): fnc = mysql_to_duckdb_fnc(node) diff --git a/mindsdb/api/http/initialize.py b/mindsdb/api/http/initialize.py index 66978cb69c5..f0d72aef0bb 100644 --- a/mindsdb/api/http/initialize.py +++ b/mindsdb/api/http/initialize.py @@ -28,6 +28,7 @@ from mindsdb.api.http.namespaces.default import ns_conf as default_ns, check_session_auth from mindsdb.api.http.namespaces.file import ns_conf as file_ns from mindsdb.api.http.namespaces.handlers import ns_conf as handlers_ns +from mindsdb.api.http.namespaces.integrations import ns_conf as integrations_ns from mindsdb.api.http.namespaces.knowledge_bases import ns_conf as knowledge_bases_ns from mindsdb.api.http.namespaces.models import ns_conf as models_ns from mindsdb.api.http.namespaces.projects import ns_conf as projects_ns @@ -280,6 +281,7 @@ def root_index(path): agents_ns, jobs_ns, knowledge_bases_ns, + integrations_ns, ] for ns in protected_namespaces: diff --git a/mindsdb/api/http/namespaces/agents.py b/mindsdb/api/http/namespaces/agents.py index a57c4c09dc2..3a7d6612499 100644 --- a/mindsdb/api/http/namespaces/agents.py +++ b/mindsdb/api/http/namespaces/agents.py @@ -28,14 +28,16 @@ def create_agent(project_name, name, agent): if name is None: return http_error(HTTPStatus.BAD_REQUEST, "Missing field", 'Missing "name" field for agent') - model_name = agent.get("model_name") - provider = agent.get("provider") - params = agent.get("params", {}) + if agent.get("model"): + model = agent["model"] + elif "model_name" in agent: + model = {"model_name": agent.get("model_name"), "provider": agent.get("provider")} + else: + model = None + if agent.get("data"): params["data"] = agent["data"] - if agent.get("model"): - params["model"] = agent["model"] if agent.get("prompt_template"): params["prompt_template"] = agent["prompt_template"] @@ -54,23 +56,21 @@ def create_agent(project_name, name, agent): ) try: - created_agent = agents_controller.add_agent( - name=name, project_name=project_name, model_name=model_name, provider=provider, params=params - ) + created_agent = agents_controller.add_agent(name=name, project_name=project_name, model=model, params=params) return created_agent.as_dict(), HTTPStatus.CREATED except (ValueError, EntityExistsError): # Model doesn't exist. return http_error( HTTPStatus.NOT_FOUND, "Resource not found", - f'The model "{model_name}" does not exist. Please ensure that the name is correct and try again.', + f'The model "{model}" does not exist. Please ensure that the name is correct and try again.', ) except NotImplementedError: # Free users trying to create agent. return http_error( HTTPStatus.UNAUTHORIZED, "Unavailable to free users", - f'The model "{model_name}" does not exist. Please ensure that the name is correct and try again.', + f'The model "{model}" does not exist. Please ensure that the name is correct and try again.', ) @@ -174,13 +174,17 @@ def put(self, project_name, agent_name): # Update try: - model_name = agent.get("model_name", None) - provider = agent.get("provider") params = agent.get("params", {}) + + if agent.get("model"): + model = agent["model"] + elif "model_name" in agent: + model = {"model_name": agent.get("model_name"), "provider": agent.get("provider")} + else: + model = None + if agent.get("data"): params["data"] = agent["data"] - if agent.get("model"): - params["model"] = agent["model"] if agent.get("prompt_template"): params["prompt_template"] = agent["prompt_template"] @@ -188,8 +192,7 @@ def put(self, project_name, agent_name): agent_name, project_name=project_name, name=name, - model_name=model_name, - provider=provider, + model=model, params=params, ) diff --git a/mindsdb/api/http/namespaces/config.py b/mindsdb/api/http/namespaces/config.py index b31e8d9b293..da4412b7891 100644 --- a/mindsdb/api/http/namespaces/config.py +++ b/mindsdb/api/http/namespaces/config.py @@ -16,6 +16,10 @@ from mindsdb.utilities.functions import decrypt, encrypt from mindsdb.utilities.config import Config from mindsdb.integrations.libs.response import HandlerStatusResponse +from mindsdb.interfaces.knowledge_base.default_storage_resolver import ( + get_env_available_engines, + resolve_default_storage_engines, +) logger = log.getLogger(__name__) @@ -34,6 +38,11 @@ def get(self): if value is not None: resp[key] = value + knowledge_bases_config = copy.deepcopy(config["knowledge_bases"]) + knowledge_bases_config.update(resolve_default_storage_engines(config)) + knowledge_bases_config["engines"] = get_env_available_engines() + resp["knowledge_bases"] = knowledge_bases_config + api_status = get_api_status() api_configs = copy.deepcopy(config["api"]) for api_name, api_config in api_configs.items(): @@ -47,12 +56,18 @@ def get(self): def put(self): data = request.json - allowed_arguments = {"auth", "default_llm", "default_embedding_model", "default_reranking_model"} + allowed_arguments = { + "auth", + "default_llm", + "default_embedding_model", + "default_reranking_model", + "knowledge_bases", + } unknown_arguments = list(set(data.keys()) - allowed_arguments) if len(unknown_arguments) > 0: return http_error(HTTPStatus.BAD_REQUEST, "Wrong arguments", f"Unknown argumens: {unknown_arguments}") - nested_keys_to_validate = {"auth"} + nested_keys_to_validate = {"auth", "knowledge_bases"} for key in data.keys(): if key in nested_keys_to_validate: unknown_arguments = list(set(data[key].keys()) - set(Config()[key].keys())) diff --git a/mindsdb/api/http/namespaces/configs/integrations.py b/mindsdb/api/http/namespaces/configs/integrations.py new file mode 100644 index 00000000000..14d3fab27fc --- /dev/null +++ b/mindsdb/api/http/namespaces/configs/integrations.py @@ -0,0 +1,3 @@ +from flask_restx import Namespace + +ns_conf = Namespace("integrations", description="API for integration-level operations (passthrough, capabilities)") diff --git a/mindsdb/api/http/namespaces/databases.py b/mindsdb/api/http/namespaces/databases.py index 895bc59656f..f5a75f6bc73 100644 --- a/mindsdb/api/http/namespaces/databases.py +++ b/mindsdb/api/http/namespaces/databases.py @@ -69,7 +69,7 @@ def post(self): status = HandlerStatusResponse(success=False, error_message=str(import_error)) if status.success is not True: - if hasattr(status, "redirect_url") and isinstance(status, str): + if hasattr(status, "redirect_url") and isinstance(status.redirect_url, str): return { "status": "redirect_required", "redirect_url": status.redirect_url, @@ -136,7 +136,7 @@ def post(self): shutil.rmtree(temp_dir) if not status.success: - if hasattr(status, "redirect_url") and isinstance(status, str): + if hasattr(status, "redirect_url") and isinstance(status.redirect_url, str): return { "status": "redirect_required", "redirect_url": status.redirect_url, diff --git a/mindsdb/api/http/namespaces/default.py b/mindsdb/api/http/namespaces/default.py index cdcf39d387f..4b2e0940ba5 100644 --- a/mindsdb/api/http/namespaces/default.py +++ b/mindsdb/api/http/namespaces/default.py @@ -65,7 +65,10 @@ def post(self): session.permanent = True if config["auth"]["http_auth_type"] in (HTTP_AUTH_TYPE.TOKEN, HTTP_AUTH_TYPE.SESSION_OR_TOKEN): - response["token"] = generate_pat() + if config["auth"]["token"]: + response["token"] = config["auth"]["token"] + else: + response["token"] = generate_pat() return response, 200 diff --git a/mindsdb/api/http/namespaces/file.py b/mindsdb/api/http/namespaces/file.py index 26995a19a0a..c93562b5a9f 100644 --- a/mindsdb/api/http/namespaces/file.py +++ b/mindsdb/api/http/namespaces/file.py @@ -219,7 +219,12 @@ def on_file(file): with requests.get(url, stream=True) as r: if r.status_code != 200: return http_error(400, "Error getting file", f"Got status code: {r.status_code}") - file_path = os.path.join(temp_dir_path, data["file"]) + + temp_dir_real = os.path.realpath(temp_dir_path) + file_path = os.path.realpath(os.path.join(temp_dir_real, data["file"])) + if os.path.commonpath([file_path, temp_dir_real]) != temp_dir_real: + return http_error(400, "Invalid file path", f"Wrong file name: {data['file']}") + with open(file_path, "wb") as f: for chunk in r.iter_content(chunk_size=8192): f.write(chunk) @@ -234,23 +239,33 @@ def on_file(file): original_file_name = clear_filename(data.get("original_file_name")) file_path = os.path.join(temp_dir_path, data["file"]) + temp_dir_real = os.path.realpath(temp_dir_path) + file_path_real = os.path.realpath(file_path) + if os.path.commonpath([file_path_real, temp_dir_real]) != temp_dir_real: + shutil.rmtree(temp_dir_path, ignore_errors=True) + return http_error(400, "Invalid file path", f"Wrong file name: {data['file']}") + file_path = file_path_real lp = file_path.lower() if lp.endswith((".zip", ".tar.gz")): - if lp.endswith(".zip"): - with zipfile.ZipFile(file_path) as f: - f.extractall(temp_dir_path) - elif lp.endswith(".tar.gz"): - with tarfile.open(file_path) as f: - safe_extract(f, temp_dir_path) + try: + if lp.endswith(".zip"): + with zipfile.ZipFile(file_path) as f: + safe_extract(f, temp_dir_path) + elif lp.endswith(".tar.gz"): + with tarfile.open(file_path) as f: + safe_extract(f, temp_dir_path) + except Exception as e: + shutil.rmtree(temp_dir_path, ignore_errors=True) + return http_error(500, "Error", str(e)) os.remove(file_path) files = os.listdir(temp_dir_path) if len(files) != 1: - os.rmdir(temp_dir_path) + shutil.rmtree(temp_dir_path, ignore_errors=True) return http_error(400, "Wrong content.", "Archive must contain only one data file.") file_path = os.path.join(temp_dir_path, files[0]) mindsdb_file_name = files[0] if not os.path.isfile(file_path): - os.rmdir(temp_dir_path) + shutil.rmtree(temp_dir_path, ignore_errors=True) return http_error(400, "Wrong content.", "Archive must contain data file in root.") try: diff --git a/mindsdb/api/http/namespaces/handlers.py b/mindsdb/api/http/namespaces/handlers.py index 352b1258ae8..551b48ab7ca 100644 --- a/mindsdb/api/http/namespaces/handlers.py +++ b/mindsdb/api/http/namespaces/handlers.py @@ -1,6 +1,6 @@ import os import tempfile -import importlib +import importlib.util as iutil import multipart from pathlib import Path from http import HTTPStatus @@ -31,7 +31,7 @@ def _resolve_handler_readme_path(handler_folder: str) -> Path: if handler_folder_name != handler_folder or ".." in handler_folder: raise ValueError(f"Handler folder '{handler_folder}' is invalid.") - mindsdb_path = Path(importlib.util.find_spec("mindsdb").origin).parent + mindsdb_path = Path(iutil.find_spec("mindsdb").origin).parent base_handlers_path = mindsdb_path.joinpath("integrations/handlers").resolve() readme_path = base_handlers_path.joinpath(handler_folder_name).joinpath("README.md").resolve() @@ -54,6 +54,8 @@ def get(self): handlers = ca.integration_controller.get_handlers_import_status() result = [] for handler_type, handler_meta in handlers.items(): + if handler_meta is None: + continue # remove non-integration handlers if handler_type not in ["utilities", "dummy_data"]: row = {"name": handler_type} @@ -72,12 +74,19 @@ def get(self, handler_name): handler_meta = ca.integration_controller.get_handlers_metadata().get(handler_name) if handler_meta is None: return http_error(HTTPStatus.NOT_FOUND, "Icon not found", f"Icon for {handler_name} not found") - icon_name = handler_meta["icon"]["name"] - handler_folder = handler_meta["import"]["folder"] - mindsdb_path = Path(importlib.util.find_spec("mindsdb").origin).parent - icon_path = mindsdb_path.joinpath("integrations/handlers").joinpath(handler_folder).joinpath(icon_name) - if icon_path.is_absolute() is False: - icon_path = Path(os.getcwd()).joinpath(icon_path) + icon = handler_meta.get("icon") + if icon is None or handler_meta.get("path") is None: + return http_error( + HTTPStatus.NOT_FOUND, + "Icon not found", + f"Icon for '{handler_name}' not found", + ) + icon_name = icon["name"] + # Use the stored handler path directly so community handlers + # (stored outside the mindsdb package) are also served correctly. + icon_path = handler_meta["path"] / icon_name + if not icon_path.is_absolute(): + icon_path = Path(os.getcwd()) / icon_path except Exception: error_message = f"Icon for '{handler_name}' not found" logger.warning(error_message) @@ -92,10 +101,12 @@ class HandlerInfo(Resource): @api_endpoint_metrics("GET", "/handlers/handler") def get(self, handler_name): handler_meta = ca.integration_controller.get_handler_meta(handler_name) + if handler_meta is None: + return http_error(HTTPStatus.NOT_FOUND, "Handler not found", f"Handler '{handler_name}' not found") row = {"name": handler_name} row.update(handler_meta) - del row["path"] - del row["icon"] + row.pop("path", None) + row.pop("icon", None) return row @@ -127,12 +138,18 @@ def make_response(*, error_message=None, readme=None): logger.warning(error_message) return make_response(error_message=error_message) - try: - readme_path = _resolve_handler_readme_path(handler_folder) - except ValueError as exc: - error_message = str(exc) - logger.warning(error_message) - return make_response(error_message=error_message) + # Community handlers have their path set after fetching; use it directly. + # Built-in handlers resolve through the package tree. + handler_path = handler_meta.get("path") + if handler_path is not None: + readme_path = Path(handler_path) / "README.md" + else: + try: + readme_path = _resolve_handler_readme_path(handler_folder) + except ValueError as exc: + error_message = str(exc) + logger.warning(error_message) + return make_response(error_message=error_message) try: with open(readme_path, "r", encoding="utf-8") as readme_file: diff --git a/mindsdb/api/http/namespaces/integrations.py b/mindsdb/api/http/namespaces/integrations.py new file mode 100644 index 00000000000..e3fb7836ca8 --- /dev/null +++ b/mindsdb/api/http/namespaces/integrations.py @@ -0,0 +1,174 @@ +from http import HTTPStatus + +from flask import request +from flask_restx import Resource + +from mindsdb.api.http.utils import http_error +from mindsdb.api.http.namespaces.configs.integrations import ns_conf +from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy +from mindsdb.integrations.libs.passthrough import PassthroughProtocol +from mindsdb.integrations.libs.passthrough_types import ( + ALLOWED_METHODS, + FORBIDDEN_REQUEST_HEADERS, + PassthroughError, + PassthroughNotSupportedError, + PassthroughRequest, + PassthroughResponse, + PassthroughValidationError, +) +from mindsdb.interfaces.database.integrations import integration_controller +from mindsdb.metrics.metrics import api_endpoint_metrics +from mindsdb.utilities import log + +logger = log.getLogger(__name__) + + +def _handler_supports_passthrough(handler_module) -> bool: + handler_cls = getattr(handler_module, "Handler", None) + if handler_cls is None: + return False + # issubclass is the right check for Protocol when classes define the + # methods as real methods (not just dynamic attrs); runtime_checkable + # Protocols support issubclass in that mode. + try: + return issubclass(handler_cls, PassthroughProtocol) + except TypeError: + return False + + +def _get_passthrough_handler(name: str): + """Look up the datasource's handler and verify it satisfies the contract.""" + proxy = FakeMysqlProxy() + handler = proxy.session.integration_controller.get_data_handler(name) + if not isinstance(handler, PassthroughProtocol): + raise PassthroughNotSupportedError(f"datasource '{name}' does not support REST passthrough") + return handler + + +def _parse_passthrough_request(payload: dict) -> PassthroughRequest: + if not isinstance(payload, dict): + raise PassthroughValidationError("request body must be a JSON object") + + method = payload.get("method") + path = payload.get("path") + if not isinstance(method, str) or method.upper() not in ALLOWED_METHODS: + raise PassthroughValidationError(f"'method' must be one of {sorted(ALLOWED_METHODS)}") + if not isinstance(path, str) or not path.startswith("/"): + raise PassthroughValidationError("'path' must be a string starting with '/'") + + headers = payload.get("headers") or {} + if not isinstance(headers, dict): + raise PassthroughValidationError("'headers' must be an object") + for name in headers: + if not isinstance(name, str): + raise PassthroughValidationError("header names must be strings") + if name.lower() in FORBIDDEN_REQUEST_HEADERS or name.lower().startswith("proxy-"): + raise PassthroughValidationError(f"header '{name}' is not allowed in passthrough requests") + + query = payload.get("query") or {} + if not isinstance(query, dict): + raise PassthroughValidationError("'query' must be an object") + + return PassthroughRequest( + method=method.upper(), + path=path, + query={str(k): str(v) for k, v in query.items()}, + headers={str(k): str(v) for k, v in headers.items()}, + body=payload.get("body"), + ) + + +def _serialize_response(resp: PassthroughResponse) -> dict: + return { + "status_code": resp.status_code, + "headers": resp.headers, + "body": resp.body, + "content_type": resp.content_type, + } + + +def _passthrough_error_response(err: PassthroughError): + return { + "error_code": err.error_code, + "message": str(err), + }, err.http_status + + +@ns_conf.route("//passthrough") +@ns_conf.param("name", "Datasource name") +class Passthrough(Resource): + @ns_conf.doc("passthrough") + @api_endpoint_metrics("POST", "/integrations/passthrough") + def post(self, name: str): + payload = request.json or {} + try: + req = _parse_passthrough_request(payload) + handler = _get_passthrough_handler(name) + response = handler.api_passthrough(req) + except PassthroughError as e: + return _passthrough_error_response(e) + except Exception as e: # noqa: BLE001 + logger.exception("passthrough failed for datasource %s", name) + return http_error(HTTPStatus.INTERNAL_SERVER_ERROR, "PassthroughError", str(e)) + + return _serialize_response(response), 200 + + +@ns_conf.route("//passthrough/test") +@ns_conf.param("name", "Datasource name") +class PassthroughTest(Resource): + @ns_conf.doc("passthrough_test") + @api_endpoint_metrics("POST", "/integrations/passthrough/test") + def post(self, name: str): + try: + handler = _get_passthrough_handler(name) + except PassthroughError as e: + return _passthrough_error_response(e) + except Exception as e: # noqa: BLE001 + logger.exception("passthrough test lookup failed for datasource %s", name) + return http_error(HTTPStatus.INTERNAL_SERVER_ERROR, "PassthroughError", str(e)) + + result = handler.test_passthrough() + return result, 200 + + +@ns_conf.route("/capabilities") +class Capabilities(Resource): + """Return structured passthrough capabilities per handler. + + The new ``handlers`` dict is the canonical shape callers should migrate + to. The legacy flat ``bearer_passthrough`` list is still populated for + backward compat β€” Minds can migrate on its own timeline. + """ + + @ns_conf.doc("integration_capabilities") + @api_endpoint_metrics("GET", "/integrations/capabilities") + def get(self): + handlers: dict[str, dict] = {} + bearer_engines: list[str] = [] + handler_modules = getattr(integration_controller, "handler_modules", {}) or {} + for engine, module in handler_modules.items(): + try: + if not _handler_supports_passthrough(module): + continue + handler_cls = getattr(module, "Handler", None) + # Read the declarative auth mode off the handler class. Default + # to "bearer" so protocol-only handlers that don't inherit the + # mixin still land in a sensible bucket. + auth_mode = getattr(handler_cls, "_auth_mode", "bearer") + handlers[engine] = { + "auth_modes": [auth_mode], + "operations": ["passthrough"], + } + if auth_mode == "bearer": + bearer_engines.append(engine) + except Exception: + # A broken handler module should not break the capabilities endpoint. + logger.debug("skipping handler %s during capability probe", engine, exc_info=True) + bearer_engines.sort() + return { + "handlers": handlers, + # TODO: remove in v2 once Minds has migrated to the `handlers` + # structured shape. Keep backward-compat for now. + "bearer_passthrough": bearer_engines, + }, 200 diff --git a/mindsdb/api/http/namespaces/knowledge_bases.py b/mindsdb/api/http/namespaces/knowledge_bases.py index ae7a0246cc4..ccddff6ff24 100644 --- a/mindsdb/api/http/namespaces/knowledge_bases.py +++ b/mindsdb/api/http/namespaces/knowledge_bases.py @@ -13,7 +13,6 @@ from mindsdb.integrations.utilities.rag.splitters.file_splitter import FileSplitter, FileSplitterConfig from mindsdb.interfaces.file.file_controller import FileController from mindsdb.interfaces.knowledge_base.preprocessing.constants import ( - DEFAULT_CONTEXT_DOCUMENT_LIMIT, DEFAULT_CRAWL_DEPTH, DEFAULT_WEB_FILTERS, DEFAULT_WEB_CRAWL_LIMIT, @@ -21,10 +20,8 @@ from mindsdb.interfaces.knowledge_base.preprocessing.document_loader import DocumentLoader from mindsdb.metrics.metrics import api_endpoint_metrics from mindsdb.interfaces.database.projects import ProjectController -from mindsdb.interfaces.knowledge_base.controller import KnowledgeBaseTable from mindsdb.utilities import log from mindsdb.utilities.exception import EntityNotExistsError, EntityExistsError -from mindsdb.integrations.utilities.rag.settings import DEFAULT_LLM_MODEL, DEFAULT_RAG_PROMPT_TEMPLATE logger = log.getLogger(__name__) @@ -298,121 +295,3 @@ def delete(self, project_name: str, knowledge_base_name: str): session_controller.kb_controller.delete(knowledge_base_name, project_name) return "", HTTPStatus.NO_CONTENT - - -def _handle_chat_completion(knowledge_base_table: KnowledgeBaseTable, request): - # Check for required parameters - query = request.json.get("query") - - llm_model = request.json.get("llm_model") - if llm_model is None: - logger.warning(f'Missing parameter "llm_model" in POST body, using default llm_model {DEFAULT_LLM_MODEL}') - - prompt_template = request.json.get("prompt_template") - if prompt_template is None: - logger.warning( - f'Missing parameter "prompt_template" in POST body, using default prompt template {DEFAULT_RAG_PROMPT_TEMPLATE}' - ) - - # Get retrieval config, if set - retrieval_config = request.json.get("retrieval_config", {}) - if not retrieval_config: - logger.warning("No retrieval config provided, using default retrieval config") - - # add llm model to retrieval config - if llm_model is not None: - retrieval_config["llm_model_name"] = llm_model - - # add prompt template to retrieval config - if prompt_template is not None: - retrieval_config["rag_prompt_template"] = prompt_template - - # add llm provider to retrieval config if set - llm_provider = request.json.get("model_provider") - if llm_provider is not None: - retrieval_config["llm_provider"] = llm_provider - - # build rag pipeline - rag_pipeline = knowledge_base_table.build_rag_pipeline(retrieval_config) - - # get response from rag pipeline - rag_response = rag_pipeline(query) - response = { - "message": {"content": rag_response.get("answer"), "context": rag_response.get("context"), "role": "assistant"} - } - - return response - - -def _handle_context_completion(knowledge_base_table: KnowledgeBaseTable, request): - # Used for semantic search. - query = request.json.get("query") - # Keyword search. - keywords = request.json.get("keywords") - # Metadata search. - metadata = request.json.get("metadata") - # Maximum amount of documents to return as context. - limit = request.json.get("limit", DEFAULT_CONTEXT_DOCUMENT_LIMIT) - - # Use default distance function & column names for ID, content, & metadata, to keep things simple. - hybrid_search_df = knowledge_base_table.hybrid_search(query, keywords=keywords, metadata=metadata) - - num_documents = len(hybrid_search_df.index) - context_documents = [] - for i in range(limit): - if i >= num_documents: - break - row = hybrid_search_df.iloc[i] - context_documents.append({"id": row["id"], "content": row["content"], "rank": row["rank"]}) - - return {"documents": context_documents} - - -@ns_conf.route("//knowledge_bases//completions") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("knowledge_base_name", "Name of the knowledge_base") -class KnowledgeBaseCompletions(Resource): - @ns_conf.doc("knowledge_base_completions") - @api_endpoint_metrics("POST", "/knowledge_bases/knowledge_base/completions") - def post(self, project_name, knowledge_base_name): - """ - Add support for LLM generation on the response from knowledge base. Default completion type is 'chat' unless specified. - """ - if request.json.get("query") is None: - # "query" is used for semantic search for both completion types. - logger.error('Missing parameter "query" in POST body') - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "query" parameter in POST body' - ) - - project_controller = ProjectController() - try: - project = project_controller.get(name=project_name) - except EntityNotExistsError: - # Project must exist. - logger.error("Project not found, please check the project name exists") - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - session = SessionController() - # Check if knowledge base exists - table = session.kb_controller.get_table(knowledge_base_name, project.id) - if table is None: - logger.error("Knowledge Base not found, please check the knowledge base name exists") - return http_error( - HTTPStatus.NOT_FOUND, - "Knowledge Base not found", - f"Knowledge Base with name {knowledge_base_name} does not exist", - ) - - completion_type = request.json.get("type", "chat") - if completion_type == "context": - return _handle_context_completion(table, request) - if completion_type == "chat": - return _handle_chat_completion(table, request) - return http_error( - HTTPStatus.BAD_REQUEST, - "Invalid parameter", - f'Completion type must be one of: "context", "chat". Received {completion_type}', - ) diff --git a/mindsdb/api/http/namespaces/sql.py b/mindsdb/api/http/namespaces/sql.py index 85dee3aaf13..aa0e4d85b79 100644 --- a/mindsdb/api/http/namespaces/sql.py +++ b/mindsdb/api/http/namespaces/sql.py @@ -1,8 +1,9 @@ import time +from enum import Enum from http import HTTPStatus from collections import defaultdict -from flask import request +from flask import request, Response from flask_restx import Resource from mindsdb_sql_parser import parse_sql @@ -12,15 +13,12 @@ import mindsdb.utilities.profiler as profiler from mindsdb.api.http.utils import http_error from mindsdb.api.http.namespaces.configs.sql import ns_conf -from mindsdb.api.mysql.mysql_proxy.mysql_proxy import SQLAnswer from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy -from mindsdb.api.executor.data_types.response_type import ( - RESPONSE_TYPE as SQL_RESPONSE_TYPE, -) +from mindsdb.api.executor.data_types.sql_answer import SQLAnswer +from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE as SQL_RESPONSE_TYPE from mindsdb.api.executor.sql_query.result_set import ResultSet - -from mindsdb.integrations.utilities.query_traversal import query_traversal from mindsdb.api.executor.exceptions import ExecutorException, UnknownError +from mindsdb.integrations.utilities.query_traversal import query_traversal from mindsdb.metrics.metrics import api_endpoint_metrics from mindsdb.utilities import log from mindsdb.utilities.config import Config @@ -33,6 +31,12 @@ logger = log.getLogger(__name__) +class ReponseFormat(Enum): + DEFAULT = None + SSE = "sse" + JSONLINES = "jsonlines" + + @ns_conf.route("/query") @ns_conf.param("query", "Execute query") class Query(Resource): @@ -46,8 +50,15 @@ def post(self): start_time = time.time() query = request.json["query"] context = request.json.get("context", {}) + if "params" in request.json: ctx.params = request.json["params"] + + try: + response_format = ReponseFormat(request.json.get("response_format", None)) + except ValueError: + return http_error(HTTPStatus.BAD_REQUEST, "Invalid stream format", "Please provide a valid stream format.") + if isinstance(query, str) is False or isinstance(context, dict) is False: return http_error(HTTPStatus.BAD_REQUEST, "Wrong arguments", 'Please provide "query" with the request.') logger.debug(f"Incoming query: {query}") @@ -56,8 +67,6 @@ def post(self): profiler.enable() error_type = None - error_code = None - error_text = None error_traceback = None profiler.set_meta(query=query, api="http", environment=Config().get("environment")) @@ -78,76 +87,79 @@ def post(self): try: handler = mysql_proxy.session.integration_controller.get_data_handler(db) - result = handler.native_query(query) + raw_result = handler.native_query(query) except Exception as e: - query_response = {"type": "error", "error_code": 0, "error_message": str(e)} + error_type = "unexpected" + result = SQLAnswer( + resp_type=SQL_RESPONSE_TYPE.ERROR, + error_code=0, + error_message=str(e), + ) else: - if result.type == SQL_RESPONSE_TYPE.ERROR: - query_response = {"type": "error", "error_code": 0, "error_message": result.error_message} - elif result.type == SQL_RESPONSE_TYPE.OK: - query_response = {"type": "ok"} + if raw_result.type == SQL_RESPONSE_TYPE.ERROR: + # raw_result will be ErrorResponse. + error_type = "expected" + result = SQLAnswer( + resp_type=SQL_RESPONSE_TYPE.ERROR, + error_code=0, + error_message=raw_result.error_message, + ) + elif raw_result.type == SQL_RESPONSE_TYPE.OK: + result = SQLAnswer( + resp_type=SQL_RESPONSE_TYPE.OK, + error_code=0, + error_message=None, + ) else: - df = result.data_frame - result_set = ResultSet.from_df(df) - query_response = { - "type": "table", - "column_names": result_set.get_column_names(), - "data": result_set.to_lists(json_types=True), - } - - query_response["context"] = mysql_proxy.get_context() + # raw_result will be TableResponse. + result_set = ResultSet.from_table_response(raw_result) + result = SQLAnswer( + resp_type=SQL_RESPONSE_TYPE.TABLE, + result_set=result_set, + ) else: try: result: SQLAnswer = mysql_proxy.process_query(query) - query_response: dict = result.dump_http_response() except ExecutorException as e: # classified error error_type = "expected" - query_response = { - "type": SQL_RESPONSE_TYPE.ERROR, - "error_code": 0, - "error_message": str(e), - } + result = SQLAnswer( + resp_type=SQL_RESPONSE_TYPE.ERROR, + error_code=0, + error_message=str(e), + ) logger.warning(f"Error query processing: {e}") except QueryError as e: error_type = "expected" if e.is_expected else "unexpected" - query_response = { - "type": SQL_RESPONSE_TYPE.ERROR, - "error_code": 0, - "error_message": str(e), - } + result = SQLAnswer( + resp_type=SQL_RESPONSE_TYPE.ERROR, + error_code=0, + error_message=str(e), + ) if e.is_expected: logger.warning(f"Query failed due to expected reason: {e}") else: logger.exception("Error query processing:") - except UnknownError as e: - # unclassified + except (UnknownError, Exception) as e: error_type = "unexpected" - query_response = { - "type": SQL_RESPONSE_TYPE.ERROR, - "error_code": 0, - "error_message": str(e), - } + result = SQLAnswer( + resp_type=SQL_RESPONSE_TYPE.ERROR, + error_code=0, + error_message=str(e), + ) logger.exception("Error query processing:") - except Exception as e: - error_type = "unexpected" - query_response = { - "type": SQL_RESPONSE_TYPE.ERROR, - "error_code": 0, - "error_message": str(e), - } - logger.exception("Error query processing:") - - if query_response.get("type") == SQL_RESPONSE_TYPE.ERROR: - error_type = "expected" - error_code = query_response.get("error_code") - error_text = query_response.get("error_message") + context = mysql_proxy.get_context() - context = mysql_proxy.get_context() - - query_response["context"] = context + if response_format == ReponseFormat.JSONLINES: + query_response = result.stream_http_response_jsonlines(context=context) + query_response = Response(query_response, mimetype="application/jsonlines") + elif response_format == ReponseFormat.SSE: + query_response = result.stream_http_response_sse(context=context) + query_response = Response(query_response, mimetype="text/event-stream") + else: + query_response = result.dump_http_response(context=context), 200 hooks.after_api_query( company_id=ctx.company_id, @@ -156,21 +168,23 @@ def post(self): command=None, payload=query, error_type=error_type, - error_code=error_code, - error_text=error_text, + error_code=result.error_code, + error_text=result.error_message, traceback=error_traceback, ) end_time = time.time() - log_msg = f"SQL processed in {(end_time - start_time):.2f}s ({end_time:.2f}-{start_time:.2f}), result is {query_response['type']}" - if query_response["type"] is SQL_RESPONSE_TYPE.TABLE: - log_msg += f" ({len(query_response['data'])} rows), " - elif query_response["type"] is SQL_RESPONSE_TYPE.ERROR: - log_msg += f" ({query_response['error_message']}), " - log_msg += f"used handlers {ctx.used_handlers}" + log_msg = f"SQL processed in {(end_time - start_time):.2f}s ({end_time:.2f}-{start_time:.2f}), result is {result.type}, " + if result.type is SQL_RESPONSE_TYPE.TABLE and response_format is ReponseFormat.DEFAULT: + log_msg += f" one-piece result ({len(query_response[0]['data'])} rows), " + elif result.type is SQL_RESPONSE_TYPE.TABLE: + log_msg += f" {response_format} result, " + elif result.type is SQL_RESPONSE_TYPE.ERROR: + log_msg += f" ({result.error_message}), " + log_msg += f"used handlers: {ctx.used_handlers}" logger.debug(log_msg) - return query_response, 200 + return query_response @ns_conf.route("/charter") diff --git a/mindsdb/api/http/namespaces/tree.py b/mindsdb/api/http/namespaces/tree.py index e9e1ee25fa7..87e03225dfd 100644 --- a/mindsdb/api/http/namespaces/tree.py +++ b/mindsdb/api/http/namespaces/tree.py @@ -39,7 +39,8 @@ def get(self, db_name): if isinstance(with_schemas, str): with_schemas = with_schemas.lower() in ("1", "true") else: - with_schemas = False + # Show all schemas by default for better UX + with_schemas = True db_name = db_name.lower() databases = ca.database_controller.get_dict() if db_name not in databases: diff --git a/mindsdb/api/http/start.py b/mindsdb/api/http/start.py index 9cfb8454c89..f2373ebb114 100644 --- a/mindsdb/api/http/start.py +++ b/mindsdb/api/http/start.py @@ -1,5 +1,6 @@ import gc from importlib import import_module +from contextlib import asynccontextmanager, AsyncExitStack gc.disable() @@ -28,7 +29,7 @@ async def _health_check(request): return JSONResponse({"status": "ok"}) -def _mount_optional_api(name: str, mount_path: str, get_app_fn, routes): +def _mount_optional_api(name: str, mount_path: str, get_app_fn, routes) -> object | None: try: optional_app = get_app_fn() except ImportError as exc: @@ -41,8 +42,11 @@ def _mount_optional_api(name: str, mount_path: str, get_app_fn, routes): ) return - optional_app.add_middleware(PATAuthMiddleware) + if name.upper() != "MCP" or config["api"]["mcp"]["oauth"]["enabled"] is False: + optional_app.add_middleware(PATAuthMiddleware) + routes.append(Mount(mount_path, app=optional_app)) + return optional_app def start(verbose, app: Flask = None, is_restart: bool = False): @@ -58,23 +62,44 @@ def start(verbose, app: Flask = None, is_restart: bool = False): process_cache.init() routes = [] + sub_apps = [] # Health check FIRST - async endpoint that bypasses WSGI worker pool # This ensures health checks respond even when all workers are blocked routes.append(Route("/api/util/ping", _health_check, methods=["GET"])) - _mount_optional_api( - "A2A", - "/a2a", - lambda: import_module("mindsdb.api.a2a").get_a2a_app(), - routes, - ) - _mount_optional_api( - "MCP", - "/mcp", - lambda: import_module("mindsdb.api.mcp").get_mcp_app(), - routes, - ) + for name, path, factory in [ + ("A2A", "/a2a", lambda: import_module("mindsdb.api.a2a").get_a2a_app()), + ("MCP", "/mcp", lambda: import_module("mindsdb.api.mcp").get_mcp_app()), + ]: + mounted = _mount_optional_api(name, path, factory, routes) + if mounted is not None: + sub_apps.append(mounted) + + # RFC 9728: /.well-known/oauth-protected-resource must be at the server root, + # not under the /mcp mount, so we register it here before the Flask fallback. + try: + well_known_routes = import_module("mindsdb.api.mcp").get_mcp_well_known_routes() + routes.extend(well_known_routes) + except ImportError: + pass + except Exception as e: + logger.warning(f"Error during registering of mcp well-known routes: {e}") + + @asynccontextmanager + async def lifespan(_): + """Propagate ASGI lifespan events to mounted sub-apps. + + Starlette's Mount does not forward startup/shutdown lifespan events to + sub-applications automatically. This context manager manually enters the + lifespan context of each collected sub-app so their internal state + (e.g. StreamableHTTPSessionManager task group for MCP) is properly + initialized on startup and torn down on shutdown. + """ + async with AsyncExitStack() as stack: + for sub_app in sub_apps: + await stack.enter_async_context(sub_app.router.lifespan_context(sub_app)) + yield # Root app LAST so it won't shadow the others routes.append( @@ -89,4 +114,10 @@ def start(verbose, app: Flask = None, is_restart: bool = False): ) # Setting logging to None makes uvicorn use the existing logging configuration - uvicorn.run(Starlette(routes=routes, debug=verbose), host=host, port=int(port), log_level=None, log_config=None) + uvicorn.run( + Starlette(routes=routes, lifespan=lifespan, debug=verbose), + host=host, + port=int(port), + log_level=None, + log_config=None, + ) diff --git a/mindsdb/api/mcp/__init__.py b/mindsdb/api/mcp/__init__.py index b5601a16e8b..3473a394e61 100644 --- a/mindsdb/api/mcp/__init__.py +++ b/mindsdb/api/mcp/__init__.py @@ -1,182 +1,3 @@ -import os -from textwrap import dedent -from typing import Any -from contextlib import asynccontextmanager -from collections.abc import AsyncIterator -from dataclasses import dataclass +from mindsdb.api.mcp.app import get_mcp_app, get_mcp_well_known_routes -from mcp.server.fastmcp import FastMCP -from mcp.server.transport_security import TransportSecuritySettings -from starlette.requests import Request -from starlette.responses import JSONResponse - -from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE as SQL_RESPONSE_TYPE -from mindsdb.interfaces.storage import db -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def _get_transport_security() -> TransportSecuritySettings: - default_hosts = ["localhost:*", "127.0.0.1:*"] - env_hosts = os.environ.get("MINDSDB_MCP_ALLOWED_HOSTS", "") - if env_hosts: - custom_hosts = [h.strip() for h in env_hosts.split(",") if h.strip()] - for host in custom_hosts: - if ":" not in host: - default_hosts.append(f"{host}:*") - default_hosts.append(host) - logger.info(f"MCP transport security allowed hosts: {default_hosts}") - return TransportSecuritySettings(allowed_hosts=default_hosts) - - -@dataclass -class AppContext: - db: Any - - -@asynccontextmanager -async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]: - """Manage application lifecycle with type-safe context""" - # Initialize on startup - db.init() - try: - yield AppContext(db=db) - finally: - # TODO: We need better way to handle this in storage/db.py - pass - - -# Configure server with lifespan and transport security -mcp = FastMCP( - "MindsDB", - lifespan=app_lifespan, - dependencies=["mindsdb"], - transport_security=_get_transport_security(), -) - - -# MCP Queries -LISTING_QUERY = "SHOW DATABASES" - - -query_tool_description = dedent("""\ - Executes a SQL query against MindsDB. - - A database must be specified either in the `context` parameter or directly in the query string (e.g., `SELECT * FROM my_database.my_table`). Queries like `SELECT * FROM my_table` will fail without a `context`. - - Args: - query (str): The SQL query to execute. - context (dict, optional): The default database context. For example, `{"db": "my_postgres"}`. - - Returns: - A dictionary describing the result. - - For a successful query with no data to return (e.g., an `UPDATE` statement), the response is `{"type": "ok"}`. - - If the query returns tabular data, the response is a dictionary containing `data` (a list of rows) and `column_names` (a list of column names). For example: `{"type": "table", "data": [[1, "a"], [2, "b"]], "column_names": ["column_a", "column_b"]}`. - - In case of an error, a response is `{"type": "error", "error_message": "the error message"}`. -""") - - -@mcp.tool(name="query", description=query_tool_description) -def query(query: str, context: dict | None = None) -> dict[str, Any]: - """Execute a SQL query against MindsDB - - Args: - query: The SQL query to execute - context: Optional context parameters for the query - - Returns: - Dict containing the query results or error information - """ - - if context is None: - context = {} - - logger.debug(f"Incoming MCP query: {query}") - - mysql_proxy = FakeMysqlProxy() - mysql_proxy.set_context(context) - - try: - result = mysql_proxy.process_query(query) - - if result.type == SQL_RESPONSE_TYPE.OK: - return {"type": SQL_RESPONSE_TYPE.OK} - - if result.type == SQL_RESPONSE_TYPE.TABLE: - return { - "type": SQL_RESPONSE_TYPE.TABLE, - "data": result.result_set.to_lists(json_types=True), - "column_names": [column.alias or column.name for column in result.result_set.columns], - } - else: - return {"type": SQL_RESPONSE_TYPE.ERROR, "error_code": 0, "error_message": "Unknown response type"} - - except Exception as e: - logger.exception("Error processing query:") - return {"type": SQL_RESPONSE_TYPE.ERROR, "error_code": 0, "error_message": str(e)} - - -list_databases_tool_description = ( - "Returns a list of all database connections currently available in MindsDB. " - + "The tool takes no parameters and responds with a list of database names, " - + 'for example: ["my_postgres", "my_mysql", "test_db"].' -) - - -@mcp.tool(name="list_databases", description=list_databases_tool_description) -def list_databases() -> list[str]: - """ - List all databases in MindsDB - - Returns: - list[str]: list of databases - """ - - mysql_proxy = FakeMysqlProxy() - - try: - result = mysql_proxy.process_query(LISTING_QUERY) - if result.type == SQL_RESPONSE_TYPE.ERROR: - return { - "type": "error", - "error_code": result.error_code, - "error_message": result.error_message, - } - - elif result.type == SQL_RESPONSE_TYPE.OK: - return {"type": "ok"} - - elif result.type == SQL_RESPONSE_TYPE.TABLE: - data = result.result_set.to_lists(json_types=True) - data = [val[0] for val in data] - return data - - except Exception as e: - logger.exception("Error while retrieving list of databases") - return { - "type": "error", - "error_code": 0, - "error_message": str(e), - } - - -def _get_status(request: Request) -> JSONResponse: - """ - Status endpoint that returns basic server information. - This endpoint can be used by the frontend to check if the MCP server is running. - """ - - status_info = { - "status": "ok", - "service": "mindsdb-mcp", - } - - return JSONResponse(status_info) - - -def get_mcp_app(): - app = mcp.sse_app() - app.add_route("/status", _get_status, methods=["GET"]) - return app +__all__ = ["get_mcp_app", "get_mcp_well_known_routes"] diff --git a/mindsdb/api/mcp/app.py b/mindsdb/api/mcp/app.py new file mode 100644 index 00000000000..ea810595ac0 --- /dev/null +++ b/mindsdb/api/mcp/app.py @@ -0,0 +1,94 @@ +from contextlib import asynccontextmanager + +from starlette.applications import Starlette +from starlette.middleware import Middleware +from starlette.middleware.authentication import AuthenticationMiddleware +from starlette.middleware.cors import CORSMiddleware +from starlette.requests import Request +from starlette.responses import JSONResponse +from starlette.routing import Route + +from mcp.server.auth.middleware.bearer_auth import BearerAuthBackend +from mcp.server.auth.middleware.auth_context import AuthContextMiddleware + +from mindsdb.utilities.config import config +from mindsdb.api.common.middleware import RateLimitMiddleware +from mindsdb.api.mcp.mcp_instance import mcp + +# region these imports required for correct initialization +from mindsdb.api.mcp import tools # noqa: F401 +from mindsdb.api.mcp import resources # noqa: F401 +from mindsdb.api.mcp import prompts # noqa: F401 +from mindsdb.api.mcp import completions # noqa: F401 +# endregion + + +def _get_status(request: Request) -> JSONResponse: + return JSONResponse({"status": "ok", "service": "mindsdb-mcp"}) + + +def get_mcp_app(): + sse_starlette = mcp.sse_app() + http_starlette = mcp.streamable_http_app() + + @asynccontextmanager + async def lifespan(_): + """Required for streamable_http to run task group""" + async with http_starlette.router.lifespan_context(http_starlette): + yield + + middleware = [] + + # Preserve AuthenticationMiddleware from http_starlette so that + # RequireAuthMiddleware can read scope["user"] set by BearerAuthBackend. + if mcp._token_verifier is not None: + middleware = [ + Middleware(AuthenticationMiddleware, backend=BearerAuthBackend(mcp._token_verifier)), + Middleware(AuthContextMiddleware), + ] + + combined_app = Starlette( + routes=list(sse_starlette.routes) + list(http_starlette.routes), + middleware=middleware, + lifespan=lifespan, + ) + + # Rate limit should be added before CORS, so that CORS adds correct headers + if config["api"]["mcp"]["rate_limit"]["enabled"]: + combined_app.add_middleware( + RateLimitMiddleware, + requests_per_minute=config["api"]["mcp"]["rate_limit"]["requests_per_minute"], + ) + + if config["api"]["mcp"]["cors"]["enabled"]: + combined_app.add_middleware( + CORSMiddleware, + allow_origins=config["api"]["mcp"]["cors"]["allow_origins"], + allow_origin_regex=config["api"]["mcp"]["cors"]["allow_origin_regex"], + allow_methods=["GET", "POST", "DELETE", "OPTIONS"], + allow_headers=config["api"]["mcp"]["cors"]["allow_headers"], + expose_headers=["mcp-session-id"], + ) + + combined_app.add_route("/status", _get_status, methods=["GET"]) + + return combined_app + + +def get_mcp_well_known_routes() -> list[Route]: + """Return OAuth protected resource metadata routes for mounting at the server root. + + RFC 9728 requires /.well-known/oauth-protected-resource to be served at the + server root, not under the /mcp sub-path, so start.py registers these separately. + """ + from mcp.server.auth.routes import create_protected_resource_routes + + auth = mcp.settings.auth + if not auth or not auth.resource_server_url: + return [] + + return create_protected_resource_routes( + resource_url=auth.resource_server_url, + authorization_servers=[auth.issuer_url], + scopes_supported=auth.required_scopes, + ) diff --git a/mindsdb/api/mcp/completions.py b/mindsdb/api/mcp/completions.py new file mode 100644 index 00000000000..94bf2abe2cd --- /dev/null +++ b/mindsdb/api/mcp/completions.py @@ -0,0 +1,35 @@ +from mcp.types import Completion, PromptReference, ResourceTemplateReference + +from mindsdb.api.mcp.mcp_instance import mcp +from mindsdb.api.executor.controllers.session_controller import SessionController +from mindsdb.utilities.context import context as ctx +from mindsdb.api.mcp.resources.schema import _get_database_names +from mindsdb.utilities import log + +logger = log.getLogger(__name__) + + +@mcp.completion() +async def handle_completion(ref, argument, context): + if not isinstance(ref, (ResourceTemplateReference, PromptReference)): + return None + + try: + if argument.name == "database_name": + names = _get_database_names() + return Completion(values=[n for n in names if n.startswith(argument.value)]) + + if argument.name == "table_name": + database_name = (context.arguments or {}).get("database_name") + if not database_name: + return None + ctx.set_default() + session = SessionController() + datanode = session.datahub.get(database_name) + all_tables = datanode.get_tables() + names = [t.TABLE_NAME for t in all_tables] + return Completion(values=[n for n in names if n.startswith(argument.value)]) + except Exception as e: + logger.info(f"Couldn't get completion for parameter {argument.name}: {e}") + + return None diff --git a/mindsdb/api/mcp/mcp_instance.py b/mindsdb/api/mcp/mcp_instance.py new file mode 100644 index 00000000000..fa65ab47711 --- /dev/null +++ b/mindsdb/api/mcp/mcp_instance.py @@ -0,0 +1,36 @@ +from mcp.server.fastmcp import FastMCP +from mcp.server.transport_security import TransportSecuritySettings + +from mindsdb.api.mcp.oauth import build_oauth_components +from mindsdb.utilities.config import config + + +def _create_mcp() -> FastMCP: + token_verifier, auth_settings = build_oauth_components() + + dns_rebinding_protection = config["api"]["mcp"]["dns_rebinding_protection"] + transport_security = TransportSecuritySettings(enable_dns_rebinding_protection=dns_rebinding_protection) + + return FastMCP( + name="MindsDB", + instructions=( + "MindsDB is a data platform that connects to external databases and data sources.\n" + "Use the available resources to discover connected databases and their schema,\n" + "then use the `query` tool to retrieve or manipulate data with SQL.\n" + "\n" + "Workflow:\n" + "1. Read `schema://databases` to list available data sources.\n" + "2. Read `schema://databases/{name}/tables` to explore tables in a source.\n" + "3. Read `schema://databases/{name}/tables/{table}/columns` to inspect columns.\n" + "4. Use the `query` tool to run SQL queries against the data." + ), + dependencies=["mindsdb"], + streamable_http_path="/streamable", + debug=False, + token_verifier=token_verifier, + auth=auth_settings, + transport_security=transport_security, + ) + + +mcp = _create_mcp() diff --git a/mindsdb/api/mcp/oauth.py b/mindsdb/api/mcp/oauth.py new file mode 100644 index 00000000000..32d5efb71dd --- /dev/null +++ b/mindsdb/api/mcp/oauth.py @@ -0,0 +1,167 @@ +from typing import Any +from urllib.parse import urljoin + +import httpx +from pydantic import AnyHttpUrl +from mcp.server.auth.settings import AuthSettings +from mcp.server.auth.provider import AccessToken, TokenVerifier +from mcp.shared.auth_utils import check_resource_allowed, resource_url_from_server_url + +from mindsdb.utilities.config import config +from mindsdb.utilities import log + +logger = log.getLogger(__name__) + + +class IntrospectionTokenVerifier(TokenVerifier): + """Token verifier that uses OAuth 2.0 Token Introspection (RFC 7662). + Intended for use when MindsDB acts as a Resource Server and token + issuance is delegated to an external provider (e.g. Keycloak). + + Args: + introspection_endpoint: Full URL of the RFC 7662 introspection endpoint. + server_url: Public URL of this MCP server (e.g. ``http://host:port/mcp/streamable``). + Used to derive the expected ``aud`` (audience) claim value. + client_id: OAuth client ID used to authenticate against the introspection endpoint. + client_secret: OAuth client secret used to authenticate against the introspection endpoint. + """ + + def __init__( + self, + introspection_endpoint: str, + server_url: str, + client_id: str, + client_secret: str, + ): + self.introspection_endpoint = introspection_endpoint + self.server_url = server_url + self.client_id = client_id + self.client_secret = client_secret + self.resource_url = resource_url_from_server_url(server_url) + + async def verify_token(self, token: str) -> AccessToken | None: + """Verify a bearer token via the introspection endpoint. + + Args: + token: Raw bearer token string extracted from the Authorization header. + + Returns: + AccessToken: Populated access token on successful verification. + None: If the token is inactive, the audience is invalid, the endpoint + is unreachable, or any other error occurs. + """ + # to prevent SSRF attacks it must start from https, or be local server + if not self.introspection_endpoint.startswith(("https://", "http://localhost:", "http://127.0.0.1:")): + return None + + timeout = httpx.Timeout(10.0, connect=5.0) + limits = httpx.Limits(max_connections=10, max_keepalive_connections=5) + + async with httpx.AsyncClient( + timeout=timeout, + limits=limits, + verify=True, + follow_redirects=False, + ) as client: + try: + form_data = { + "token": token, + "client_id": self.client_id, + "client_secret": self.client_secret, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + + response = await client.post( + self.introspection_endpoint, + data=form_data, + headers=headers, + ) + + if response.status_code != 200: + return None + + data = response.json() + if not data.get("active", False): + return None + + if not self._validate_resource(data): + return None + + return AccessToken( + token=token, + client_id=data.get("client_id", "unknown"), + scopes=data.get("scope", "").split() if data.get("scope") else [], + expires_at=data.get("exp"), + resource=self.resource_url, + ) + + except Exception as e: + logger.error(f"Error during token verification: {e}") + return None + + def _validate_resource(self, token_data: dict[str, Any]) -> bool: + """Validate that the token was issued for this resource server (RFC 8707). + + Args: + token_data: Parsed JSON response from the introspection endpoint. + + Returns: + bool: True if at least one audience entry matches this server's resource URL, + False if ``aud`` is missing or no entry matches. + """ + if not self.server_url or not self.resource_url: + return False + + aud: list[str] | str | None = token_data.get("aud") + if isinstance(aud, list): + return any(check_resource_allowed(self.resource_url, a) for a in aud) + if isinstance(aud, str): + return check_resource_allowed(self.resource_url, aud) + return False + + +def build_oauth_components() -> tuple[IntrospectionTokenVerifier, AuthSettings] | tuple[None, None]: + """Build token verifier and auth settings from the OAuth config section. + + Returns: + tuple[IntrospectionTokenVerifier, AuthSettings]: Token verifier and auth settings ready + to pass to FastMCP if OAuth is enabled. + tuple[None, None]: If OAuth ``enabled`` is False or not set. + """ + oauth_cfg = config["api"]["mcp"]["oauth"] + if not oauth_cfg.get("enabled", False): + return None, None + + public_url = oauth_cfg.get("public_url", "").rstrip("/") + if public_url: + mcp_endpoint_url = f"{public_url}/mcp/streamable" + else: + host = config["api"]["http"]["host"] + port = config["api"]["http"]["port"] + # Bind-all addresses (0.0.0.0 / ::) are not valid client-facing destinations. + # Replace with loopback so the advertised resource_metadata URL is reachable. + if host in ("0.0.0.0", "", "::"): + host = "127.0.0.1" + mcp_endpoint_url = f"http://{host}:{port}/mcp/streamable" + + issuer_url = oauth_cfg.get("issuer_url", "").rstrip("/") + "/" + client_id = oauth_cfg.get("client_id", "") + client_secret = oauth_cfg.get("client_secret", "") + scope = oauth_cfg.get("scope", "mcp:tools") + + introspection_endpoint = urljoin(issuer_url, "protocol/openid-connect/token/introspect") + + token_verifier = IntrospectionTokenVerifier( + introspection_endpoint=introspection_endpoint, + server_url=mcp_endpoint_url, + client_id=client_id, + client_secret=client_secret, + ) + + auth_settings = AuthSettings( + issuer_url=AnyHttpUrl(issuer_url), + required_scopes=[scope], + resource_server_url=AnyHttpUrl(mcp_endpoint_url), + ) + + return token_verifier, auth_settings diff --git a/mindsdb/api/mcp/prompts/__init__.py b/mindsdb/api/mcp/prompts/__init__.py new file mode 100644 index 00000000000..437673b53d3 --- /dev/null +++ b/mindsdb/api/mcp/prompts/__init__.py @@ -0,0 +1 @@ +from mindsdb.api.mcp.prompts import sample_table # noqa: F401 diff --git a/mindsdb/api/mcp/prompts/sample_table.py b/mindsdb/api/mcp/prompts/sample_table.py new file mode 100644 index 00000000000..2473715aa7d --- /dev/null +++ b/mindsdb/api/mcp/prompts/sample_table.py @@ -0,0 +1,21 @@ +from mcp.types import TextContent + +from mindsdb.api.mcp.mcp_instance import mcp + + +@mcp.prompt(name="sample_table", description="Fetch 5 sample rows from a table and describe its structure.") +def sample_table(database_name: str, table_name: str) -> list[TextContent]: + return [ + TextContent( + type="text", + text=( + f"Use the `query` tool to fetch 5 sample rows from the table `{table_name}` " + f"in database `{database_name}`:\n\n" + f"```sql\n" + f"SELECT * FROM `{database_name}`.`{table_name}` LIMIT 5;\n" + f"```\n\n" + f"After getting the results, briefly describe the table structure " + f"and what kind of data it contains." + ), + ) + ] diff --git a/mindsdb/api/mcp/resources/__init__.py b/mindsdb/api/mcp/resources/__init__.py new file mode 100644 index 00000000000..5cd0b60720d --- /dev/null +++ b/mindsdb/api/mcp/resources/__init__.py @@ -0,0 +1 @@ +from mindsdb.api.mcp.resources import schema # noqa: F401 diff --git a/mindsdb/api/mcp/resources/schema.py b/mindsdb/api/mcp/resources/schema.py new file mode 100644 index 00000000000..6986c7dd420 --- /dev/null +++ b/mindsdb/api/mcp/resources/schema.py @@ -0,0 +1,136 @@ +from pydantic import BaseModel + +from mindsdb.api.mcp.mcp_instance import mcp +from mindsdb.api.executor.controllers.session_controller import SessionController +from mindsdb.utilities.context import context as ctx +from mindsdb.integrations.libs.response import TableResponse, ErrorResponse +from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE + + +class TableInfo(BaseModel): + TABLE_NAME: str + TABLE_TYPE: str + TABLE_SCHEMA: str + + +class ColumnInfo(BaseModel): + COLUMN_NAME: str + MYSQL_DATA_TYPE: str + + +class KnowledgeBaseInfo(BaseModel): + name: str + project: str + metadata_columns: list[str] + content_columns: list[str] + id_column: str + + +def _get_database_names() -> list[str]: + ctx.set_default() + session = SessionController() + databases = session.database_controller.get_list() + return [x["name"] for x in databases if x["type"] == "data"] + + +@mcp.resource( + "schema://databases", + mime_type="application/json", + description=( + "Initial list of connected data source names available for querying. " + "This resource may be cached by the client. " + "To get the current list of databases during a session, use the `query` tool: " + "SHOW DATABASES" + ), +) +def list_databases() -> list[str]: + return _get_database_names() + + +@mcp.resource( + "schema://databases/{database_name}/tables", + mime_type="application/json", + description=( + "Initial list of tables in the specified connected database. " + "This resource may be cached by the client. " + "To get the current list of tables during a session (e.g. after CREATE/DROP TABLE), " + "use the `query` tool: " + "SHOW TABLES FROM {database_name}" + ), +) +def db_tables(database_name: str) -> list[TableInfo]: + ctx.set_default() + session = SessionController() + datanode = session.datahub.get(database_name) + if datanode is None: + raise ValueError(f"Database '{database_name}' is not found.") + all_tables = datanode.get_tables() + all_tables = [ + { + "TABLE_NAME": table.TABLE_NAME, + "TABLE_TYPE": table.TABLE_TYPE, + "TABLE_SCHEMA": table.TABLE_SCHEMA, + } + for table in all_tables + ] + return all_tables + + +@mcp.resource( + "schema://databases/{database_name}/tables/{table_name}/columns", + mime_type="application/json", + description=( + "Initial column names and types for a specific table in a connected database. " + "This resource may be cached by the client. " + "To get the current column list during a session (e.g. after ALTER TABLE), " + "use the `query` tool: " + "SELECT COLUMN_NAME, DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS " + "WHERE TABLE_SCHEMA = '{database_name}' AND TABLE_NAME = '{table_name}'" + ), +) +def db_table_columns(database_name: str, table_name: str) -> list[ColumnInfo]: + ctx.set_default() + session = SessionController() + handler = session.integration_controller.get_data_handler(database_name) + columns_answer = handler.get_columns(table_name) + + if isinstance(columns_answer, TableResponse): + if columns_answer.type != RESPONSE_TYPE.COLUMNS_TABLE: + raise ValueError( + "Database returned a successful response, but the column list does not match the expected format" + ) + df = columns_answer.fetchall() + response = df[["COLUMN_NAME", "MYSQL_DATA_TYPE"]].to_dict(orient="records") + return response + if isinstance(columns_answer, ErrorResponse): + raise ValueError(columns_answer.error_message) + raise ValueError(f"Unexpected handler response type: {columns_answer}") + + +@mcp.resource( + "schema://knowledge_bases", + description=( + "Initial list of knowledge bases with their project, column configuration, and ID column. " + "This resource may be cached by the client. " + "To get the current list of knowledge bases during a session, use the `query` tool: " + "SHOW KNOWLEDGE BASES" + ), +) +def list_knowledge_bases() -> list[KnowledgeBaseInfo]: + ctx.set_default() + session = SessionController() + project_names = session.datahub.get_projects_names() + result = [] + for project_name in project_names: + kbs = session.kb_controller.list(project_name) + for kb in kbs: + result.append( + { + "name": kb.get("name"), + "project": kb.get("project"), + "metadata_columns": kb.get("metadata_columns"), + "content_columns": kb.get("content_columns"), + "id_column": kb.get("id_column"), + } + ) + return result diff --git a/mindsdb/api/mcp/tools/__init__.py b/mindsdb/api/mcp/tools/__init__.py new file mode 100644 index 00000000000..a07edf06817 --- /dev/null +++ b/mindsdb/api/mcp/tools/__init__.py @@ -0,0 +1 @@ +from mindsdb.api.mcp.tools import query # noqa: F401 diff --git a/mindsdb/api/mcp/tools/query.py b/mindsdb/api/mcp/tools/query.py new file mode 100644 index 00000000000..42026e32b1f --- /dev/null +++ b/mindsdb/api/mcp/tools/query.py @@ -0,0 +1,60 @@ +from textwrap import dedent +from typing import Annotated + +from pydantic import Field + +from mindsdb.api.mcp.mcp_instance import mcp +from mindsdb.api.mcp.types import ErrorResponse, QueryResponseAnswer, response_adapter +from mindsdb.api.mysql.mysql_proxy.mysql_proxy import SQLAnswer +from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy +from mindsdb.utilities.context import context as ctx +from mindsdb.utilities import log + +logger = log.getLogger(__name__) + + +query_tool_description = dedent("""\ + Execute a SQL query against MindsDB and return the result. + + Queries use MySQL syntax. Use fully qualified names (`database`.`table`) or set `context` to specify + the default database. Use backticks (`) to quote identifiers that are reserved words or contain + special characters. + + Returns one of: + - `{"type": "ok"}` β€” for statements with no output (INSERT, UPDATE, etc.) + - `{"type": "table", "column_names": [...], "data": [[...], ...]}` β€” for SELECT results + - `{"type": "error", "error_message": "..."}` β€” on failure +""") + + +@mcp.tool(name="query", description=query_tool_description) +def query( + query: Annotated[str, Field(description="SQL query to execute against MindsDB.")], + context: Annotated[ + dict | None, + Field( + description=( + 'Default database context, e.g. {"db": "my_postgres"}. ' + "Required if the query does not use fully qualified table names." + ) + ), + ] = None, +) -> QueryResponseAnswer: + ctx.set_default() + + if context is None: + context = {} + + logger.debug(f"Incoming MCP query: {query}") + + mysql_proxy = FakeMysqlProxy() + mysql_proxy.set_context(context) + + try: + result: SQLAnswer = mysql_proxy.process_query(query) + query_response: dict = result.dump_http_response() + except Exception as e: + logger.exception("Error processing query:") + return ErrorResponse(type="error", error_code=0, error_message=str(e)) + + return response_adapter.validate_python(query_response) diff --git a/mindsdb/api/mcp/types.py b/mindsdb/api/mcp/types.py new file mode 100644 index 00000000000..0275742116f --- /dev/null +++ b/mindsdb/api/mcp/types.py @@ -0,0 +1,25 @@ +from typing import Annotated, Literal, Union + +from pydantic import BaseModel, Field, TypeAdapter + + +class OkResponse(BaseModel): + type: Literal["ok"] + affected_rows: int | None = None + + +class ErrorResponse(BaseModel): + type: Literal["error"] + error_code: int + error_message: str + + +class TableResponse(BaseModel): + type: Literal["table"] + column_names: list[str] + data: list[list] + + +QueryResponseAnswer = Annotated[Union[OkResponse, ErrorResponse, TableResponse], Field(discriminator="type")] + +response_adapter = TypeAdapter(QueryResponseAnswer) diff --git a/mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py b/mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py index 6f3b06387e4..ec9c122f3d6 100644 --- a/mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +++ b/mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py @@ -3,7 +3,7 @@ from mindsdb_sql_parser.ast.base import ASTNode import mindsdb.utilities.profiler as profiler from mindsdb.api.executor.sql_query import SQLQuery -from mindsdb.api.executor.sql_query.result_set import Column +from mindsdb.utilities.types.column import Column from mindsdb.api.executor.planner import utils as planner_utils from mindsdb.api.executor.data_types.answer import ExecuteAnswer from mindsdb.api.executor.command_executor import ExecuteCommands diff --git a/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py b/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py index 5fd02915246..8f691db994c 100644 --- a/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +++ b/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py @@ -22,8 +22,6 @@ import traceback import logging from functools import partial -from typing import List -from dataclasses import dataclass import mindsdb.utilities.hooks as hooks import mindsdb.utilities.profiler as profiler @@ -65,11 +63,12 @@ getConstName, ) from mindsdb.api.executor.data_types.answer import ExecuteAnswer +from mindsdb.api.executor.data_types.sql_answer import SQLAnswer from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE from mindsdb.api.executor import exceptions as executor_exceptions from mindsdb.api.common.middleware import check_auth -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb.api.executor.sql_query.result_set import Column, ResultSet +from mindsdb.api.executor.sql_query.result_set import ResultSet +from mindsdb.utilities.types.column import Column from mindsdb.utilities import log from mindsdb.utilities.config import config from mindsdb.utilities.context import context as ctx @@ -93,44 +92,6 @@ def empty_fn(): pass -@dataclass -class SQLAnswer: - resp_type: RESPONSE_TYPE = RESPONSE_TYPE.OK - result_set: ResultSet | None = None - status: int | None = None - state_track: List[List] | None = None - error_code: int | None = None - error_message: str | None = None - affected_rows: int | None = None - mysql_types: list[MYSQL_DATA_TYPE] | None = None - - @property - def type(self): - return self.resp_type - - def dump_http_response(self) -> dict: - if self.resp_type == RESPONSE_TYPE.OK: - return { - "type": self.resp_type, - "affected_rows": self.affected_rows, - } - elif self.resp_type in (RESPONSE_TYPE.TABLE, RESPONSE_TYPE.COLUMNS_TABLE): - data = self.result_set.to_lists(json_types=True) - return { - "type": RESPONSE_TYPE.TABLE, - "data": data, - "column_names": [column.alias or column.name for column in self.result_set.columns], - } - elif self.resp_type == RESPONSE_TYPE.ERROR: - return { - "type": RESPONSE_TYPE.ERROR, - "error_code": self.error_code or 0, - "error_message": self.error_message, - } - else: - raise ValueError(f"Unsupported response type for dump HTTP response: {self.resp_type}") - - class MysqlTCPServer(SocketServer.ThreadingTCPServer): """ Custom TCP Server with increased request queue size diff --git a/mindsdb/api/mysql/mysql_proxy/utilities/dump.py b/mindsdb/api/mysql/mysql_proxy/utilities/dump.py index f580c7bf714..82fa0a5232f 100644 --- a/mindsdb/api/mysql/mysql_proxy/utilities/dump.py +++ b/mindsdb/api/mysql/mysql_proxy/utilities/dump.py @@ -9,7 +9,8 @@ import pandas as pd from pandas.api import types as pd_types -from mindsdb.api.executor.sql_query.result_set import ResultSet, get_mysql_data_type_from_series, Column +from mindsdb.api.executor.sql_query.result_set import ResultSet, get_mysql_data_type_from_series +from mindsdb.utilities.types.column import Column from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ( MYSQL_DATA_TYPE, DATA_C_TYPE_MAP, diff --git a/mindsdb/integrations/handlers/access_handler/README.md b/mindsdb/integrations/handlers/access_handler/README.md deleted file mode 100644 index 2598589dbf8..00000000000 --- a/mindsdb/integrations/handlers/access_handler/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Microsoft Access Handler - -This is the implementation of the Microsoft Access handler for MindsDB. - -## Microsoft Access -Microsoft Access is a pseudo-relational database engine from Microsoft. It is part of the Microsoft Office suite of applications that also includes Word, Outlook and Excel, among others. Access is also available for purchase as a stand-alone product. Access uses the Jet Database Engine for data storage. -https://www.techopedia.com/definition/1218/microsoft-access - -## Implementation -This handler was implemented using `pyodbc`, the Python ODBC bridge. - -**Platform Requirements:** -- **Windows Only**: This handler requires the Microsoft Access ODBC driver, which is only available on Windows operating systems. -- The Microsoft Access Driver (*.mdb, *.accdb) must be installed on the system. - -The only required argument to establish a connection is `db_file`. This points to the database file that the connection is to be made to. - -## Usage -In order to make use of this handler and connect to an Access database in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE access_datasource -WITH -engine='access', -parameters={ - "db_file":"C:\\Users\\minurap\\Documents\\example_db.accdb" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM access_datasource.example_tbl -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/access_handler/__about__.py b/mindsdb/integrations/handlers/access_handler/__about__.py deleted file mode 100644 index 081c52908c1..00000000000 --- a/mindsdb/integrations/handlers/access_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Microsoft Access handler" -__package_name__ = "mindsdb_access_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Microsoft Access" -__author__ = "Minura Punchihewa" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2025 - MindsDB" diff --git a/mindsdb/integrations/handlers/access_handler/__init__.py b/mindsdb/integrations/handlers/access_handler/__init__.py deleted file mode 100644 index 3a9af28856f..00000000000 --- a/mindsdb/integrations/handlers/access_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args_example, connection_args -try: - from .access_handler import AccessHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Microsoft Access' -name = 'access' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/access_handler/access_handler.py b/mindsdb/integrations/handlers/access_handler/access_handler.py deleted file mode 100644 index c212c42e8be..00000000000 --- a/mindsdb/integrations/handlers/access_handler/access_handler.py +++ /dev/null @@ -1,203 +0,0 @@ -from typing import Optional -import platform - -import pandas as pd - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities import log -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - -try: - import pyodbc - from sqlalchemy_access.base import AccessDialect - - IMPORT_ERROR = None -except ImportError as e: - pyodbc = None - AccessDialect = None - IMPORT_ERROR = e - -logger = log.getLogger(__name__) - - -class AccessHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Microsoft Access statements. - """ - - name = "access" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = "access" - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """ - Set up the connection required by the handler. - Returns: - pyodbc.Connection: A connection object to the Access database. - """ - if self.is_connected is True: - return self.connection - - if IMPORT_ERROR is not None: - raise RuntimeError( - f"Microsoft Access handler requires pyodbc and sqlalchemy-access packages. " - f"Install them with: pip install pyodbc sqlalchemy-access. Error: {IMPORT_ERROR}" - ) - - if platform.system() != "Windows": - raise RuntimeError( - "Microsoft Access handler is only supported on Windows. " - "The Microsoft Access ODBC driver is not available on other operating systems." - ) - - self.connection = pyodbc.connect( - r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=" + self.connection_data["db_file"] - ) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to Microsoft Access database {self.connection_data['db_file']}, {e}!") - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cursor: - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame.from_records(result, columns=[x[0] for x in cursor.description]), - ) - - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f"Error running query: {query} on {self.connection_data['db_file']}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - if IMPORT_ERROR is not None: - raise RuntimeError( - f"Microsoft Access handler requires pyodbc and sqlalchemy-access packages. " - f"Install them with: pip install pyodbc sqlalchemy-access. Error: {IMPORT_ERROR}" - ) - - renderer = SqlalchemyRender(AccessDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - connection = self.connect() - with connection.cursor() as cursor: - df = pd.DataFrame([table.table_name for table in cursor.tables(tableType="Table")], columns=["table_name"]) - - response = Response(RESPONSE_TYPE.TABLE, df) - - return response - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - connection = self.connect() - with connection.cursor() as cursor: - df = pd.DataFrame( - [(column.column_name, column.type_name) for column in cursor.columns(table=table_name)], - columns=["column_name", "data_type"], - ) - - response = Response(RESPONSE_TYPE.TABLE, df) - - return response diff --git a/mindsdb/integrations/handlers/access_handler/connection_args.py b/mindsdb/integrations/handlers/access_handler/connection_args.py deleted file mode 100644 index 96617b4b7c5..00000000000 --- a/mindsdb/integrations/handlers/access_handler/connection_args.py +++ /dev/null @@ -1,15 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - db_file={ - "type": ARG_TYPE.PATH, - "description": "The full path to the Microsoft Access database file (.mdb or .accdb). On Windows, use absolute paths like C:\\Users\\username\\Documents\\database.accdb", - "required": True, - "label": "Database File Path", - } -) - -connection_args_example = OrderedDict(db_file="C:\\Users\\minurap\\Documents\\example_db.accdb") diff --git a/mindsdb/integrations/handlers/access_handler/icon.svg b/mindsdb/integrations/handlers/access_handler/icon.svg deleted file mode 100644 index 35dd43316c0..00000000000 --- a/mindsdb/integrations/handlers/access_handler/icon.svg +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/access_handler/requirements.txt b/mindsdb/integrations/handlers/access_handler/requirements.txt deleted file mode 100644 index e0a90ef8e50..00000000000 --- a/mindsdb/integrations/handlers/access_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pyodbc>=5.0.0; sys_platform == 'win32' -sqlalchemy-access>=2.0.0; sys_platform == 'win32' diff --git a/mindsdb/integrations/handlers/aerospike_handler/README.md b/mindsdb/integrations/handlers/aerospike_handler/README.md deleted file mode 100644 index 1b4885cf140..00000000000 --- a/mindsdb/integrations/handlers/aerospike_handler/README.md +++ /dev/null @@ -1,37 +0,0 @@ -## Implementation - -This is the implementation of the Aereospike for MindsDB. - -This handler was implemented using `duckdb`, a library that allows SQL queries to be executed on `pandas` DataFrames. - -The required arguments to establish a connection are as follows: - -- `user` is the database user. -- `password` is the database password. -- `host` is the host IP address or URL. -- `port` is the port used to make TCP/IP connection. -- `namespace` is the aerospike namespace. - -Other optional parameters are not supported as of now. - -## Usage - -In order to make use of this handler and connect to the Aereospike database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE aerospike_db -WITH ENGINE = "aerospike", -PARAMETERS = { - "user": "test", - "password": "password", - "host": "localhost", - "port": 3000, - "namespace": "test" - }; -``` - -You can use this established connection to query your table as follows. - -```sql -SELECT * FROM aerospike_db.house_rentals; -``` diff --git a/mindsdb/integrations/handlers/aerospike_handler/__about__.py b/mindsdb/integrations/handlers/aerospike_handler/__about__.py deleted file mode 100644 index c055321651b..00000000000 --- a/mindsdb/integrations/handlers/aerospike_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Aerospike handler" -__package_name__ = "mindsdb_aerospike_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Aerospike" -__author__ = "Biswadip Paul" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/aerospike_handler/__init__.py b/mindsdb/integrations/handlers/aerospike_handler/__init__.py deleted file mode 100644 index 88809236183..00000000000 --- a/mindsdb/integrations/handlers/aerospike_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -from .connection_args import connection_args, connection_args_example -try: - from .aerospike_handler import AerospikeHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Aerospike" -name = "aerospike" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/aerospike_handler/aerospike_handler.py b/mindsdb/integrations/handlers/aerospike_handler/aerospike_handler.py deleted file mode 100644 index 174b33a1326..00000000000 --- a/mindsdb/integrations/handlers/aerospike_handler/aerospike_handler.py +++ /dev/null @@ -1,236 +0,0 @@ -import re -from typing import Optional - -import duckdb -import aerospike -import pandas as pd -# from sqlalchemy import create_engine - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode - -# from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -class AerospikeHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Solr SQL statements. - """ - name = 'aerospike' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - super().__init__(name) - self.parser = parse_sql - self.dialect = 'aerospike' - self.connection_data = connection_data - self.kwargs = kwargs - if not self.connection_data.get('host'): - raise Exception("The host parameter should be provided!") - if not self.connection_data.get('port'): - raise Exception("The port parameter should be provided!") - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - if self.is_connected is True: - return self.connection - - user = self.connection_data.get('user', None) - password = self.connection_data.get('password', None) - config = { - 'user': user, - 'password': password, - 'hosts': [(self.connection_data.get('host'), self.connection_data.get('port'))], - } - connection = aerospike.client(config).connect() - self.is_connected = True - self.connection = connection.connect() - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the Aerospike database - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - response.error_message = str(e) - - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - return response - - def native_query(self, query: str) -> Response: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - - try: - # where is not supported - selected_bins, aero_ns, aero_set = self.parse_aql_query(query) - aero_ns = aero_ns.lower() - aero_set = aero_set.lower() - scan = connection.scan(aero_ns.lower(), aero_set.lower()) - res = scan.results() - data_df = pd.DataFrame.from_records([r[2] for r in res]) - if ' where ' in query or ' WHERE ' in query or '*' not in selected_bins: - new_query = re.sub(r'FROM [\w\.]+', 'FROM ' + 'data_df', query, 1) - new_query = new_query.replace(f'{aero_set}.', '') - data_df = duckdb.query(new_query).to_df() - - response = Response( - RESPONSE_TYPE.TABLE, - data_df - ) - except Exception as e: - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement. - """ - return self.native_query(query.to_string()) - - def parse_aql_query(self, aql_query): - # Split the AQL query into tokens - tokens = [t.replace(',', '').upper() for t in re.split(r'\s+', aql_query)] - # Extract the relevant components - select_index = tokens.index("SELECT") - from_index = tokens.index("FROM") - # where_index = tokens.index("WHERE") - - selected_bins = tokens[select_index + 1:from_index] - namespace_set = tokens[from_index + 1] - aero_ns, aero_set = namespace_set.split('.') if '.' in namespace_set else None, namespace_set - if not aero_ns: - aero_ns = self.connection_data.get('namespace') - # filter_condition = " ".join(tokens[where_index + 1:]) - return selected_bins, aero_ns, aero_set - - def get_tables(self) -> Response: - """ - Get a list with all of the tables in Aerospike - """ - need_to_close = self.is_connected is False - connection = self.connect() - - data_lst = [] - request = "sets" - - try: - for node, (err, res) in list(connection.info_all(request).items()): - if res: - entries = [entry.strip() for entry in res.strip().split(';') if entry.strip()] - for entry in entries: - data = [d for d in entry.split('=') if ':set' in d or ':objects' in d] - ele = [None, None, None] - for d in data: - if ':set' in d: - ele[0] = d.split(':')[0] - if ':objects' in d: - ele[1] = d.split(':')[0] - if d[0] or d[1]: - ele[2] = request - data_lst.append(ele) - - response = Response( - RESPONSE_TYPE.TABLE, - pd.DataFrame(data_lst, columns=['table_schema', 'table_name', 'table_type']) - ) - except Exception as e: - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response - - def get_columns(self, table_name: str) -> Response: - """ - Show details about the table - """ - need_to_close = self.is_connected is False - connection = self.connect() - - column_df = pd.DataFrame([], columns=['column_name', 'data_type']) - - try: - response_table = self.get_tables() - df = response_table.data_frame - if not len(df): - return column_df - df = df[df['table_name'] == table_name] - tbl_dtl_arr = df.iloc[0][['table_schema', 'table_name']] - scan = connection.scan(tbl_dtl_arr[0], tbl_dtl_arr[1]) - res = scan.results() - data_df = pd.DataFrame.from_records([r[2] for r in res]) - column_df = pd.DataFrame(data_df.dtypes).reset_index() - column_df.columns = ['column_name', 'data_type'] - response = Response( - RESPONSE_TYPE.TABLE, - column_df - ) - except Exception as e: - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response diff --git a/mindsdb/integrations/handlers/aerospike_handler/connection_args.py b/mindsdb/integrations/handlers/aerospike_handler/connection_args.py deleted file mode 100644 index 95dfcfc436d..00000000000 --- a/mindsdb/integrations/handlers/aerospike_handler/connection_args.py +++ /dev/null @@ -1,46 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Aerospike server.', - 'required': False, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.STR, - 'description': 'The password to authenticate the user with the Aerospike server.', - 'required': False, - 'label': 'Password', - 'secret': True - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Aerospike server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Aerospike server. Must be an integer.', - 'required': True, - 'label': 'Port' - }, - namespace={ - 'type': ARG_TYPE.STR, - 'description': 'The namespace name to use for the query in the Aerospike server.', - 'required': True, - 'label': 'namespace' - } -) - -connection_args_example = OrderedDict( - user="demo_user", - password="demo_password", - host='127.0.0.1', - port=3000, - namespace="demo", -) diff --git a/mindsdb/integrations/handlers/aerospike_handler/icon.svg b/mindsdb/integrations/handlers/aerospike_handler/icon.svg deleted file mode 100644 index 7e39eefbd81..00000000000 --- a/mindsdb/integrations/handlers/aerospike_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/aerospike_handler/requirements.txt b/mindsdb/integrations/handlers/aerospike_handler/requirements.txt deleted file mode 100644 index 34ad0026bc8..00000000000 --- a/mindsdb/integrations/handlers/aerospike_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -aerospike~=13.0.0 diff --git a/mindsdb/integrations/handlers/aerospike_handler/tests/test_aerospike_handler.py b/mindsdb/integrations/handlers/aerospike_handler/tests/test_aerospike_handler.py deleted file mode 100644 index d75376189fd..00000000000 --- a/mindsdb/integrations/handlers/aerospike_handler/tests/test_aerospike_handler.py +++ /dev/null @@ -1,36 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.aerospike_handler.aerospike_handler import AerospikeHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class AerospikeHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - # "user": "", - # "password": "", - "host": '172.17.0.2', - "port": 3000, - "namespace": "test", - } - cls.handler = AerospikeHandler('test_aerospike_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM house_rentals" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('house_rentals') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/airtable_handler/README.md b/mindsdb/integrations/handlers/airtable_handler/README.md deleted file mode 100644 index 37cea71d36d..00000000000 --- a/mindsdb/integrations/handlers/airtable_handler/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Airtable Handler - -This is the implementation of the Airtable handler for MindsDB. - -## Airtable -In short, Airtable is a platform that makes it easy to build powerful, custom applications. These tools can streamline just about any process, workflow, or projectβ€”and best of all, you can build them without ever learning to write a single line of code. (Spoiler alert: that’s what low-code/no-code is all about.) Our customers use Airtable to do everything from tracking job interviews to managing large-scale video production, and thousands of companies use Airtable to run their most important business processes every day. -https://www.airtable.com/guides/start/what-is-airtable - -## Implementation -This handler was implemented using `duckdb`, a library that allows SQL queries to be executed on `pandas` DataFrames. - -In essence, when querying a particular table, the entire table will first be pulled into a `pandas` DataFrame using the Airtable API. Once this is done, SQL queries can be run on the DataFrame using `duckdb`. - -Note: Since the entire table needs to be pulled into memory first (DataFrame), it is recommended to be somewhat careful when querying large tables so as not to overload your machine. - -The documentation for the Airtable API is available here, -
-https://airtable.com/api - -The required arguments to establish a connection are, -* `base_id`: the Airtable base ID -* `table_name`: the Airtable table name -* `api_key`: the API key for the Airtable API - -## Usage -In order to make use of this handler and connect to an Access database in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE airtable_datasource -WITH -engine='airtable', -parameters={ - "base_id": "dqweqweqrwwqq", - "table_name": "iris", - "api_key": "knlsndlknslk" -}; -~~~~ - -Now, you can use this established connection to query your table as follows, -~~~~sql -SELECT * FROM airtable_datasource.example_tbl -~~~~ - -At the moment, only `SELECT` queries are allowed to be executed through `duckdb`. This, however, has no restriction on running machine learning algorithms against your data in Airtable using `CREATE PREDICTOR` statements. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/airtable_handler/__about__.py b/mindsdb/integrations/handlers/airtable_handler/__about__.py deleted file mode 100644 index 5282e3dd0f5..00000000000 --- a/mindsdb/integrations/handlers/airtable_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Airtable handler' -__package_name__ = 'mindsdb_airtable_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Airtable" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/airtable_handler/__init__.py b/mindsdb/integrations/handlers/airtable_handler/__init__.py deleted file mode 100644 index 99438e5556e..00000000000 --- a/mindsdb/integrations/handlers/airtable_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE -from .__about__ import __version__ as version, __description__ as description - -from .connection_args import connection_args, connection_args_example -try: - from .airtable_handler import AirtableHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Airtable' -name = 'airtable' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/airtable_handler/airtable_handler.py b/mindsdb/integrations/handlers/airtable_handler/airtable_handler.py deleted file mode 100644 index 89526a87ec2..00000000000 --- a/mindsdb/integrations/handlers/airtable_handler/airtable_handler.py +++ /dev/null @@ -1,211 +0,0 @@ -from typing import Optional - -import pandas as pd -import requests -import duckdb - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class AirtableHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Airtable statements. - """ - - name = 'airtable' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'airtable' - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - url = f"https://api.airtable.com/v0/{self.connection_data['base_id']}/{self.connection_data['table_name']}" - headers = {"Authorization": "Bearer " + self.connection_data['api_key']} - - response = requests.get(url, headers=headers) - response = response.json() - records = response['records'] - - new_records = True - while new_records: - try: - if response['offset']: - params = {"offset": response['offset']} - response = requests.get(url, params=params, headers=headers) - response = response.json() - - new_records = response['records'] - records = records + new_records - except Exception: - new_records = False - - rows = [record['fields'] for record in records] - globals()[self.connection_data['table_name']] = pd.DataFrame(rows) - - self.connection = duckdb.connect() - self.is_connected = True - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Airtable base {self.connection_data["base_id"]}, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cursor.description] - ) - ) - - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on table {self.connection_data["table_name"]} in base {self.connection_data["base_id"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - return self.native_query(query.to_string()) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - [self.connection_data['table_name']], - columns=['table_name'] - ) - ) - - return response - - def get_columns(self) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - { - 'column_name': list(globals()[self.connection_data['table_name']].columns), - 'data_type': globals()[self.connection_data['table_name']].dtypes - } - ) - ) - - return response diff --git a/mindsdb/integrations/handlers/airtable_handler/connection_args.py b/mindsdb/integrations/handlers/airtable_handler/connection_args.py deleted file mode 100644 index 96f49f75038..00000000000 --- a/mindsdb/integrations/handlers/airtable_handler/connection_args.py +++ /dev/null @@ -1,26 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - base_id={ - 'type': ARG_TYPE.STR, - 'description': 'The Airtable base ID.' - }, - table_name={ - 'type': ARG_TYPE.STR, - 'description': 'The Airtable table name.' - }, - api_key={ - 'type': ARG_TYPE.STR, - 'description': 'The API key for the Airtable API.', - 'secret': True - } -) - -connection_args_example = OrderedDict( - base_id='dqweqweqrwwqq', - table_name='iris', - api_key='knlsndlknslk' -) diff --git a/mindsdb/integrations/handlers/airtable_handler/icon.svg b/mindsdb/integrations/handlers/airtable_handler/icon.svg deleted file mode 100644 index 4b441b7a38a..00000000000 --- a/mindsdb/integrations/handlers/airtable_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/airtable_handler/tests/test_airtable_handler.py b/mindsdb/integrations/handlers/airtable_handler/tests/test_airtable_handler.py deleted file mode 100644 index 15832056b87..00000000000 --- a/mindsdb/integrations/handlers/airtable_handler/tests/test_airtable_handler.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.airtable_handler.airtable_handler import AirtableHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class AirtableHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "base_id": "dqweqweqrwwqq", - "table_name": "iris", - "api_key": "knlsndlknslk" - } - cls.handler = AirtableHandler('test_airtable_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM iris" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns() - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/altibase_handler/README.md b/mindsdb/integrations/handlers/altibase_handler/README.md deleted file mode 100644 index 2b728f2ab83..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/README.md +++ /dev/null @@ -1,64 +0,0 @@ -# Altibase Handler - -This is the implementation of the Altibase handler for MindsDB. - -## Altibase -ALTIBASE is a hybrid database, relational open source database management system manufactured by The Altibase Corporation. The software comes with a hybrid architecture which allows it to access both memory-resident and disk-resident tables using single interface. - -## Implementation -This handler was implemented using the JDBC drivers provided by Altibase. To establish connection with the database, `JayDeBeApi` library is used. The `JayDeBeApi` module allows you to connect from Python code to databases using Java JDBC. -It is also possible to connect to the database using the ODBC driver. `pyodbc` library is used to connect to the database. - -### The required arguments to establish a connection are: -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `database`: Database name to be connected -* `jdbc_class`: Java class name of the JDBC driver -### The optional arguments to establish a connection are: -* `user`: username asscociated with database -* `password`: password to authenticate your access -* `jar_location`: Jar filename for the JDBC driver -* `dsn`: datasource name of the Altibase server. Use dsn if you want to use an ODBC connection. - - -## Usage -In order to make use of this handler and connect to Altibase in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE altibase_datasource -WITH -engine='Altibase', -parameters={ - "user":"sys", - "password":"manager", - "host":"127.0.0.1", - "port":20300, - "database":"mydb" -}; -~~~~ - -~~~~sql -CREATE DATABASE altibase_datasource -WITH -engine='Altibase', -parameters={ - "dsn":"altiodbc" -}; -~~~~ - -~~~~sql -CREATE DATABASE altibase_datasource -WITH -engine='Altibase', -parameters={ - "dsn":"altiodbc", - "user":"sys", - "password":"manager", - "host":"127.0.0.1", - "port":20300 -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM altibase_datasource.test; -~~~~ diff --git a/mindsdb/integrations/handlers/altibase_handler/__about__.py b/mindsdb/integrations/handlers/altibase_handler/__about__.py deleted file mode 100644 index f53d5225a54..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Altibase handler' -__package_name__ = 'mindsdb_altibase_handler' -__version__ = '0.0.2' -__description__ = "MindsDB handler for Altibase" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/altibase_handler/__init__.py b/mindsdb/integrations/handlers/altibase_handler/__init__.py deleted file mode 100644 index b74a7f5403d..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .altibase_handler import AltibaseHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Altibase' -name = 'altibase' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'icon_path', 'title', - 'description', 'connection_args', 'connection_args_example', 'import_error' -] diff --git a/mindsdb/integrations/handlers/altibase_handler/altibase_handler.py b/mindsdb/integrations/handlers/altibase_handler/altibase_handler.py deleted file mode 100644 index fd3bc0670b5..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/altibase_handler.py +++ /dev/null @@ -1,252 +0,0 @@ -from typing import Optional - -import jaydebeapi as jdbcconnector -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode -import pandas as pd -import pyodbc -import numpy as np - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class AltibaseHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Altibase statements. - """ - - name = 'altibase' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ constructor - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - """ - super().__init__(name) - - self.parser = parse_sql - - self.connection_args = connection_data - self.database = self.connection_args.get('database') - self.host = self.connection_args.get('host') - self.port = self.connection_args.get('port') - self.user = self.connection_args.get('user') - self.password = self.connection_args.get('password') - self.dsn = self.connection_args.get('dsn') - - self.connection = None - self.is_connected = False - - def connect(self): - """ Set up any connections required by the handler - Should return output of check_connection() method after attempting connection. - Should switch self.is_connected. - Returns: - connection - """ - if self.is_connected is True: - return self.connection - - if self.dsn: - return self.connect_with_odbc() - else: - return self.connect_with_jdbc() - - def connect_with_odbc(self): - """ Set up any connections required by the handler - Should return output of check_connection() method after attempting connection. - Should switch self.is_connected. - Returns: - connection - """ - conn_str = [f"DSN={self.dsn}"] - - if self.host: - conn_str.append(f"Server={self.host}") - if self.port: - conn_str.append(f"Port={self.port}") - if self.user: - conn_str.append(f"User={self.user}") - if self.password: - conn_str.append(f"Password={self.password}") - - conn_str = ';'.join(conn_str) - - try: - self.connection = pyodbc.connect(conn_str, timeout=10) - self.is_connected = True - except Exception as e: - logger.error(f"Error while connecting to {self.database}, {e}") - - return self.connection - - def connect_with_jdbc(self): - """ Set up any connections required by the handler - Should return output of check_connection() method after attempting connection. - Should switch self.is_connected. - Returns: - connection - """ - jar_location = self.connection_args.get('jar_location') - - jdbc_class = self.connection_args.get('jdbc_class', 'Altibase.jdbc.driver.AltibaseDriver') - jdbc_url = f"jdbc:Altibase://{self.host}:{self.port}/{self.database}" - - try: - if self.user and self.password and jar_location: - connection = jdbcconnector.connect(jclassname=jdbc_class, url=jdbc_url, driver_args=[self.user, self.password], jars=str(jar_location).split(",")) - elif self.user and self.password: - connection = jdbcconnector.connect(jclassname=jdbc_class, url=jdbc_url, driver_args=[self.user, self.password]) - elif jar_location: - connection = jdbcconnector.connect(jclassname=jdbc_class, url=jdbc_url, jars=jar_location.split(",")) - else: - connection = jdbcconnector.connect(jclassname=jdbc_class, url=jdbc_url) - - self.connection = connection - self.is_connected = True - except Exception as e: - logger.error(f"Error while connecting to {self.database}, {e}") - raise e - - return self.connection - - def disconnect(self): - """ Close any existing connections - Should switch self.is_connected. - """ - if self.is_connected is True: - try: - self.connection.close() - self.is_connected = False - except Exception as e: - logger.error(f"Error while disconnecting to {self.database}, {e}") - return False - return True - - def check_connection(self) -> StatusResponse: - """ Check connection to the handler - Returns: - HandlerStatusResponse - """ - responseCode = StatusResponse(success=False) - need_to_close = self.is_connected is False - - try: - self.connect() - responseCode.success = True - except Exception as e: - logger.error(f'Error connecting to database {self.database}, {e}!') - responseCode.error_message = str(e) - finally: - if responseCode.success and need_to_close: - self.disconnect() - if not responseCode.success and self.is_connected: - self.is_connected = False - - return responseCode - - def native_query(self, query: str) -> Response: - """Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - connection = self.connect() - with connection.cursor() as cur: - try: - cur.execute(query) - if cur.description: - result = cur.fetchall() - - if self.dsn: - if len(result) > 0: - result = np.array(result) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.database}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind of query: SELECT, INSERT, DELETE, etc - Returns: - HandlerResponse - """ - if isinstance(query, ASTNode): - query_str = query.to_string() - else: - query_str = str(query) - - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ Return list of entities - Return list of entities that will be accesible as tables. - Returns: - HandlerResponse - """ - query = ''' - SELECT - TABLE_NAME, - TABLE_ID, - TABLE_TYPE - FROM - system_.sys_tables_ - WHERE - user_id = USER_ID(); - ''' - - return self.native_query(query) - - def get_columns(self, table_name: str) -> Response: - """ Returns a list of entity columns - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - query = f""" - SELECT - COLUMN_NAME, - DATA_TYPE - FROM - system_.sys_columns_ ct - inner join - system_.sys_tables_ tt - on ct.table_id=tt.table_id - where - tt.table_name = '{table_name.capitalize()}'; - """ - - return self.native_query(query) diff --git a/mindsdb/integrations/handlers/altibase_handler/connection_args.py b/mindsdb/integrations/handlers/altibase_handler/connection_args.py deleted file mode 100644 index ba3d900774f..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/connection_args.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Altibase server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Altibase server. Must be an integer.' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Altibase server.' - }, - password={ - 'type': ARG_TYPE.STR, - 'description': 'The password to authenticate the user with the Altibase server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Altibase server.' - }, - jdbc_class={ - 'type': ARG_TYPE.STR, - 'description': 'The driver class of the Altibase JDBC driver' - }, - jar_location={ - 'type': ARG_TYPE.PATH, - 'description': 'The location of the Altibase JDBC driver jar file' - }, - dsn={ - 'type': ARG_TYPE.STR, - 'description': 'Datasource name of the Altibase server. NOTE: use dsn if you want to use an ODBC connection.' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=20300, - user='sys', - password='manager', - database='mydb', - jdbc_class='Altibase.jdbc.driver.AltibaseDriver', - jar_location='/data/altibase_home/lib/Altibase.jar' -) diff --git a/mindsdb/integrations/handlers/altibase_handler/icon.svg b/mindsdb/integrations/handlers/altibase_handler/icon.svg deleted file mode 100644 index f00226a8577..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/altibase_handler/requirements.txt b/mindsdb/integrations/handlers/altibase_handler/requirements.txt deleted file mode 100644 index 97f1e8c0fd7..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -jaydebeapi -pyodbc diff --git a/mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py b/mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py deleted file mode 100644 index 17acc233d8a..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py +++ /dev/null @@ -1,67 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.altibase_handler.altibase_handler import AltibaseHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class AltibaseHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "127.0.0.1", - "port": 20300, - "database": "mydb", - "user": "sys", - "password": "manager" - } - } - cls.handler = AltibaseHandler('test_altibase_handler', **cls.kwargs) - - def test_0_connect(self): - assert self.handler.connect() - - def test_1_drop_table(self): - # Not supported 'IF EXISTS' syntax - res = self.handler.query("DROP TABLE TEST_TABLE") - assert res.type is RESPONSE_TYPE.OK - - def test_2_create_table(self): - res = self.handler.query( - '''CREATE TABLE TEST_TABLE ( - ID INT PRIMARY KEY, - NAME VARCHAR(14) - )''' - ) - assert res.type is RESPONSE_TYPE.OK - - def test_3_insert(self): - res = self.handler.query( - """INSERT INTO TEST_TABLE - VALUES - (100,'ONE HUNDRED'), - (200,'TWO HUNDRED'), - (300,'THREE HUNDRED')""" - ) - assert res.type is RESPONSE_TYPE.OK - - def test_4_select(self): - res = self.handler.query('SELECT * FROM TEST_TABLE') - assert res.type is RESPONSE_TYPE.TABLE - - def test_5_check_connection(self): - assert self.handler.check_connection() - - def test_6_get_tables(self): - res = self.handler.get_tables() - assert res.type is RESPONSE_TYPE.TABLE - - def test_7_get_columns(self): - res = self.handler.get_columns("TEST_TABLE") - assert res.type is RESPONSE_TYPE.TABLE - - def test_8_disconnect(self): - assert self.handler.disconnect() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py b/mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py deleted file mode 100644 index a4deb8a707b..00000000000 --- a/mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py +++ /dev/null @@ -1,68 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.altibase_handler.altibase_handler import AltibaseHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class AltibaseHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "dsn": "altiodbc", - # Override - # "host": "127.0.0.1", - # "port": 20300, - # "user": "sys", - # "password": "manager" - } - } - cls.handler = AltibaseHandler('test_altibase_handler', **cls.kwargs) - - def test_0_connect(self): - assert self.handler.connect() - - def test_1_drop_table(self): - # Not supported 'IF EXISTS' syntax - res = self.handler.query("DROP TABLE TEST_TABLE") - assert res.type is RESPONSE_TYPE.OK - - def test_2_create_table(self): - res = self.handler.query( - '''CREATE TABLE TEST_TABLE ( - ID INT PRIMARY KEY, - NAME VARCHAR(14) - )''' - ) - assert res.type is RESPONSE_TYPE.OK - - def test_3_insert(self): - res = self.handler.query( - """INSERT INTO TEST_TABLE - VALUES - (100,'ONE HUNDRED'), - (200,'TWO HUNDRED'), - (300,'THREE HUNDRED')""" - ) - assert res.type is RESPONSE_TYPE.OK - - def test_4_select(self): - res = self.handler.query('SELECT * FROM TEST_TABLE') - assert res.type is RESPONSE_TYPE.TABLE - - def test_5_check_connection(self): - assert self.handler.check_connection() - - def test_6_get_tables(self): - res = self.handler.get_tables() - assert res.type is RESPONSE_TYPE.TABLE - - def test_7_get_columns(self): - res = self.handler.get_columns("TEST_TABLE") - assert res.type is RESPONSE_TYPE.TABLE - - def test_8_disconnect(self): - assert self.handler.disconnect() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/anthropic_handler/__init__.py b/mindsdb/integrations/handlers/anthropic_handler/__init__.py index acbe4c9b95e..c49e22ffb50 100644 --- a/mindsdb/integrations/handlers/anthropic_handler/__init__.py +++ b/mindsdb/integrations/handlers/anthropic_handler/__init__.py @@ -1,6 +1,6 @@ from .__about__ import __version__ as version from .__about__ import __description__ as description -from mindsdb.integrations.libs.const import HANDLER_TYPE +from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE from mindsdb.utilities import log logger = log.getLogger(__name__) @@ -17,6 +17,7 @@ title = "Anthropic" name = "anthropic" type = HANDLER_TYPE.ML -icon_path = 'icon.svg' +icon_path = "icon.svg" +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB permanent = False -__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"] +__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path", "support_level"] diff --git a/tests/unused/unit/ml_handlers/test_anthropic.py b/mindsdb/integrations/handlers/anthropic_handler/tests/test_anthropic.py similarity index 83% rename from tests/unused/unit/ml_handlers/test_anthropic.py rename to mindsdb/integrations/handlers/anthropic_handler/tests/test_anthropic.py index 020e1d13e57..1594b79ee71 100644 --- a/tests/unused/unit/ml_handlers/test_anthropic.py +++ b/mindsdb/integrations/handlers/anthropic_handler/tests/test_anthropic.py @@ -6,7 +6,7 @@ from .base_ml_test import BaseMLAPITest -@pytest.mark.skipif(os.environ.get('ANTHROPIC_API_KEY') is None, reason='Missing API key!') +@pytest.mark.skipif(os.environ.get("ANTHROPIC_API_KEY") is None, reason="Missing API key!") class TestAnthropic(BaseMLAPITest): """Test Class for Anthropic Integration Testing""" @@ -19,7 +19,7 @@ def setup_method(self): CREATE ML_ENGINE anthropic FROM anthropic USING - anthropic_api_key = '{self.get_api_key('ANTHROPIC_API_KEY')}'; + anthropic_api_key = '{self.get_api_key("ANTHROPIC_API_KEY")}'; """ ) @@ -33,7 +33,7 @@ def test_invalid_model_parameter(self): engine='anthropic', column='question', model='this-claude-does-not-exist', - api_key='{self.get_api_key('ANTHROPIC_API_KEY')}'; + api_key='{self.get_api_key("ANTHROPIC_API_KEY")}'; """ ) with pytest.raises(Exception): @@ -48,7 +48,7 @@ def test_unknown_model_argument(self): USING engine='anthropic', column='question', - api_key='{self.get_api_key('ANTHROPIC_API_KEY')}', + api_key='{self.get_api_key("ANTHROPIC_API_KEY")}', evidently_wrong_argument='wrong value'; """ ) @@ -64,7 +64,7 @@ def test_single_qa(self): USING engine='anthropic', column='question', - api_key='{self.get_api_key('ANTHROPIC_API_KEY')}'; + api_key='{self.get_api_key("ANTHROPIC_API_KEY")}'; """ ) self.wait_predictor("proj", "test_anthropic_single_qa") @@ -81,10 +81,9 @@ def test_single_qa(self): @patch("mindsdb.integrations.handlers.postgres_handler.Handler") def test_bulk_qa(self, mock_handler): """Test for bulk question/answer pairs""" - df = pd.DataFrame.from_dict({"question": [ - "What is the capital of Sweden?", - "What is the second planet of the solar system?" - ]}) + df = pd.DataFrame.from_dict( + {"question": ["What is the capital of Sweden?", "What is the second planet of the solar system?"]} + ) self.set_handler(mock_handler, name="pg", tables={"df": df}) self.run_sql( @@ -94,7 +93,7 @@ def test_bulk_qa(self, mock_handler): USING engine='anthropic', column='question', - api_key='{self.get_api_key('ANTHROPIC_API_KEY')}'; + api_key='{self.get_api_key("ANTHROPIC_API_KEY")}'; """ ) self.wait_predictor("proj", "test_anthropic_bulk_qa") diff --git a/mindsdb/integrations/handlers/apache_doris_handler/README.md b/mindsdb/integrations/handlers/apache_doris_handler/README.md deleted file mode 100644 index 49978558f37..00000000000 --- a/mindsdb/integrations/handlers/apache_doris_handler/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Apache Doris Handler - -This is the implementation of the Apache Doris for MindsDB. - -## Apache Doris - -Apache Doris is a new-generation open-source real-time data warehouse based on MPP architecture, with easier use and higher performance for big data analytics. - -## Implementation - -Since Doris uses MySQL dilect of SQL, this handler is implemented using the `mysql-connector-python` library. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - -There are several optional arguments that can be used as well. - -* `ssl` is the `ssl` parameter value that indicates whether SSL is enabled (`True`) or disabled (`False`). -* `ssl_ca` is the SSL Certificate Authority. -* `ssl_cert` stores SSL certificates. -* `ssl_key` stores SSL keys. - -## Usage - -### Database connection - -In order to make use of this handler and connect to the Doris database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE doris_datasource -WITH - ENGINE = 'apache_doris', - PARAMETERS = { - "host": "127.0.0.1", - "port": 9030, - "database": "testdb", - "user": "root", - "password": "password" -}; -``` - -### Queries - -You can use this established connection to query your table just like you would a normal MySQL server. - -```sql -SELECT * FROM mysql_datasource.example_table; -``` diff --git a/mindsdb/integrations/handlers/apache_doris_handler/__about__.py b/mindsdb/integrations/handlers/apache_doris_handler/__about__.py deleted file mode 100644 index bfe9fba5f13..00000000000 --- a/mindsdb/integrations/handlers/apache_doris_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Apache Doris handler' -__package_name__ = 'mindsdb_apache_doris_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Apache Doris" -__author__ = 'Aditya Azad' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/apache_doris_handler/__init__.py b/mindsdb/integrations/handlers/apache_doris_handler/__init__.py deleted file mode 100644 index 54a760db3a1..00000000000 --- a/mindsdb/integrations/handlers/apache_doris_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .apache_doris_handler import ApacheDorisHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'Apache Doris' -name = 'apache_doris' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/apache_doris_handler/apache_doris_handler.py b/mindsdb/integrations/handlers/apache_doris_handler/apache_doris_handler.py deleted file mode 100644 index 20edd4f7a5c..00000000000 --- a/mindsdb/integrations/handlers/apache_doris_handler/apache_doris_handler.py +++ /dev/null @@ -1,10 +0,0 @@ -from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler - - -class ApacheDorisHandler(MySQLHandler): - """This handler handles connection and execution of the Apache Doris statements.""" - - name = 'apache_doris' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/apache_doris_handler/icon.svg b/mindsdb/integrations/handlers/apache_doris_handler/icon.svg deleted file mode 100644 index ff3cc1cd80a..00000000000 --- a/mindsdb/integrations/handlers/apache_doris_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/apache_doris_handler/requirements.txt b/mindsdb/integrations/handlers/apache_doris_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/apache_doris_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/aqicn_handler/README.md b/mindsdb/integrations/handlers/aqicn_handler/README.md deleted file mode 100644 index 33cd418dcc3..00000000000 --- a/mindsdb/integrations/handlers/aqicn_handler/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# World Air Quality Index Handler - -World Air Quality Index handler for MindsDB provides interfaces to connect to [World Air Quality Index](https://aqicn.org) via APIs and pull repository data into MindsDB. - ---- - -## Table of Contents - -- [World Air Quality Index Handler](#world-air-quality-index-handler) - - [Table of Contents](#table-of-contents) - - [About World Air Quality Index Handler](#about-world-air-quality-index-handler) - - [World Air Quality Index Handler Implementation](#world-air-quality-index-handler-implementation) - - [World Air Quality Index Handler Initialization](#world-air-quality-index-handler-initialization) - - [Implemented Features](#implemented-features) - - [Example Usage](#example-usage) - ---- - -## About World Air Quality Index Handler - -The World Air Quality Index project is a non-profit project started in 2007. Its mission is to promote air pollution awareness for citizens and provide a unified and world-wide air quality information. - -The project is providing transparent air quality information for more than 130 countries, covering more than 30,000 stations in 2000 major cities, via those two websites: aqicn.org and waqi.info - - -## World Air Quality Index Handler Implementation - -This handler was implemented using the `requests` library that makes http calls to https://aqicn.org/json-api/doc/ - -## World Air Quality Index Handler Initialization - -The World Air Quality Index handler is initialized with the following parameters: - -- `api_key`: API key to interact with aqicn - -Read about creating an account [here](https://aqicn.org/api/). - -## Implemented Features - -- [x] World Air Quality Index data for lattitude and longitude - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - - -## Example Usage - -The first step is to create a database with the new `aqicn` engine. - -~~~~sql -CREATE DATABASE mindsdb_aqicn -WITH ENGINE = 'aqicn', -PARAMETERS = { - "api_key": "api_key" -}; -~~~~ - -Use the established connection to query your database: - -To get air quality metrics based on your location: - -~~~~sql -SELECT * FROM mindsdb_aqicn.air_quality_user_location; -~~~~ - -To get air quality metrics based on city: - -~~~~sql -SELECT * FROM mindsdb_aqicn.air_quality_city where city="Bangalore"; -~~~~ - -The `city` column is mandatory in the above query. - -To get air quality metrics based on coordinates: - -~~~~sql -SELECT * FROM mindsdb_aqicn.air_quality_lat_lng where lat="12.938539" AND lng="77.5901"; -~~~~ - -The `lat` and `lng` columns are mandatory in the above query. - -To get air quality metrics based on station name: - -~~~~sql -SELECT * FROM mindsdb_aqicn.air_quality_station_by_name where name="bangalore"; -~~~~ - -The `name` column is mandatory in the above query. diff --git a/mindsdb/integrations/handlers/aqicn_handler/__about__.py b/mindsdb/integrations/handlers/aqicn_handler/__about__.py deleted file mode 100644 index 610bf0c9934..00000000000 --- a/mindsdb/integrations/handlers/aqicn_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB World Air Quality Index handler" -__package_name__ = "mindsdb_aqicn_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for World Air Quality Index" -__author__ = "Abhilash K R" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/aqicn_handler/__init__.py b/mindsdb/integrations/handlers/aqicn_handler/__init__.py deleted file mode 100644 index de5fa0ae01a..00000000000 --- a/mindsdb/integrations/handlers/aqicn_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .aqicn_handler import AQICNHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Air Quality Index API" -name = "aqicn" -type = HANDLER_TYPE.DATA -icon_path = "icon.png" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/aqicn_handler/aqicn.py b/mindsdb/integrations/handlers/aqicn_handler/aqicn.py deleted file mode 100644 index d524c727314..00000000000 --- a/mindsdb/integrations/handlers/aqicn_handler/aqicn.py +++ /dev/null @@ -1,35 +0,0 @@ -import requests - - -class AQIClient: - def __init__(self, api_key): - self.api_key = api_key - self.params = {"token": api_key} - self.base_endpoint = "https://api.waqi.info/feed" - - def make_request(self, url, additionalParams={}): - newParams = {**self.params, **additionalParams} - resp = requests.get(url, params=newParams) - res = resp.json() - content = {} - if res["status"] == "ok": - content = {'content': resp.json(), 'code': 200} - else: - content = {'content': resp.json(), 'code': 404} - return content - - def air_quality_city(self, city): - url = f'{self.base_endpoint}/{city}/' - return self.make_request(url) - - def air_quality_lat_lng(self, lat, lng): - url = f'{self.base_endpoint}/geo:{lat};{lng}/' - return self.make_request(url) - - def air_quality_user_location(self): - url = f'{self.base_endpoint}/here/' - return self.make_request(url) - - def air_quality_station_by_name(self, name): - url = 'https://api.waqi.info/search/' - return self.make_request(url, {"keyword": name}) diff --git a/mindsdb/integrations/handlers/aqicn_handler/aqicn_handler.py b/mindsdb/integrations/handlers/aqicn_handler/aqicn_handler.py deleted file mode 100644 index 40f9b4ddbbf..00000000000 --- a/mindsdb/integrations/handlers/aqicn_handler/aqicn_handler.py +++ /dev/null @@ -1,97 +0,0 @@ -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.aqicn_handler.aqicn_tables import ( - AQByUserLocationTable, - AQByCityTable, - AQByLatLngTable, - AQByNetworkStationTable -) -from mindsdb.integrations.handlers.aqicn_handler.aqicn import AQIClient -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class AQICNHandler(APIHandler): - """The World Air Quality handler implementation""" - - def __init__(self, name: str, **kwargs): - """Initialize the aqicn handler. - - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - self.aqicn_client = None - self.is_connected = False - - ai_user_location_data = AQByUserLocationTable(self) - self._register_table("air_quality_user_location", ai_user_location_data) - - ai_city_data = AQByCityTable(self) - self._register_table("air_quality_city", ai_city_data) - - ai_lat_lng_data = AQByLatLngTable(self) - self._register_table("air_quality_lat_lng", ai_lat_lng_data) - - aq_network_station_data = AQByNetworkStationTable(self) - self._register_table("air_quality_station_by_name", aq_network_station_data) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - - Returns - ------- - StatusResponse - connection object - """ - resp = StatusResponse(False) - self.aqicn_client = AQIClient(self.connection_data.get("api_key")) - content = self.aqicn_client.air_quality_user_location() - if content["code"] != 200: - resp.success = False - resp.error_message = content["content"]["data"] - self.is_connected = False - return resp - self.is_connected = True - resp.success = True - return resp - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - - Returns - ------- - StatusResponse - Status confirmation - """ - response = self.connect() - self.is_connected = response.success - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/aqicn_handler/aqicn_tables.py b/mindsdb/integrations/handlers/aqicn_handler/aqicn_tables.py deleted file mode 100644 index 01c9f029b52..00000000000 --- a/mindsdb/integrations/handlers/aqicn_handler/aqicn_tables.py +++ /dev/null @@ -1,425 +0,0 @@ -import pandas as pd -from typing import List -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor -from mindsdb.utilities import log -from mindsdb_sql_parser import ast - -logger = log.getLogger(__name__) - - -class AQByUserLocationTable(APITable): - """The Air Quality By User Location Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://aqicn.org/json-api/doc/#api-Geolocalized_Feed-GetHereFeed" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Air Quality Data - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'air_quality_user_location', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.aqicn_client.air_quality_user_location() - - self.check_res(res=response) - - df = pd.json_normalize(response["content"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["content"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "status", - "data.aqi", - "data.idx", - "data.attributions", - "data.city.geo", - "data.city.name", - "data.city.url", - "data.city.location", - "data.dominentpol", - "data.iaqi.co.v", - "data.iaqi.dew.v", - "data.iaqi.h.v", - "data.iaqi.no2.v", - "data.iaqi.p.v", - "data.iaqi.pm10.v", - "data.iaqi.so2.v", - "data.iaqi.t.v", - "data.iaqi.w.v", - "data.time.s", - "data.time.tz", - "data.time.v", - "data.time.iso", - "data.forecast.daily.o3", - "data.forecast.daily.pm10", - "data.forecast.daily.pm25", - "data.debug.sync" - ] - - -class AQByCityTable(APITable): - """The Air Quality By City Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://aqicn.org/json-api/doc/#api-City_Feed-GetCityFeed" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Air Quality Data - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'air_quality_city', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'city': - if op == '=': - search_params["city"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for city column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("city" in search_params) - - if not filter_flag: - raise NotImplementedError("city column has to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.aqicn_client.air_quality_city(search_params["city"]) - - self.check_res(res=response) - - df = pd.json_normalize(response["content"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["content"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "status", - "data.aqi", - "data.idx", - "data.attributions", - "data.city.geo", - "data.city.name", - "data.city.url", - "data.city.location", - "data.dominentpol", - "data.iaqi.co.v", - "data.iaqi.dew.v", - "data.iaqi.h.v", - "data.iaqi.no2.v", - "data.iaqi.o3.v", - "data.iaqi.p.v", - "data.iaqi.pm10.v", - "data.iaqi.pm25.v", - "data.iaqi.so2.v", - "data.iaqi.t.v", - "data.iaqi.w.v", - "data.time.s", - "data.time.tz", - "data.time.v", - "data.time.iso", - "data.forecast.daily.o3", - "data.forecast.daily.pm10", - "data.forecast.daily.pm25", - "data.debug.sync" - ] - - -class AQByLatLngTable(APITable): - """The Air Quality By Lat Lng Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://aqicn.org/json-api/doc/#api-Geolocalized_Feed-GetGeolocFeed" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Air Quality Data - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'air_quality_lat_lng', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'lat': - if op == '=': - search_params["lat"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for lat column.") - if arg1 == 'lng': - if op == '=': - search_params["lng"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for lng column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("lat" in search_params) and ("lng" in search_params) - - if not filter_flag: - raise NotImplementedError("lat and lng columns have to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.aqicn_client.air_quality_lat_lng(search_params["lat"], search_params["lng"]) - - self.check_res(res=response) - - df = pd.json_normalize(response["content"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["content"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "status", - "data.aqi", - "data.idx", - "data.attributions", - "data.city.geo", - "data.city.name", - "data.city.url", - "data.city.location", - "data.dominentpol", - "data.iaqi.co.v", - "data.iaqi.dew.v", - "data.iaqi.h.v", - "data.iaqi.no2.v", - "data.iaqi.o3.v", - "data.iaqi.p.v", - "data.iaqi.pm10.v", - "data.iaqi.pm25.v", - "data.iaqi.so2.v", - "data.iaqi.t.v", - "data.iaqi.w.v", - "data.time.s", - "data.time.tz", - "data.time.v", - "data.time.iso", - "data.forecast.daily.o3", - "data.forecast.daily.pm10", - "data.forecast.daily.pm25", - "data.debug.sync" - ] - - -class AQByNetworkStationTable(APITable): - """The Air Quality By Network Station Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://aqicn.org/json-api/doc/#api-Search-SearchByName" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Air Quality Data - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'air_quality_station_by_name', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'name': - if op == '=': - search_params["name"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for name column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("name" in search_params) - - if not filter_flag: - raise NotImplementedError("name column have to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.aqicn_client.air_quality_station_by_name(search_params["name"]) - - self.check_res(res=response) - - df = pd.json_normalize(response["content"]["data"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["content"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - 'uid', - 'aqi', - 'time.tz', - 'time.stime', - 'time.vtime', - 'station.name', - 'station.geo', - 'station.url', - 'station.country' - ] diff --git a/mindsdb/integrations/handlers/aqicn_handler/connection_args.py b/mindsdb/integrations/handlers/aqicn_handler/connection_args.py deleted file mode 100644 index 0bf20a6ce3c..00000000000 --- a/mindsdb/integrations/handlers/aqicn_handler/connection_args.py +++ /dev/null @@ -1,18 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.STR, - "description": "API key", - "required": True, - "label": "api_key", - "secret": True - }, -) - -connection_args_example = OrderedDict( - api_key="api_key", -) diff --git a/mindsdb/integrations/handlers/aqicn_handler/icon.png b/mindsdb/integrations/handlers/aqicn_handler/icon.png deleted file mode 100644 index f40f3104403..00000000000 Binary files a/mindsdb/integrations/handlers/aqicn_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/athena_handler/README.md b/mindsdb/integrations/handlers/athena_handler/README.md deleted file mode 100644 index 2a072070bf3..00000000000 --- a/mindsdb/integrations/handlers/athena_handler/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# Amazon Athena Handler - -This is the implementation of the Athena handler for MindsDB. - -## Athena -Amazon Athena is an interactive query service that makes it easy to analyze data in Amazon S3 using standard SQL. -Athena is serverless, so there is no infrastructure to manage, and you pay only for the queries that you run. - -
-https://aws.amazon.com/athena/ - -## Implementation -This handler was implemented using the `boto3`, the AWS SDK for Python. - -The required arguments to establish a connection are, -* `aws_access_key_id`: the AWS access key -* `aws_secret_access_key`: the AWS secret access key -* `region_name`: the AWS region -* `catalog`: the Athena catalog name -* `database`: the Athena database name -* `workgroup`: the Athena workgroup name -* `results_output_location`: the S3 bucket location to store the query results -* `check_interval`: the interval to check the query status - -## Usage -To use this handler, you need to have an AWS account and an S3 bucket to store the query results. - -```sql -CREATE DATABASE athena_datasource -WITH -engine='athena', -parameters={ - 'aws_access_key_id': 'YOUR_AWS_ACCESS_KEY_ID', - 'aws_secret_access_key': 'YOUR_AWS_SECRET_ACCESS', - 'region_name': 'YOUR_AWS_REGION', - 'catalog': 'AwsDataCatalog', - 'database': 'YOUR_ATHENA_DATABASE', - 'workgroup': 'primary', - 'results_output_location': 's3://YOUR_S3_BUCKET_NAME/', - "check_interval": '0' -}; -``` - -Now, you can use this established connection to query Athena as follows, -```sql - SELECT * FROM athena_datasource.table_name; -``` diff --git a/mindsdb/integrations/handlers/athena_handler/__about__.py b/mindsdb/integrations/handlers/athena_handler/__about__.py deleted file mode 100644 index 9853c64fdf2..00000000000 --- a/mindsdb/integrations/handlers/athena_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Amazon Athena handler' -__package_name__ = 'mindsdb_athena_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Amazon Athena" -__author__ = 'Ryan Watts' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/athena_handler/__init__.py b/mindsdb/integrations/handlers/athena_handler/__init__.py deleted file mode 100644 index afc7c5c8575..00000000000 --- a/mindsdb/integrations/handlers/athena_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .athena_handler import AthenaHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Amazon Athena' -name = 'athena' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/athena_handler/athena_handler.py b/mindsdb/integrations/handlers/athena_handler/athena_handler.py deleted file mode 100644 index 48767f49d5f..00000000000 --- a/mindsdb/integrations/handlers/athena_handler/athena_handler.py +++ /dev/null @@ -1,218 +0,0 @@ -import time -import pandas as pd -from boto3 import client -from typing import Optional - -from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class AthenaHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Athena statements. - """ - - name = 'athena' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'athena' - - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected: - return StatusResponse(success=True) - - try: - self.connection = client( - 'athena', - aws_access_key_id=self.connection_data['aws_access_key_id'], - aws_secret_access_key=self.connection_data['aws_secret_access_key'], - region_name=self.connection_data['region_name'], - ) - self.is_connected = True - return StatusResponse(success=True) - except Exception as e: - logger.error(f'Failed to connect to Athena: {str(e)}') - return StatusResponse(success=False, error_message=str(e)) - - def disconnect(self): - """ - Close any existing connections. - """ - if self.is_connected: - self.connection = None - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - if self.is_connected: - return StatusResponse(success=True) - else: - return self.connect() - - def native_query(self, query: str) -> Response: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - need_to_close = not self.is_connected - self.connect() - - try: - response = self.connection.start_query_execution( - QueryString=query, - QueryExecutionContext={ - 'Database': self.connection_data['database'], - }, - ResultConfiguration={ - 'OutputLocation': self.connection_data['results_output_location'], - }, - WorkGroup=self.connection_data['workgroup'], - ) - query_execution_id = response['QueryExecutionId'] - status = self._wait_for_query_to_complete(query_execution_id) - if status == 'SUCCEEDED': - result = self.connection.get_query_results( - QueryExecutionId=query_execution_id - ) - df = self._parse_query_result(result) - response = Response(RESPONSE_TYPE.TABLE, data_frame=df) - else: - response = Response(RESPONSE_TYPE.ERROR, error_message='Query failed or was cancelled') - except Exception as e: - logger.error(f'Error executing query in Athena: {str(e)}') - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - if need_to_close: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INSERT, DELETE, etc - Returns: - HandlerResponse - """ - - return self.native_query(query.to_string()) - - def get_tables(self) -> Response: - """ - Return list of entities that will be accessible as tables. - Returns: - Response: A response object containing the list of tables and - """ - - query = """ - select - table_schema, - table_name, - table_type - from - information_schema.tables - where - table_schema not in ('information_schema') - and table_type in ('BASE TABLE', 'VIEW') - """ - return self.native_query(query) - - def get_columns(self, table_name: str) -> Response: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - Response: A response object containing the column details - Raises: - ValueError: If the 'table_name' is not a valid string. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid value for table name provided.") - - query = f""" - select - column_name as "Field", - data_type as "Type" - from - information_schema.columns - where - table_name = '{table_name}' - """ - return self.native_query(query) - - def _wait_for_query_to_complete(self, query_execution_id: str) -> str: - """ - Wait for the Athena query to complete. - Args: - query_execution_id (str): ID of the query to wait for - Returns: - str: Query execution status - """ - while True: - response = self.connection.get_query_execution(QueryExecutionId=query_execution_id) - status = response['QueryExecution']['Status']['State'] - if status in ['SUCCEEDED', 'FAILED', 'CANCELLED']: - return status - - check_interval = self.connection_data.get('check_interval', 0) - if isinstance(check_interval, str) and check_interval.strip().isdigit(): - check_interval = int(check_interval) - if check_interval > 0: - time.sleep(check_interval) - - def _parse_query_result(self, result: dict) -> pd.DataFrame: - """ - Parse the result of the Athena query into a DataFrame. - Args: - result: Result of the Athena query - Returns: - pd.DataFrame: Query result as a DataFrame - """ - - if not result or 'ResultSet' not in result or 'Rows' not in result['ResultSet']: - return pd.DataFrame() - - rows = result['ResultSet']['Rows'] - headers = [col['VarCharValue'] for col in rows[0]['Data']] - data = [[col.get('VarCharValue') for col in row['Data']] for row in rows[1:]] - return pd.DataFrame(data, columns=headers) diff --git a/mindsdb/integrations/handlers/athena_handler/connection_args.py b/mindsdb/integrations/handlers/athena_handler/connection_args.py deleted file mode 100644 index 8e91c932d2b..00000000000 --- a/mindsdb/integrations/handlers/athena_handler/connection_args.py +++ /dev/null @@ -1,51 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - aws_access_key_id={ - 'type': ARG_TYPE.STR, - 'description': 'The access key for the AWS account.' - }, - aws_secret_access_key={ - 'type': ARG_TYPE.STR, - 'description': 'The secret key for the AWS account.', - 'secret': True - }, - region_name={ - 'type': ARG_TYPE.STR, - 'description': 'The AWS region where the Athena tables are created.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The name of the Athena database.' - }, - workgroup={ - 'type': ARG_TYPE.STR, - 'description': 'The Athena Workgroup' - }, - catalog={ - 'type': ARG_TYPE.STR, - 'description': 'The AWS Data Catalog' - }, - results_output_location={ - 'type': ARG_TYPE.STR, - 'description': 'The Athena Query Results Output Location s3://bucket-path/athena-query-results' - }, - check_interval={ - 'type': ARG_TYPE.INT, - 'description': 'The interval in seconds to check Athena for query results. Default is 0 seconds.' - } -) - -connection_args_example = OrderedDict( - aws_access_key_id='', - aws_secret_access_key='', - region_name='us-east-1', - catalog='AwsDataCatalog', - database='default', - workgroup='primary', - results_output_location='s3:///athena-query-results/', - check_interval=0 -) diff --git a/mindsdb/integrations/handlers/athena_handler/icon.svg b/mindsdb/integrations/handlers/athena_handler/icon.svg deleted file mode 100644 index e6d7c57efbc..00000000000 --- a/mindsdb/integrations/handlers/athena_handler/icon.svg +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/athena_handler/tests/__init__.py b/mindsdb/integrations/handlers/athena_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/athena_handler/tests/test_athena_handler.py b/mindsdb/integrations/handlers/athena_handler/tests/test_athena_handler.py deleted file mode 100644 index d4999f0a400..00000000000 --- a/mindsdb/integrations/handlers/athena_handler/tests/test_athena_handler.py +++ /dev/null @@ -1,85 +0,0 @@ -import unittest -from unittest.mock import patch, MagicMock, Mock -from collections import OrderedDict -from mindsdb.integrations.handlers.athena_handler.athena_handler import AthenaHandler - - -class CursorContextManager(Mock): - def __enter__(self): - return self - - def __exit__(self, *args): - pass - - description = [['a']] - - def fetchall(self): - return [[1]] - - -class AthenaHandlerTest(unittest.TestCase): - dummy_connection_data = OrderedDict( - aws_access_key_id='aws_access_key_id', - aws_secret_access_key='aws_secret_access_key', - region_name='us-east-1', - database='default', - workgroup='my_workgroup', - catalog='AwsDataCatalog', - results_output_location='s3://bucket-path/athena-query-results', - check_interval=0 - ) - - def setUp(self): - self.patcher = patch('boto3.client') - self.mock_client = self.patcher.start() - self.mock_client.return_value = MagicMock() - self.handler = AthenaHandler('athena', connection_data=self.dummy_connection_data) - - def tearDown(self): - self.patcher.stop() - - def test_connect_success(self): - connection = self.handler.connect() - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - - def test_get_columns(self): - self.handler.native_query = MagicMock() - - table_name = "mock_table" - self.handler.get_columns(table_name) - - expected_query = f""" - select - column_name as "Field", - data_type as "Type" - from - information_schema.columns - where - table_name = '{table_name}' - """ - - self.handler.native_query.assert_called_once_with(expected_query) - - def test_get_tables(self): - self.handler.native_query = MagicMock() - - self.handler.get_tables() - - expected_query = """ - select - table_schema, - table_name, - table_type - from - information_schema.tables - where - table_schema not in ('information_schema') - and table_type in ('BASE TABLE', 'VIEW') - """ - - self.handler.native_query.assert_called_once_with(expected_query) - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/aurora_handler/README.md b/mindsdb/integrations/handlers/aurora_handler/README.md deleted file mode 100644 index a4c43983a28..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# Amazon Aurora Handler - -This is the implementation of the Amazon Aurora handler for MindsDB. - -## Amazon Aurora -Amazon Aurora (Aurora) is a fully managed relational database engine that's compatible with MySQL and PostgreSQL. -https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/CHAP_AuroraOverview.html - -## Implementation -This handler was implemented using the existing MindsDB handlers for MySQL and PostgreSQL. - -The required arguments to establish a connection are, -* `host`: the host name or IP address of the Amazon Aurora DB cluster. -* `port`: the TCP/IP port of the Amazon Aurora DB cluster. -* `user`: the username used to authenticate with the Amazon Aurora DB cluster. -* `password`: the password to authenticate the user with the Amazon Aurora DB cluster. -* `database`: the database name to use when connecting with the Amazon Aurora DB cluster. - -There are several optional arguments that can be used as well, -* `db_engine`: the database engine of the Amazon Aurora DB cluster. This can take one of two values: 'mysql' or 'postgresql'. This parameter is optional, but if it is not provided, `aws_access_key_id` and `aws_secret_access_key` parameters must be provided. -* `aws_access_key_id`: the access key for the AWS account. This parameter is optional and is only required to be provided if the `db_engine` parameter is not provided. -* `aws_secret_access_key`: the secret key for the AWS account. This parameter is optional and is only required to be provided if the `db_engine` parameter is not provided. - -## Usage -In order to make use of this handler and connect to an Amazon Aurora MySQL DB Cluster in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE aurora_mysql_datasource -WITH ENGINE = 'aurora', -PARAMETERS = { - "db_engine": "mysql", - "host": "mysqlcluster.cluster-123456789012.us-east-1.rds.amazonaws.com", - "port": 3306, - "user": "admin", - "password": "password", - "database": "example_db" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM aurora_mysql_datasource.example_tbl -~~~~ - -Similar commands can be used to establish a connection and query Amazon Aurora PostgreSQL DB Cluster, -~~~~sql -CREATE DATABASE aurora_postgres_datasource -WITH ENGINE = 'aurora', -PARAMETERS = { - "db_engine": "postgresql", - "host": "postgresmycluster.cluster-123456789012.us-east-1.rds.amazonaws.com", - "port": 5432, - "user": "postgres", - "password": "password", - "database": "example_db " -}; - -SELECT * FROM aurora_postgres_datasource.example_tbl -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/aurora_handler/__about__.py b/mindsdb/integrations/handlers/aurora_handler/__about__.py deleted file mode 100644 index 0b76310f331..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Amazon Aurora handler' -__package_name__ = 'mindsdb_aurora_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Amazon Aurora" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/aurora_handler/__init__.py b/mindsdb/integrations/handlers/aurora_handler/__init__.py deleted file mode 100644 index 0aeaa04871c..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .aurora_handler import AuroraHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Amazon Aurora' -name = 'aurora' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/aurora_handler/aurora_handler.py b/mindsdb/integrations/handlers/aurora_handler/aurora_handler.py deleted file mode 100644 index 4a047e28b6d..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/aurora_handler.py +++ /dev/null @@ -1,141 +0,0 @@ -from typing import Optional - -import boto3 - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse -) -from mindsdb.integrations.handlers.mysql_handler.mysql_handler import MySQLHandler -from mindsdb.integrations.handlers.postgres_handler.postgres_handler import PostgresHandler - -logger = log.getLogger(__name__) - - -class AuroraHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Amazon Aurora statements. - """ - - name = 'aurora' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.dialect = 'aurora' - self.connection_data = connection_data - self.kwargs = kwargs - - database_engine = "" - if 'db_engine' not in self.connection_data: - database_engine = self.get_database_engine() - - if self.connection_data['db_engine'] == 'mysql' or database_engine == 'aurora': - self.db = MySQLHandler( - name=name + 'mysql', - connection_data=self.connection_data - ) - elif self.connection_data['db_engine'] == 'postgresql' or database_engine == 'aurora-postgresql': - self.db = PostgresHandler( - name=name + 'postgresql', - connection_data={key: self.connection_data[key] for key in self.connection_data if key != 'db_engine'} - ) - else: - raise Exception("The database engine should be either MySQL or PostgreSQL!") - - def get_database_engine(self): - try: - session = boto3.session.Session( - aws_access_key_id=self.connection_data['aws_access_key_id'], - aws_secret_access_key=self.connection_data['aws_secret_access_key'] - ) - - rds = session.client('rds') - - response = rds.describe_db_clusters() - - return next(item for item in response if item["DBClusterIdentifier"] == self.connection_data['host'].split('.')[0])['Engine'] - except Exception as e: - logger.error(f'Error connecting to Aurora, {e}!') - logger.error('If the database engine is not provided as a parameter, please ensure that the credentials for the AWS account are passed in instead!') - - def __del__(self): - self.db.__del__() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - return self.db.connect() - - def disconnect(self): - """ - Close any existing connections. - """ - - return self.db.disconnect() - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - return self.db.check_connection() - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - return self.db.native_query(query) - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - return self.db.query(query) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - return self.db.get_tables() - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - return self.db.get_columns(table_name) diff --git a/mindsdb/integrations/handlers/aurora_handler/connection_args.py b/mindsdb/integrations/handlers/aurora_handler/connection_args.py deleted file mode 100644 index 2c813e07d2f..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/connection_args.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Amazon Aurora DB cluster.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Amazon Aurora DB cluster.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Amazon Aurora DB cluster.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Amazon Aurora DB cluster.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Amazon Aurora DB cluster. Must be an integer.' - }, - db_engine={ - 'type': ARG_TYPE.STR, - 'description': "The database engine of the Amazon Aurora DB cluster. This can take one of two values: 'mysql' or 'postgresql'. This parameter is optional, but if it is not provided, 'aws_access_key_id' and 'aws_secret_access_key' parameters must be provided" - }, - aws_access_key_id={ - 'type': ARG_TYPE.STR, - 'description': "The access key for the AWS account. This parameter is optional and is only required to be provided if the 'db_engine' parameter is not provided." - }, - aws_secret_access_key={ - 'type': ARG_TYPE.STR, - 'description': "The secret key for the AWS account. This parameter is optional and is only required to be provided if the 'db_engine' parameter is not provided.", - 'secret': True - }, -) - -connection_args_example = OrderedDict( - db_engine='mysql', - host='mysqlcluster.cluster-123456789012.us-east-1.rds.amazonaws.com', - port=3306, - user='root', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/aurora_handler/icon.svg b/mindsdb/integrations/handlers/aurora_handler/icon.svg deleted file mode 100644 index 002951ad77d..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/icon.svg +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/aurora_handler/requirements.txt b/mindsdb/integrations/handlers/aurora_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/aurora_handler/tests/__init__.py b/mindsdb/integrations/handlers/aurora_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/aurora_handler/tests/test_aurora_mysql_handler.py b/mindsdb/integrations/handlers/aurora_handler/tests/test_aurora_mysql_handler.py deleted file mode 100644 index 815a9d4b478..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/tests/test_aurora_mysql_handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.aurora_handler.aurora_handler import AuroraHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class AuroraMySQLHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "", - "port": 3306, - "user": "admin", - "password": "", - "database": "public", - "db_engine": "mysql" - } - cls.handler = AuroraHandler('test_aurora_mysql_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM person" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('person') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/aurora_handler/tests/test_aurora_postgres_handler.py b/mindsdb/integrations/handlers/aurora_handler/tests/test_aurora_postgres_handler.py deleted file mode 100644 index bbb9c2573c7..00000000000 --- a/mindsdb/integrations/handlers/aurora_handler/tests/test_aurora_postgres_handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.aurora_handler.aurora_handler import AuroraHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class AuroraPostgresHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "", - "port": 5342, - "user": "", - "password": "", - "database": "postgres", - "db_engine": "postgresql" - } - cls.handler = AuroraHandler('test_aurora_postgres_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM person" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('person') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/azure_blob_handler/README.md b/mindsdb/integrations/handlers/azure_blob_handler/README.md deleted file mode 100644 index 84b0ce17839..00000000000 --- a/mindsdb/integrations/handlers/azure_blob_handler/README.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: Azure Blob Storage -sidebarTitle: Azure Blob Storage ---- - -This documentation describes the integration of MindsDB with [Azure Blob Storage] - -## Prerequisites - -Before proceeding, ensure that MindsDB is installed locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to your Azure Blob Storage from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE azureblob_datasource -WITH - engine = 'azureblob', - parameters = { - "container_name":"", - "connection_string":"" - }; -``` - -Required connection parameters include the following: - -* `container_name`: The name of your container. -* `connection_string`: The connection string of your account, we prefer connection string from Shared Access Signature as it has expiry date, time, and permission control as well, you can found it under "Security & Networking" menu >> "Shared Access Signature". - -Inside Shared Access Signature menu: -* Tick all checkboxes for Allowed services, Allowed resource types, Blob versioning permissions, Allowed blob index permissions. -(Adjustable based on your needs) For Allowed permissions, tick all except Permanent delete & Immutable storage. -* Then Finally click "Generate SAS & connection string". -* Use the generated connection string to connect. - - -## Usage - -Retrieve data from a specified object (file) in the Azure Blob Storage by providing the integration name and the object key: - -```sql -SELECT * -FROM azureblob_datasource.`my-file.csv`; -LIMIT 10; -``` - - -Wrap the object key in backticks (\`) to avoid any issues parsing the SQL statements provided. This is especially important when the object key contains spaces, special characters or prefixes, such as `my-folder/my-file.csv`. - -At the moment, the supported file formats are CSV, TSV, JSON, and Parquet. - - - -The above examples utilize `azureblob_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Known Issue - -`Problem with the SSL CA cert` (you most likely get this error if your current OS is ubuntu / other linux distro) - -* **Symptoms**: Error: Invalid Error: Fail to get a new connection for: https://⟨storage account name⟩.blob.core.windows.net/. Problem with the SSL CA cert (path? access rights?)) - -* **Solution**: -Current workaround is executing the following 2 statements as root: -mkdir -p /etc/pki/tls/certs -ln -s /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt - -References: -https://duckdb.org/docs/extensions/azure.html#authentication -https://medium.com/datamindedbe/quacking-queries-in-the-azure-cloud-with-duckdb-14be50f6e141 - diff --git a/mindsdb/integrations/handlers/azure_blob_handler/__about__.py b/mindsdb/integrations/handlers/azure_blob_handler/__about__.py deleted file mode 100644 index 982fb4ae922..00000000000 --- a/mindsdb/integrations/handlers/azure_blob_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Azure Blob Storage handler' -__package_name__ = 'mindsdb_azure_blob_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Azure Blob Storage" -__author__ = 'Fabian Jevon' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2024- mindsdb' diff --git a/mindsdb/integrations/handlers/azure_blob_handler/__init__.py b/mindsdb/integrations/handlers/azure_blob_handler/__init__.py deleted file mode 100644 index 5a00b096b44..00000000000 --- a/mindsdb/integrations/handlers/azure_blob_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .azure_blob_handler import AzureBlobHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Azure Blob Storage' -name = 'azureblob' -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/azure_blob_handler/azure_blob_handler.py b/mindsdb/integrations/handlers/azure_blob_handler/azure_blob_handler.py deleted file mode 100644 index bccbf83c680..00000000000 --- a/mindsdb/integrations/handlers/azure_blob_handler/azure_blob_handler.py +++ /dev/null @@ -1,346 +0,0 @@ -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities import log -import duckdb -import pandas as pd - -from azure.storage.blob import BlobServiceClient - -from contextlib import contextmanager -from typing import List, Text, Optional, Dict -from mindsdb.integrations.libs.api_handler import APIResource, APIHandler -from mindsdb.integrations.utilities.sql_utils import FilterCondition -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import Select, Identifier, Insert -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class ListFilesTable(APIResource): - - def list(self, - targets: List[str] = None, - conditions: List[FilterCondition] = None, - *args, **kwargs) -> pd.DataFrame: - - tables = self.handler.get_files() - data = [] - for path in tables: - path = path.replace('`', '') - item = { - 'path': path, - 'name': path[path.rfind('/') + 1:], - 'extension': path[path.rfind('.') + 1:] - } - - data.append(item) - - return pd.DataFrame(data=data, columns=self.get_columns()) - - def get_columns(self) -> List[str]: - return ["path", "name", "extension", "content"] - - -class FileTable(APIResource): - - def list(self, targets: List[str] = None, table_name=None, *args, **kwargs) -> pd.DataFrame: - return self.handler.read_as_table(table_name) - - def add(self, data, table_name=None): - df = pd.DataFrame(data) - return self.handler.add_data_to_table(table_name, df) - - -class AzureBlobHandler(APIHandler): - """ - This handler handles connection and execution of the SQL statements on Azure Blob. - """ - - name = "azureblob" - supported_file_formats = ['csv', 'tsv', 'json', 'parquet'] - - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs): - super().__init__(name) - """ constructor - Args: - name (str): the handler name - """ - - self.connection = None - self.is_connected = False - self._tables = {} - self._files_table = ListFilesTable(self) - self.container_name = None - - self.connection_data = connection_data - - if 'container_name' in connection_data: - self.container_name = connection_data['container_name'] - - if 'connection_string' in connection_data: - self.connection_string = connection_data['connection_string'] - - def connect(self) -> BlobServiceClient: - """ Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - HandlerStatusResponse - """ - if self.is_connected is True: - return self.connection - - blob_service_client = BlobServiceClient.from_connection_string(conn_str=self.connection_string) - - self.connection = blob_service_client - self.is_connected = True - return blob_service_client - - def check_connection(self) -> StatusResponse: - """ Check connection to the handler - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - client = self.connect() - client.get_account_information() - response.success = True - - except Exception as e: - logger.error(f'Error connecting to Azure Blob: {e}!') - response.error_message = e - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def disconnect(self): - """ - Closes the connection to the Azure Blob account if it's currently open. - """ - if not self.is_connected: - return - self.connection.close() - self.is_connected = False - - @contextmanager - def _connect_duckdb(self): - """ - Creates temporal duckdb database which is able to connect to the Azure Blob account. - Have to be used as context manager - - Returns: - DuckDBPyConnection - """ - # Connect to Azure Blob via DuckDB. - duckdb_conn = duckdb.connect(":memory:") - duckdb_conn.execute('INSTALL azure') - duckdb_conn.execute('LOAD azure') - - # Configure mandatory credentials. - duckdb_conn.execute(f'SET azure_storage_connection_string="{self.connection_string}"') - - try: - yield duckdb_conn - finally: - duckdb_conn.close() - - def read_as_table(self, key) -> pd.DataFrame: - """ - Read object as dataframe. Uses duckdb - """ - - with self._connect_duckdb() as connection: - cursor = connection.execute(f'SELECT * FROM "azure://{self.container_name}/{key}"') - return cursor.fetchdf() - - def _read_as_content(self, key) -> None: - """ - Read object as content - """ - - connection = self.connect() - client = connection.get_blob_client(container=self.container_name, blob=key) - - return client.download_blob() - - def add_data_to_table(self, key, df) -> None: - pass - """ - Writes the table to a file in the azure container. - - Raises: - CatalogException: If the table does not exist in the DuckDB connection. - """ - - # Check if the file exists in the Container. - - try: - client = self.connect() - blob_client = client.get_blob_client(container=self.container_name, blob=key) - blob_client.close() - - except Exception as e: - logger.error(f'Error querying the file {key} in the container {self.container_name}, {e}!') - raise e - - with self._connect_duckdb() as connection: - # copy - connection.execute(f'CREATE TABLE tmp_table AS SELECT * FROM "azure://{self.container_name}/{key}"') - - # insert - connection.execute("INSERT INTO tmp_table BY NAME SELECT * FROM df") - - # upload - connection.execute(f"COPY tmp_table TO 'azure://{self.container_name}/{key}'") - - def native_query(self, query: str) -> Response: - """ - Executes a SQL query and returns the result. - - Args: - query (str): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - query_ast = parse_sql(query) - return self.query(query_ast) - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Raises: - ValueError: If the file format is not supported or the file does not exist in the Azure Blob Container. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - - self.connect() - - if isinstance(query, Select): - table_name = query.from_table.parts[-1].replace('`', '') - - if table_name == 'files': - table = self._files_table - df = table.select(query) - - # add content - has_content = False - for target in query.targets: - if isinstance(target, Identifier) and target.parts[-1].lower() == 'content': - has_content = True - break - if has_content: - df['content'] = df['path'].apply(self._read_as_content) - else: - extension = table_name.split('.')[-1] - if extension not in self.supported_file_formats: - logger.error(f'The file format {extension} is not supported!') - raise ValueError(f'The file format {extension} is not supported!') - - table = FileTable(self, table_name=table_name) - df = table.select(query) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - elif isinstance(query, Insert): - table_name = query.table.parts[-1] - table = FileTable(self, table_name=table_name) - table.insert(query) - response = Response(RESPONSE_TYPE.OK) - else: - raise NotImplementedError - - return response - - def get_files(self) -> List[str]: - client = self.connect() - container_client = client.get_container_client(self.container_name) - all_files = container_client.list_blobs() - - # Wrap the object names with backticks to prevent SQL syntax errors. - supported_files = [ - f"`{file.get('name')}`" - for file in all_files if file.get('name').split('.')[-1] in self.supported_file_formats - ] - - return supported_files - - def get_tables(self) -> Response: - """ - Retrieves a list of tables (objects) in the Azure Containers. - - Each object is considered a table. Only the supported file formats are considered as tables. - - Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. - """ - - # Get only the supported file formats. - # Wrap the object names with backticks to prevent SQL syntax errors. - - supported_files = self.get_files() - - # virtual table with list of files - supported_files.insert(0, 'files') - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - supported_files, - columns=['table_name'] - ) - ) - - return response - - def get_columns(self, table_name: str) -> Response: - """ - Retrieves column details for a specified table (object) in the Azure Blob Container. - - Args: - table_name (Text): The name of the table for which to retrieve column information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details, formatted as per the `Response` class. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - query = f"SELECT * FROM {table_name} LIMIT 5" - - result = self.query(query) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - { - 'column_name': result.data_frame.columns, - 'data_type': [data_type if data_type != 'object' else 'string' for data_type in result.data_frame.dtypes] - } - ) - ) - - return response diff --git a/mindsdb/integrations/handlers/azure_blob_handler/connection_args.py b/mindsdb/integrations/handlers/azure_blob_handler/connection_args.py deleted file mode 100644 index 0541c99dcd2..00000000000 --- a/mindsdb/integrations/handlers/azure_blob_handler/connection_args.py +++ /dev/null @@ -1,25 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - container_name={ - 'type': ARG_TYPE.STR, - 'description': 'The name of your storage service account Container Name', - 'required': True, - 'label': 'Container Name' - }, - connection_string={ - 'type': ARG_TYPE.STR, - 'description': 'Connection String', - 'required': True, - 'label': 'Connection String', - 'secret': True - } -) - -connection_args_example = OrderedDict( - container_name='', - connection_string='' -) diff --git a/mindsdb/integrations/handlers/azure_blob_handler/icon.svg b/mindsdb/integrations/handlers/azure_blob_handler/icon.svg deleted file mode 100644 index 3f6c5c8f1fe..00000000000 --- a/mindsdb/integrations/handlers/azure_blob_handler/icon.svg +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/azure_blob_handler/requirements.txt b/mindsdb/integrations/handlers/azure_blob_handler/requirements.txt deleted file mode 100644 index 003d8b0ebe0..00000000000 --- a/mindsdb/integrations/handlers/azure_blob_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -azure-storage-blob -azure-core>=1.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/README.md b/mindsdb/integrations/handlers/bigcommerce_handler/README.md deleted file mode 100644 index 8b090bb83a9..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/README.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: BigCommerce -sidebarTitle: BigCommerce ---- - -This documentation describes the integration of MindsDB with [BigCommerce](https://www.bigcommerce.com/). The integration allows MindsDB to access data from BigCommerce and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - - - Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to BigCommerce from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/bigcommerce_handler) as an engine. - -```sql -CREATE DATABASE bigcommerce_datasource -WITH - ENGINE = 'bigcommerce', - PARAMETERS = { - "api_base": "https://api.bigcommerce.com/stores/0fh0fh0fh0/v3/", - "access_token": "k9iexk9iexk9iexk9iexk9iexk9iexk" - }; -``` - -Required connection parameters include the following: - -* `api_base`: The base URL of your BigCommerce store API (e.g., `https://api.bigcommerce.com/stores/YOUR_STORE_HASH/v3/`). -* `access_token`: The API token for authenticating with your BigCommerce account. - - -To obtain the API credentials for your BigCommerce store, follow the steps given below: -1. Log in to your BigCommerce store and go to the dashboard. -2. Navigate to `Settings` βš™οΈ > `API` > `Store-level API accounts`. -3. Click `Create API Account` and fill in the following details: - - **Token type**: Select `V2/V3 API token` - - **Name**: Choose any descriptive name for the account - - **OAuth Scopes**: Set permissions to at least `Read-only` for the following resources: - - Orders - - Products - - Customers - - Marketing - - Order Fulfillment -4. Before clicking `Save`, copy and save the `API Path` (this is your `api_base` URL). -5. Click `Save` to create the API account. -6. Copy and securely save the `Access Token` that is displayed (you won't be able to see it again). - - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM bigcommerce_datasource.orders -LIMIT 10; -``` - -The BigCommerce integration supports various tables including: - -```sql --- Available tables -SELECT * FROM bigcommerce_datasource.orders LIMIT 10; -SELECT * FROM bigcommerce_datasource.products LIMIT 10; -SELECT * FROM bigcommerce_datasource.customers LIMIT 10; -SELECT * FROM bigcommerce_datasource.categories LIMIT 10; -SELECT * FROM bigcommerce_datasource.pickups LIMIT 10; -SELECT * FROM bigcommerce_datasource.promotions LIMIT 10; -SELECT * FROM bigcommerce_datasource.wishlists LIMIT 10; -SELECT * FROM bigcommerce_datasource.segments LIMIT 10; -SELECT * FROM bigcommerce_datasource.brands LIMIT 10; -``` - -Query with filters and sorting: - -```sql --- Filter customers by name -SELECT * FROM bigcommerce_datasource.customers -WHERE name LIKE 'George' -ORDER BY last_name DESC; - --- Filter products by price and weight -SELECT * FROM bigcommerce_datasource.products -WHERE price = 109 AND weight = 1; - --- Search categories by name -SELECT * FROM bigcommerce_datasource.categories -WHERE name LIKE 'garden'; -``` diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/__about__.py b/mindsdb/integrations/handlers/bigcommerce_handler/__about__.py deleted file mode 100644 index 2229d5ca15b..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB BigCommerce handler" -__package_name__ = "mindsdb_bigcommerce_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for BigCommerce" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2025 - mindsdb" diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/__init__.py b/mindsdb/integrations/handlers/bigcommerce_handler/__init__.py deleted file mode 100644 index 7e671a123a0..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .bigcommerce_handler import BigCommerceHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "BigCommerce" -name = "bigcommerce" -type = HANDLER_TYPE.DATA -icon_path = "bigcommerce-black.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY - -__all__ = [ - "Handler", - "version", - "name", - "type", - "support_level", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce-black.svg b/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce-black.svg deleted file mode 100644 index cd515ac5b85..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce-black.svg +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - BC_Wordmark_Black - - diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_api_client.py b/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_api_client.py deleted file mode 100644 index 585957bac16..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_api_client.py +++ /dev/null @@ -1,239 +0,0 @@ -from http import HTTPStatus -from urllib.parse import urljoin - -import requests - - -DEFAULT_LIMIT = 999999999 - - -class BigCommerceAPIClient: - def __init__(self, url: str, access_token: str): - # we have to use both endpoints: v2/ and v3/, so delete it from base url - self.base_url = url.rstrip("/") - self.base_url = self.base_url.rstrip("v2") - self.base_url = self.base_url.rstrip("v3") - if not self.base_url.endswith("/"): - self.base_url += "/" - - self.access_token = access_token - self.session = requests.Session() - self.session.headers.update( - { - "X-Auth-Token": self.access_token, - "Content-Type": "application/json", - "Accept": "application/json", - } - ) - - def get_products( - self, - filter: dict = None, - sort_condition: dict = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-catalog/products#get-all-products - params = { - "limit": limit or DEFAULT_LIMIT, - } - if filter is not None: - params.update(filter) - if sort_condition is not None: - params.update(sort_condition) - return self._make_request_v3("GET", "catalog/products", params=params) - - def get_customers( - self, - filter: dict = None, - sort_condition: dict = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-management/customers#get-all-customers - params = { - "limit": limit or DEFAULT_LIMIT, - } - if filter: - params.update(filter) - if sort_condition: - params["sort"] = sort_condition - return self._make_request_v3("GET", "customers", params=params) - - def get_orders( - self, - filter: dict = None, - sort_condition: str = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-management/orders#get-all-orders - params = {"limit": limit or DEFAULT_LIMIT} - if filter: - params.update(filter) - if sort_condition: - params["sort"] = sort_condition - - return self._make_request_v2("GET", "orders", params=params) - - def get_orders_count(self) -> int: - response = self._make_request_v2("GET", "orders/count") - return response["count"] - - def get_customers_count(self) -> int: - response = self._make_request("GET", urljoin(self.base_url, "v3/customers"), params={"limit": 1}) - return response["meta"]["pagination"]["total"] - - def get_products_count(self) -> int: - response = self._make_request("GET", urljoin(self.base_url, "v3/products"), params={"limit": 1}) - return response["meta"]["pagination"]["total"] - - def get_categories( - self, - filter: dict = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-catalog/category-trees/categories#get-all-categories - params = { - "limit": limit or DEFAULT_LIMIT, - } - if filter is not None: - params.update(filter) - return self._make_request_v3("GET", "catalog/trees/categories", params=params) - - def get_categories_count(self) -> int: - response = self._make_request("GET", urljoin(self.base_url, "v3/catalog/trees/categories"), params={"limit": 1}) - return response.get("meta", {}).get("pagination", {}).get("total", 0) - - def get_pickups( - self, - filter: dict = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-management/pickup#get-pickups - params = { - "limit": limit or DEFAULT_LIMIT, - } - if filter is not None: - params.update(filter) - return self._make_request_v3("GET", "orders/pickups", params=params) - - def get_pickups_count(self) -> int: - response = self._make_request("GET", urljoin(self.base_url, "v3/pickups"), params={"limit": 1}) - return response.get("meta", {}).get("pagination", {}).get("total", 0) - - def get_promotions( - self, - filter: dict = None, - sort_condition: dict = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-management/promotions/promotions-bulk#get-all-promotions - params = { - "limit": limit or DEFAULT_LIMIT, - } - if filter is not None: - params.update(filter) - if sort_condition is not None: - params.update(sort_condition) - return self._make_request_v3("GET", "promotions", params=params) - - def get_promotions_count(self) -> int: - response = self._make_request("GET", urljoin(self.base_url, "v3/promotions"), params={"limit": 1}) - return response.get("meta", {}).get("pagination", {}).get("total", 0) - - def get_wishlists( - self, - filter: dict = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-management/wishlists#get-all-wishlists - params = { - "limit": limit or DEFAULT_LIMIT, - } - if filter is not None: - params.update(filter) - return self._make_request_v3("GET", "wishlists", params=params) - - def get_wishlists_count(self) -> int: - response = self._make_request("GET", urljoin(self.base_url, "v3/wishlists"), params={"limit": 1}) - return response.get("meta", {}).get("pagination", {}).get("total", 0) - - def get_segments( - self, - filter: dict = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-management/customer-segmentation/segments#get-all-segments - params = { - "limit": limit or DEFAULT_LIMIT, - } - if filter is not None: - params.update(filter) - return self._make_request_v3("GET", "segments", params=params) - - def get_segments_count(self) -> int: - response = self._make_request("GET", urljoin(self.base_url, "v3/segments"), params={"limit": 1}) - return response.get("meta", {}).get("pagination", {}).get("total", 0) - - def get_brands( - self, - filter: dict = None, - sort_condition: dict = None, - limit: int = None, - ): - # doc: https://developer.bigcommerce.com/docs/rest-catalog/brands#get-all-brands - params = { - "limit": limit or DEFAULT_LIMIT, - } - if filter is not None: - params.update(filter) - if sort_condition is not None: - params.update(sort_condition) - return self._make_request_v3("GET", "catalog/brands", params=params) - - def get_brands_count(self) -> int: - response = self._make_request("GET", urljoin(self.base_url, "v3/catalog/brands"), params={"limit": 1}) - return response.get("meta", {}).get("pagination", {}).get("total", 0) - - def _make_request_v2(self, method: str, url: str, *args, **kwargs) -> list[dict]: - # NOTE: v2 limit max is 250 - url = urljoin(urljoin(self.base_url, "v2/"), url) - api_limit = 250 - params = kwargs.pop("params", {}) - request_limit = params.get("limit", DEFAULT_LIMIT) - params["limit"] = min(api_limit, request_limit) - current_page = 1 - response_len = 1 - data = [] - while response_len > 0 and len(data) < request_limit: - params["page"] = current_page - response = self._make_request(method, url, params=params, *args, **kwargs) - if isinstance(response, dict): - # for "get_count" requests - return response - current_page += 1 - response_len = len(response) - data += response - return data[:request_limit] - - def _make_request_v3(self, method: str, url: str, *args, **kwargs) -> list[dict]: - url = urljoin(urljoin(self.base_url, "v3/"), url) - data = [] - params = kwargs.pop("params", {}) - current_page = 1 - total_pages = 1 - while current_page <= total_pages: - params["page"] = current_page - response = self._make_request(method, url, params=params, *args, **kwargs) - current_page = response["meta"]["pagination"]["current_page"] + 1 - total_pages = response["meta"]["pagination"]["total_pages"] - data += response["data"] - return data - - def _make_request(self, method: str, url: str, params: dict = None, data: dict = None) -> dict: - response = self.session.request(method, url, params=params, json=data) - - if response.status_code == HTTPStatus.NO_CONTENT: - return [] - if response.status_code != HTTPStatus.OK: - raise Exception(f"Request failed with status code {response.status_code}: {response.text}") - - return response.json() diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_handler.py b/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_handler.py deleted file mode 100644 index 30ce1a10e28..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_handler.py +++ /dev/null @@ -1,102 +0,0 @@ -from typing import Any - -from mindsdb.integrations.handlers.bigcommerce_handler.bigcommerce_api_client import BigCommerceAPIClient -from mindsdb.integrations.handlers.bigcommerce_handler.bigcommerce_tables import ( - BigCommerceOrdersTable, - BigCommerceProductsTable, - BigCommerceCustomersTable, - BigCommerceCategoriesTable, - BigCommercePickupsTable, - BigCommercePromotionsTable, - BigCommerceWishlistsTable, - BigCommerceSegmentsTable, - BigCommerceBrandsTable, -) -from mindsdb.integrations.libs.api_handler import MetaAPIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class BigCommerceHandler(MetaAPIHandler): - """This handler handles the connection and execution of SQL statements on BigCommerce.""" - - name = "bigcommerce" - - def __init__(self, name: str, connection_data: dict, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (str): The name of the handler instance. - connection_data (dict): The connection data required to connect to the BigCommerce API. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - self.thread_safe = True - - self._register_table("orders", BigCommerceOrdersTable(self)) - self._register_table("products", BigCommerceProductsTable(self)) - self._register_table("customers", BigCommerceCustomersTable(self)) - self._register_table("categories", BigCommerceCategoriesTable(self)) - self._register_table("pickups", BigCommercePickupsTable(self)) - self._register_table("promotions", BigCommercePromotionsTable(self)) - self._register_table("wishlists", BigCommerceWishlistsTable(self)) - self._register_table("segments", BigCommerceSegmentsTable(self)) - self._register_table("brands", BigCommerceBrandsTable(self)) - - def connect(self) -> BigCommerceAPIClient: - """ - Establishes a connection to the BigCommerce API. - - Raises: - ValueError: If the required connection parameters are not provided. - - Returns: - BigCommerceAPIClient: A connection object to the BigCommerce API. - """ - if self.is_connected is True: - return self.connection - - if not all( - key in self.connection_data and self.connection_data.get(key) for key in ["api_base", "access_token"] - ): - raise ValueError("Required parameters (api_base, access_token) must be provided and should not be empty.") - - self.connection = BigCommerceAPIClient( - url=self.connection_data.get("api_base"), - access_token=self.connection_data.get("access_token"), - ) - - self.is_connected = True - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the BigCommerce API. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - - try: - connection = self.connect() - connection.get_products(limit=1) - response.success = True - except Exception as e: - logger.error(f"Error connecting to BigCommerce API: {e}!") - response.error_message = e - - self.is_connected = response.success - - return response diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_tables.py b/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_tables.py deleted file mode 100644 index 8b148be6b50..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/bigcommerce_tables.py +++ /dev/null @@ -1,1414 +0,0 @@ -from typing import List -from decimal import Decimal - -import pandas as pd - -from mindsdb.integrations.handlers.bigcommerce_handler.bigcommerce_api_client import BigCommerceAPIClient -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, SortColumn -from mindsdb.integrations.libs.api_handler import MetaAPIResource -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -def _make_filter(conditions: list[FilterCondition] | None, op_map: dict) -> dict: - """Creates a filter dictionary, that can be used in the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - op_map (dict): The mapping of filter operators to API parameters. - - Returns: - dict: The filter dictionary. - """ - filter = {} - if conditions is None: - return filter - for condition in conditions: - simple_op = op_map.get((condition.column, condition.op)) - if simple_op: - value = condition.value - if isinstance(value, list): - value = ",".join(map(str, value)) - filter[simple_op] = value - condition.applied = True - return filter - - -def _make_df(result: list[dict], table: MetaAPIResource): - """Converts a list of dictionaries to a pandas DataFrame. - If the list is empty, an empty DataFrame is returned with the columns from the table. - - Args: - result (list[dict]): The list of dictionaries to convert. - table (MetaAPIResource): The table class. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - if len(result) == 0: - result = pd.DataFrame([], columns=table.get_columns()) - else: - result = pd.DataFrame(result) - return result - - -def _make_sort_condition_v3(sort: list[SortColumn], sortable_columns: list[str]): - """Creates a sort condition for the BigCommerce API v3. - - Args: - sort (list[SortColumn]): The list of parsed sort columns. - sortable_columns (list[str]): The list of sortable columns. - - Returns: - dict: The sort condition, that can be used in the BigCommerce API v3. - """ - sort_condition = None - if isinstance(sort, list) and len(sort) > 1 and sort[0].column in sortable_columns: - sort_column = sort[0] - sort_condition = { - "sort": sort_column.column, - "direction": "asc" if sort_column.ascending else "desc", - } - return sort_condition - - -def _make_sort_condition_v2(sort: list[SortColumn], sortable_columns: list[str]): - """Creates a sort condition for the BigCommerce API v2. - - Args: - sort (list[SortColumn]): The list of parsed sort columns. - sortable_columns (list[str]): The list of sortable columns. - - Returns: - dict: The sort condition, that can be used in the BigCommerce API v2. - """ - sort_condition = None - if isinstance(sort, list) and len(sort) == 1 and sort[0].column in sortable_columns: - sort_column = sort[0] - sort_column.applied = True - sort_condition = f"{sort_column.column}:{'asc' if sort_column.ascending else 'desc'}" - return sort_condition - - -class BigCommerceOrdersTable(MetaAPIResource): - """ - The table abstraction for the 'orders' resource of the BigCommerce API. - """ - - name = "orders" - - def list( - self, - conditions: list[FilterCondition] = None, - limit: int = None, - sort: list[SortColumn] = None, - **kwargs, - ): - """Executes a parsed SELECT SQL query on the 'orders' resource of the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (list[SortColumn]): The list of parsed sort columns. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("id", FilterOperator.GREATER_THAN): "min_id", - ("id", FilterOperator.LESS_THAN): "max_id", - ("total_inc_tax", FilterOperator.GREATER_THAN): "min_total", - ("total_inc_tax", FilterOperator.LESS_THAN): "max_total", - ("customer_id", FilterOperator.EQUAL): "customer_id", - ("email", FilterOperator.EQUAL): "email", - ("status_id", FilterOperator.EQUAL): "status_id", - ("cart_id", FilterOperator.EQUAL): "cart_id", - ("payment_method", FilterOperator.EQUAL): "payment_method", - ("date_created", FilterOperator.GREATER_THAN): "min_date_created", - ("date_created", FilterOperator.LESS_THAN): "max_date_created", - ("date_modified", FilterOperator.GREATER_THAN): "min_date_modified", - ("date_modified", FilterOperator.LESS_THAN): "max_date_modified", - ("channel_id", FilterOperator.EQUAL): "channel_id", - ("external_order_id", FilterOperator.EQUAL): "external_order_id", - } - - filter = _make_filter(conditions, simple_op_map) - - for condition in conditions: - if condition.applied: - continue - # region special case for filter "id = x" - if condition.op == FilterOperator.EQUAL and condition.column == "id": - filter["min_id"] = condition.value - filter["max_id"] = condition.value - # endregion - - sortable_columns = [ - "id", - "customer_id", - "date_created", - "date_modified", - "status_id", - "channel_id", - "external_id", - ] - sort_condition = _make_sort_condition_v3(sort, sortable_columns) - - result = client.get_orders(filter=filter, sort_condition=sort_condition, limit=limit) - result = _make_df(result, self) - - decimal_columns = [meta["COLUMN_NAME"] for meta in self.meta_get_columns() if meta["DATA_TYPE"] == "DECIMAL"] - for column_name in decimal_columns: - if column_name in result: - result[column_name] = result[column_name].apply(Decimal) - - return result - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'orders' resource. - - Returns: - list[str]: A list of attributes (columns) of the 'orders' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, table_name: str) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - orders_count = client.get_orders_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": orders_count, - } - - def meta_get_columns(self, *args, **kwargs) -> List[str]: - return [ - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The ID of the order.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "date_modified", - "DATA_TYPE": "DATETIME", - "COLUMN_DESCRIPTION": "Value representing the last modification of the order. RFC-2822. This date time is always in UTC in the api response.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "date_shipped", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Value representing the date when the order is fully shipped. RFC-2822", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "cart_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The cart ID from which this order originated, if applicable.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "status", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The status will include one of the values defined under Order Statuses.", - }, - {"TABLE_NAME": "orders", "COLUMN_NAME": "subtotal_tax", "DATA_TYPE": "DECIMAL", "COLUMN_DESCRIPTION": ""}, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "shipping_cost_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "shipping_cost_tax_class_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "handling_cost_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "handling_cost_tax_class_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "wrapping_cost_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "wrapping_cost_tax_class_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "payment_status", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Payment status of the order. Allowed: authorized | captured | capture pending | declined | held for review | paid | partially refunded | pending | refunded | void | void pending", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "store_credit_amount", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "Represents the store credit that the shopper has redeemed on this individual order.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "gift_certificate_amount", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currency_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The display currency ID. Depending on the currency selected, the value can be different from the transactional currency.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currency_code", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The currency code of the display currency used to present prices to the shopper on the storefront. Depending on the currency selected, the value can be different from the transactional currency.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currency_exchange_rate", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The exchange rate between the store's default currency and the display currency.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "default_currency_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The transactional currency ID.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "default_currency_code", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The currency code of the transactional currency the shopper pays in.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "store_default_currency_code", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The currency code of the store's default currency.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "store_default_to_transactional_exchange_rate", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The exchange rate between the store's default currency and the transactional currency used in the order.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "coupon_discount", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "shipping_address_count", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The number of shipping addresses associated with this transaction.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "is_deleted", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Indicates whether the order is deleted/archived.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "total_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "Total tax amount for the order.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "is_tax_inclusive_pricing", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Indicate whether the order's base prices include tax. If true, the base prices are inclusive of tax. If false, the base prices are exclusive of tax.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "is_email_opt_in", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Indicates whether the shopper has selected an opt-in check box (on the checkout page) to receive emails.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "order_source", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Reflects the origin of the order. It can affect the order's icon and source as defined in the control panel listing.", - }, - {"TABLE_NAME": "orders", "COLUMN_NAME": "consignments", "DATA_TYPE": "JSON", "COLUMN_DESCRIPTION": ""}, - {"TABLE_NAME": "orders", "COLUMN_NAME": "products", "DATA_TYPE": "JSON", "COLUMN_DESCRIPTION": ""}, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "shipping_addresses", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "", - }, - {"TABLE_NAME": "orders", "COLUMN_NAME": "coupons", "DATA_TYPE": "JSON", "COLUMN_DESCRIPTION": ""}, - {"TABLE_NAME": "orders", "COLUMN_NAME": "billing_address", "DATA_TYPE": "JSON", "COLUMN_DESCRIPTION": ""}, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "base_handling_cost", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of the base handling cost. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "base_shipping_cost", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of the base shipping cost. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "base_wrapping_cost", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of the base wrapping cost expressed as a floating point number to four decimal places in string format. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "channel_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "Shows where the order originated. The channel_id defaults to 1. The value must match the ID of a valid and enabled channel.", - }, - {"TABLE_NAME": "orders", "COLUMN_NAME": "customer_id", "DATA_TYPE": "INT", "COLUMN_DESCRIPTION": ""}, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "customer_message", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Message that the customer entered to the Order Comments box during checkout.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "date_created", - "DATA_TYPE": "DATETIME", - "COLUMN_DESCRIPTION": "The date the order was created, formatted in the RFC-2822 standard. You set this attribute on Order creation (POST) to support the migration of historical orders. If you do not provide a value, then it will default to the current date/time. This date time is always in UTC in the api response.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "discount_amount", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "Amount of discount for this transaction. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "ebay_order_id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "If the order was placed through eBay, the eBay order number will be included. Otherwise, the value will be 0.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "external_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The order ID in another system, such as the Amazon order ID if this is an Amazon order.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "external_merchant_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The merchant ID represents an upstream order from an external system. It is the source of truth for orders. After setting it, you cannot write to or update the external_merchant_id.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "external_source", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "This value identifies an external system that generated the order and submitted it to BigCommerce with the Orders API.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "geoip_country", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The full name of the country where the customer made the purchase, based on the IP.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "geoip_country_iso2", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The country where the customer made the purchase, in ISO2 format, based on the IP.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "handling_cost_ex_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of the handling cost, excluding tax. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "handling_cost_inc_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of the handling cost, including tax. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "ip_address", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "IPv4 Address of the customer, if known.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "ip_address_v6", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "IPv6 Address of the customer, if known.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "items_shipped", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The number of items that have been shipped.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "items_total", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The total number of items in the order.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "order_is_digital", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether this is an order for digital products.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "payment_method", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The payment method for this order.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "payment_provider_id", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The external Transaction ID/Payment ID within this order's payment provider (if a payment provider was used).", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "refunded_amount", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The amount refunded from this transaction; always returns 0. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "shipping_cost_ex_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of shipping cost, excluding tax. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "shipping_cost_inc_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of shipping cost, including tax. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "staff_notes", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Any additional notes for staff.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "subtotal_ex_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "Override value for subtotal excluding tax. The value can't be negative. If specified, the field subtotal_inc_tax is also required.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "subtotal_inc_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "Override value for subtotal including tax. The value can't be negative. If specified, the field subtotal_ex_tax is also required.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "tax_provider_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "BasicTaxProvider - Tax is set to manual and order is created in the store. AvaTaxProvider - Tax is set to automatic and order is created in the store. Empty string - The order is created with the API, or the tax provider is unknown.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "customer_locale", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The customer's locale.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "external_order_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "The order ID in another system, such as the Amazon Order ID if this is an Amazon order. After setting it, you can update this field using a POST or PUT request.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "total_ex_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "Override value for the total, excluding tax. If specified, the field total_inc_tax is also required. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "total_inc_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "Override value for the total, including tax. If specified, the field total_ex_tax is also required. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "wrapping_cost_ex_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of the wrapping cost, excluding tax. The value can't be negative.", - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "wrapping_cost_inc_tax", - "DATA_TYPE": "DECIMAL", - "COLUMN_DESCRIPTION": "The value of the wrapping cost, including tax. The value can't be negative.", - }, - # These fields are not mentioned in the API documentation, but they are present in the actual response. - {"TABLE_NAME": "orders", "COLUMN_NAME": "status_id", "DATA_TYPE": "INT", "COLUMN_DESCRIPTION": ""}, - {"TABLE_NAME": "orders", "COLUMN_NAME": "fees", "DATA_TYPE": "JSON", "COLUMN_DESCRIPTION": ""}, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "credit_card_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "", - }, - {"TABLE_NAME": "orders", "COLUMN_NAME": "custom_status", "DATA_TYPE": "VARCHAR", "COLUMN_DESCRIPTION": ""}, - ] - - -class BigCommerceProductsTable(MetaAPIResource): - """ - The table abstraction for the 'products' resource of the BigCommerce API. - """ - - name = "products" - - def list( - self, - conditions: list[FilterCondition] = None, - limit: int = None, - sort: list[SortColumn] = None, - targets: list[str] = None, - **kwargs, - ): - """ - Executes a parsed SELECT SQL query on the 'products' resource of the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (list[SortColumn]): The list of parsed sort columns. - targets (list[str]): The list of target columns to return. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("id", FilterOperator.EQUAL): "id", - ("id", FilterOperator.IN): "id:in", - ("id", FilterOperator.NOT_IN): "id:not_in", - ("id", FilterOperator.GREATER_THAN): "id:greater", - ("id", FilterOperator.LESS_THAN): "id:less", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:min", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:max", - ("channel_id", FilterOperator.IN): "channel_id:in", - ("categories", FilterOperator.EQUAL): "categories", - ("categories", FilterOperator.IN): "categories:in", - ("name", FilterOperator.EQUAL): "name", - ("mpn", FilterOperator.EQUAL): "mpn", - ("upc", FilterOperator.EQUAL): "upc", - ("price", FilterOperator.EQUAL): "price", - ("weight", FilterOperator.EQUAL): "weight", - ("condition", FilterOperator.EQUAL): "condition", - ("brand_id", FilterOperator.EQUAL): "brand_id", - ("date_modified", FilterOperator.EQUAL): "date_modified", - ("date_modified", FilterOperator.LESS_THAN_OR_EQUAL): "date_modified:max", - ("date_modified", FilterOperator.GREATER_THAN_OR_EQUAL): "date_modified:min", - ("date_last_imported", FilterOperator.EQUAL): "date_last_imported", - ("date_last_imported", FilterOperator.NOT_EQUAL): "date_last_imported:not", - ("date_last_imported", FilterOperator.LESS_THAN_OR_EQUAL): "date_last_imported:max", - ("date_last_imported", FilterOperator.GREATER_THAN_OR_EQUAL): "date_last_imported:min", - ("is_visible", FilterOperator.EQUAL): "is_visible", - ("is_featured", FilterOperator.EQUAL): "is_featured", - ("is_free_shipping", FilterOperator.EQUAL): "is_free_shipping", - ("inventory_level", FilterOperator.EQUAL): "inventory_level", - ("inventory_level", FilterOperator.IN): "inventory_level:in", - ("inventory_level", FilterOperator.NOT_IN): "inventory_level:not_in", - ("inventory_level", FilterOperator.GREATER_THAN_OR_EQUAL): "inventory_level:min", - ("inventory_level", FilterOperator.LESS_THAN_OR_EQUAL): "inventory_level:max", - ("inventory_level", FilterOperator.GREATER_THAN): "inventory_level:greater", - ("inventory_level", FilterOperator.LESS_THAN): "inventory_level:less", - ("inventory_low", FilterOperator.EQUAL): "inventory_low", - ("out_of_stock", FilterOperator.EQUAL): "out_of_stock", - ("total_sold", FilterOperator.EQUAL): "total_sold", - ("type", FilterOperator.EQUAL): "type", - ("keyword", FilterOperator.EQUAL): "keyword", - ("keyword_context", FilterOperator.EQUAL): "keyword_context", - ("availability", FilterOperator.EQUAL): "availability", - ("sku", FilterOperator.EQUAL): "sku", - ("sku", FilterOperator.IN): "sku:in", - } - - filter = {} - for condition in conditions: - simple_op = simple_op_map.get((condition.column, condition.op)) - if simple_op: - value = condition.value - if isinstance(value, list): - value = ",".join(map(str, value)) - filter[simple_op] = value - condition.applied = True - - filter = _make_filter(conditions, simple_op_map) - - if targets: - available_columns = self.get_columns() - for column_name in targets: - if column_name not in available_columns: - raise ValueError(f"Field '{column_name}' does not exists") - filter["include_fields"] = ",".join(targets) - - sortable_columns = [ - "id", - "name", - "sku", - "price", - "date_modified", - "date_last_imported", - "inventory_level", - "is_visible", - "total_sold", - "calculated_price", - ] - sort_condition = _make_sort_condition_v3(sort, sortable_columns) - - result = client.get_products( - filter=filter, - sort_condition=sort_condition, - limit=limit, - ) - result = _make_df(result, self) - - return result - - def get_columns(self) -> List[str]: - """Retrieves the columns names of the 'products' resource. - - Returns: - list[str]: A list of columns names of the 'products' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, *args, **kwargs) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - products_count = client.get_products_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": products_count, - } - - def meta_get_columns(self, *args, **kwargs) -> List[str]: - return [ - {"TABLE_NAME": "products", "COLUMN_NAME": "id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "name", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "type", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "sku", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "description", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "weight", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "width", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "depth", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "height", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "price", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "cost_price", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "retail_price", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "sale_price", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "map_price", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "tax_class_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "product_tax_code", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "calculated_price", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "categories", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "brand_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "option_set_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "option_set_display", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "inventory_level", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "inventory_warning_level", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "inventory_tracking", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "reviews_rating_sum", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "reviews_count", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "total_sold", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "fixed_cost_shipping_price", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "is_free_shipping", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "is_visible", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "is_featured", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "related_products", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "warranty", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "bin_picking_number", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "layout_file", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "upc", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "mpn", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "gtin", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "date_last_imported", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "search_keywords", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "availability", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "availability_description", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "gift_wrapping_options_type", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "gift_wrapping_options_list", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "sort_order", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "condition", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "is_condition_shown", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "order_quantity_minimum", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "order_quantity_maximum", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "page_title", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "meta_keywords", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "meta_description", "DATA_TYPE": "VARTEXTCHAR"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "date_created", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "date_modified", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "view_count", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "preorder_release_date", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "preorder_message", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "is_preorder_only", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "is_price_hidden", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "price_hidden_label", "DATA_TYPE": "DECIMAL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "custom_url", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "base_variant_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "open_graph_type", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "open_graph_title", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "open_graph_description", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "open_graph_use_meta_description", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "open_graph_use_product_name", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "products", "COLUMN_NAME": "open_graph_use_image", "DATA_TYPE": "BOOL"}, - ] - - -class BigCommerceCustomersTable(MetaAPIResource): - """ - The table abstraction for the 'customers' resource of the BigCommerce API. - """ - - name = "customers" - - def list( - self, - conditions: list[FilterCondition] = None, - limit: int = None, - sort: list[SortColumn] = None, - **kwargs, - ): - """ - Executes a parsed SELECT SQL query on the 'customers' resource of the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (list[SortColumn]): The list of parsed sort columns. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - # doc: https://developer.bigcommerce.com/docs/rest-management/customers - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("id", FilterOperator.EQUAL): "id:in", # custom filter - ("id", FilterOperator.IN): "id:in", - ("company", FilterOperator.EQUAL): "company:in", # custom filter - ("company", FilterOperator.IN): "company:in", - ("customer_group_id", FilterOperator.EQUAL): "customer_group_id:in", # custom filter - ("customer_group_id", FilterOperator.IN): "customer_group_id:in", - ("date_created", FilterOperator.EQUAL): "date_created", - ("date_created", FilterOperator.LESS_THAN): "date_created:max", - ("date_created", FilterOperator.GREATER_THAN): "date_created:min", - ("date_modified", FilterOperator.EQUAL): "date_modified", - ("date_modified", FilterOperator.LESS_THAN): "date_modified:max", - ("date_modified", FilterOperator.GREATER_THAN): "date_modified:min", - ("email", FilterOperator.EQUAL): "email:in", # custom filter - ("email", FilterOperator.IN): "email:in", - ("name", FilterOperator.IN): "name:in", - ("name", FilterOperator.LIKE): "name:like", - ("phone", FilterOperator.EQUAL): "phone:in", # custom filter - ("phone", FilterOperator.IN): "phone:in", - ("registration_ip_address", FilterOperator.EQUAL): "registration_ip_address:in", # custom filter - ("registration_ip_address", FilterOperator.IN): "registration_ip_address:in", - } - - filter = _make_filter(conditions, simple_op_map) - - sortable_columns = ["date_created", "last_name", "date_modified"] - sort_condition = _make_sort_condition_v2(sort, sortable_columns) - - result = client.get_customers( - filter=filter, - sort_condition=sort_condition, - limit=limit, - ) - result = _make_df(result, self) - - # 'name' is added to use server-side filtering - result["name"] = result["first_name"] + " " + result["last_name"] - - return result - - def get_columns(self) -> List[str]: - """Retrieves the columns names of the 'customers' resource. - - Returns: - list[str]: A list of columns names of the 'customers' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, table_name: str) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - customers_count = client.get_customers_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": customers_count, - } - - def meta_get_columns(self, *args, **kwargs) -> List[str]: - return [ - {"TABLE_NAME": "customers", "COLUMN_NAME": "id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "authentication", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "company", "DATA_TYPE": "VARCHAR"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "customer_group_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "email", "DATA_TYPE": "VARCHAR"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "first_name", "DATA_TYPE": "VARCHAR"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "last_name", "DATA_TYPE": "VARCHAR"}, - # 'name' is added to use server-side filtering: first_name + last_name - {"TABLE_NAME": "customers", "COLUMN_NAME": "name", "DATA_TYPE": "VARCHAR"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "notes", "DATA_TYPE": "VARCHAR"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "phone", "DATA_TYPE": "VARCHAR"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "registration_ip_address", "DATA_TYPE": "VARCHAR"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "tax_exempt_category", "DATA_TYPE": "VARCHAR"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "date_created", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "date_modified", "DATA_TYPE": "DATETIME"}, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "accepts_product_review_abandoned_cart_emails", - "DATA_TYPE": "BOOL", - }, - {"TABLE_NAME": "customers", "COLUMN_NAME": "origin_channel_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "customers", "COLUMN_NAME": "channel_ids", "DATA_TYPE": "JSON"}, - ] - - -class BigCommerceCategoriesTable(MetaAPIResource): - """ - The table abstraction for the 'categories' resource of the BigCommerce API. - """ - - name = "categories" - - def list( - self, - conditions: list[FilterCondition] = None, - limit: int = None, - targets: list[str] = None, - **kwargs, - ): - """Executes a parsed SELECT SQL query on the 'categories' resource of the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - targets (list[str]): The list of target columns to return. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - # doc: https://developer.bigcommerce.com/docs/rest-catalog/category-trees/categories#get-all-categories - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("category_id", FilterOperator.EQUAL): "category_id:in", # custom filter - ("category_id", FilterOperator.IN): "category_id:in", - ("category_id", FilterOperator.NOT_IN): "category_id:not_in", - ("tree_id", FilterOperator.EQUAL): "tree_id:in", # custom filter - ("tree_id", FilterOperator.IN): "tree_id:in", - ("tree_id", FilterOperator.NOT_IN): "tree_id:not_in", - ("parent_id", FilterOperator.EQUAL): "parent_id:in", # custom filter - ("parent_id", FilterOperator.IN): "parent_id:in", - ("parent_id", FilterOperator.NOT_IN): "parent_id:not_in", - ("page_title", FilterOperator.EQUAL): "page_title", - ("page_title", FilterOperator.LIKE): "page_title:like", - ("name", FilterOperator.EQUAL): "name", - ("name", FilterOperator.LIKE): "name:like", - ("keyword", FilterOperator.EQUAL): "keyword", - ("is_visible", FilterOperator.EQUAL): "is_visible", - } - - filter = _make_filter(conditions, simple_op_map) - - if targets: - available_columns = self.get_columns() - for column_name in targets: - if column_name not in available_columns: - raise ValueError(f"Field '{column_name}' does not exists") - filter["include_fields"] = ",".join(targets) - - result = client.get_categories( - filter=filter, - limit=limit, - ) - result = _make_df(result, self) - - return result - - def get_columns(self) -> List[str]: - """Retrieves the columns names of the 'categories' resource. - - Returns: - list[str]: A list of columns names of the 'categories' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, *args, **kwargs) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - categories_count = client.get_categories_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": categories_count, - } - - def meta_get_columns(self, *args, **kwargs) -> List[str]: - return [ - {"TABLE_NAME": "categories", "COLUMN_NAME": "category_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "parent_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "tree_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "name", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "description", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "views", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "sort_order", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "page_title", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "search_keywords", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "meta_keywords", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "meta_description", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "layout_file", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "is_visible", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "default_product_sort", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "url", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "categories", "COLUMN_NAME": "image_url", "DATA_TYPE": "VARCHAR"}, - ] - - -class BigCommercePickupsTable(MetaAPIResource): - """ - The table abstraction for the 'pickups' resource of the BigCommerce API. - """ - - name = "pickups" - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - **kwargs, - ): - """Executes a parsed SELECT SQL query on the 'pickups' resource of the BigCommerce API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("order_id", FilterOperator.EQUAL): "order_id:in", # custom filter - ("order_id", FilterOperator.IN): "order_id:in", - ("pickup_id", FilterOperator.EQUAL): "pickup_id:in", # custom filter - ("pickup_id", FilterOperator.IN): "pickup_id:in", - } - - filter = _make_filter(conditions, simple_op_map) - - result = client.get_pickups( - filter=filter, - limit=limit, - ) - result = _make_df(result, self) - - return result - - def get_columns(self) -> List[str]: - """Retrieves the columns names of the 'pickups' resource. - - Returns: - list[str]: A list of columns names of the 'pickups' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, *args, **kwargs) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - pickups_count = client.get_pickups_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": pickups_count, - } - - def meta_get_columns(self, *args, **kwargs): - return [ - {"TABLE_NAME": "pickups", "COLUMN_NAME": "id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "pickups", "COLUMN_NAME": "pickup_method_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "pickups", "COLUMN_NAME": "order_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "pickups", "COLUMN_NAME": "ready_at", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "pickups", "COLUMN_NAME": "created_at", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "pickups", "COLUMN_NAME": "updated_at", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "pickups", "COLUMN_NAME": "pickup_items", "DATA_TYPE": "JSON"}, - ] - - -class BigCommercePromotionsTable(MetaAPIResource): - """ - The table abstraction for the 'promotions' resource of the BigCommerce API. - """ - - name = "promotions" - - def list( - self, - conditions: list[FilterCondition] = None, - limit: int = None, - sort: list[SortColumn] = None, - targets: list[str] = None, - **kwargs, - ): - """Executes a parsed SELECT SQL query on the 'promotions' resource of the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (list[SortColumn]): The list of parsed sort columns. - targets (list[str]): The list of target columns to return. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("id", FilterOperator.EQUAL): "id", - ("name", FilterOperator.EQUAL): "name", - ("currency_code", FilterOperator.EQUAL): "currency_code", - ("redemption_type", FilterOperator.EQUAL): "redemption_type", - ("status", FilterOperator.EQUAL): "status", - ("channels", FilterOperator.EQUAL): "channels", # custom filter - ("channels", FilterOperator.IN): "channels", - } - - filter = _make_filter(conditions, simple_op_map) - - sortable_columns = ["id", "name", "start_date", "priority"] - sort_condition = _make_sort_condition_v3(sort, sortable_columns) - - result = client.get_promotions( - filter=filter, - sort_condition=sort_condition, - limit=limit, - ) - result = _make_df(result, self) - - return result - - def get_columns(self) -> List[str]: - """Retrieves the columns names of the 'promotions' resource. - - Returns: - list[str]: A list of columns names of the 'promotions' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, *args, **kwargs) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - promotions_count = client.get_promotions_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": promotions_count, - } - - def meta_get_columns(self, *args, **kwargs) -> List[str]: - return [ - {"TABLE_NAME": "promotions", "COLUMN_NAME": "id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "redemption_type", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "name", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "display_name", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "channels", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "customer", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "rules", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "current_uses", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "max_uses", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "status", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "start_date", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "end_date", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "stop", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "can_be_used_with_other_promotions", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "currency_code", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "notifications", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "shipping_address", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "schedule", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "promotions", "COLUMN_NAME": "created_from", "DATA_TYPE": "TEXT"}, - ] - - -class BigCommerceWishlistsTable(MetaAPIResource): - """ - The table abstraction for the 'wishlists' resource of the BigCommerce API. - """ - - name = "wishlists" - - def list( - self, - conditions: list[FilterCondition] = None, - limit: int = None, - **kwargs, - ): - """Executes a parsed SELECT SQL query on the 'wishlists' resource of the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("customer_id", FilterOperator.IN): "customer_id:in", - } - - filter = _make_filter(conditions, simple_op_map) - - result = client.get_wishlists( - filter=filter, - limit=limit, - ) - result = _make_df(result, self) - - return result - - def get_columns(self) -> List[str]: - """Retrieves the columns names of the 'wishlists' resource. - - Returns: - list[str]: A list of columns names of the 'wishlists' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, *args, **kwargs) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - wishlists_count = client.get_wishlists_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": wishlists_count, - } - - def meta_get_columns(self, *args, **kwargs) -> List[str]: - return [ - {"TABLE_NAME": "wishlists", "COLUMN_NAME": "id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "wishlists", "COLUMN_NAME": "customer_id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "wishlists", "COLUMN_NAME": "name", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "wishlists", "COLUMN_NAME": "is_public", "DATA_TYPE": "BOOL"}, - {"TABLE_NAME": "wishlists", "COLUMN_NAME": "token", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "wishlists", "COLUMN_NAME": "items", "DATA_TYPE": "JSON"}, - ] - - -class BigCommerceSegmentsTable(MetaAPIResource): - """ - The table abstraction for the 'segments' (customer segmentation) resource of the BigCommerce API. - """ - - name = "segments" - - def list( - self, - conditions: list[FilterCondition] = None, - limit: int = None, - **kwargs, - ): - """Executes a parsed SELECT SQL query on the 'segments' resource of the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("id", FilterOperator.IN): "id:in", - } - - filter = _make_filter(conditions, simple_op_map) - - result = client.get_segments( - filter=filter, - limit=limit, - ) - result = _make_df(result, self) - - return result - - def get_columns(self) -> List[str]: - """Retrieves the columns names of the 'segments' resource. - - Returns: - list[str]: A list of columns names of the 'segments' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, *args, **kwargs) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - segments_count = client.get_segments_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": segments_count, - } - - def meta_get_columns(self, *args, **kwargs) -> List[str]: - return [ - {"TABLE_NAME": "segments", "COLUMN_NAME": "id", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "segments", "COLUMN_NAME": "name", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "segments", "COLUMN_NAME": "description", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "segments", "COLUMN_NAME": "created_at", "DATA_TYPE": "DATETIME"}, - {"TABLE_NAME": "segments", "COLUMN_NAME": "updated_at", "DATA_TYPE": "DATETIME"}, - ] - - -class BigCommerceBrandsTable(MetaAPIResource): - """ - The table abstraction for the 'brands' resource of the BigCommerce API. - """ - - name = "brands" - - def list( - self, - conditions: list[FilterCondition] = None, - limit: int = None, - sort: list[SortColumn] = None, - targets: list[str] = None, - **kwargs, - ): - """Executes a parsed SELECT SQL query on the 'brands' resource of the BigCommerce API. - - Args: - conditions (list[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (list[SortColumn]): The list of parsed sort columns. - targets (list[str]): The list of target columns to return. - - Returns: - pd.DataFrame: The resulting DataFrame. - """ - client: BigCommerceAPIClient = self.handler.connect() - - simple_op_map = { - ("id", FilterOperator.EQUAL): "id", - ("id", FilterOperator.IN): "id:in", - ("id", FilterOperator.NOT_IN): "id:not_in", - ("id", FilterOperator.GREATER_THAN): "id:greater", - ("id", FilterOperator.LESS_THAN): "id:less", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:min", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:max", - ("name", FilterOperator.EQUAL): "name", - ("name", FilterOperator.LIKE): "name:like", - ("page_title", FilterOperator.EQUAL): "page_title", - } - - filter = _make_filter(conditions, simple_op_map) - - if targets: - available_columns = self.get_columns() - for column_name in targets: - if column_name not in available_columns: - raise ValueError(f"Field '{column_name}' does not exists") - filter["include_fields"] = ",".join(targets) - - sortable_columns = ["name"] - sort_condition = _make_sort_condition_v3(sort, sortable_columns) - - result = client.get_brands( - filter=filter, - sort_condition=sort_condition, - limit=limit, - ) - result = _make_df(result, self) - - return result - - def get_columns(self) -> List[str]: - """Retrieves the columns names of the 'brands' resource. - - Returns: - list[str]: A list of columns names of the 'brands' resource. - """ - columns = self.meta_get_columns() - return [column["COLUMN_NAME"] for column in columns] - - def meta_get_tables(self, *args, **kwargs) -> dict: - client: BigCommerceAPIClient = self.handler.connect() - brands_count = client.get_brands_count() - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": brands_count, - } - - def meta_get_columns(self, *args, **kwargs) -> List[str]: - return [ - {"TABLE_NAME": "brands", "COLUMN_NAME": "id", "DATA_TYPE": "INT"}, - {"TABLE_NAME": "brands", "COLUMN_NAME": "name", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "brands", "COLUMN_NAME": "page_title", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "brands", "COLUMN_NAME": "meta_keywords", "DATA_TYPE": "JSON"}, - {"TABLE_NAME": "brands", "COLUMN_NAME": "meta_description", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "brands", "COLUMN_NAME": "search_keywords", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "brands", "COLUMN_NAME": "image_url", "DATA_TYPE": "TEXT"}, - {"TABLE_NAME": "brands", "COLUMN_NAME": "custom_url", "DATA_TYPE": "JSON"}, - ] diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/connection_args.py b/mindsdb/integrations/handlers/bigcommerce_handler/connection_args.py deleted file mode 100644 index 3b3a1b909e2..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/connection_args.py +++ /dev/null @@ -1,25 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_base={ - "type": ARG_TYPE.URL, - "description": "The base URL of the BigCommerce instance/server.", - "label": "Base URL", - "required": True, - }, - access_token={ - "type": ARG_TYPE.STR, - "description": "The API token for the BigCommerce account.", - "label": "Access Token", - "required": True, - "secret": True, - }, -) - -connection_args_example = OrderedDict( - api_base="https://api.bigcommerce.com/stores/0fh0fh0fh0/v3/", - access_token="h0fhag1nyqag1ezme1nyqa", -) diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/tests/__init__.py b/mindsdb/integrations/handlers/bigcommerce_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/bigcommerce_handler/tests/test_bigcommerce_handler.py b/mindsdb/integrations/handlers/bigcommerce_handler/tests/test_bigcommerce_handler.py deleted file mode 100644 index 0e747a4df16..00000000000 --- a/mindsdb/integrations/handlers/bigcommerce_handler/tests/test_bigcommerce_handler.py +++ /dev/null @@ -1,176 +0,0 @@ -import unittest -from unittest.mock import patch, MagicMock -from mindsdb.integrations.handlers.bigcommerce_handler.bigcommerce_handler import BigCommerceHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb_sql_parser import ast - - -class BigCommerceHandlerTest(unittest.TestCase): - def setUp(self): - self.handler = BigCommerceHandler( - "test_bigcommerce_handler", - connection_data={ - "api_base": "https://api.bigcommerce.com/stores/test-store/v3/", - "access_token": "mock_access_token", - }, - ) - self.patcher = patch( - "mindsdb.integrations.handlers.bigcommerce_handler.bigcommerce_handler.BigCommerceAPIClient" - ) - self.mock_client = self.patcher.start() - self.mock_client_instance = MagicMock() - self.mock_client.return_value = self.mock_client_instance - - def tearDown(self): - self.patcher.stop() - - def test_check_connection_success(self): - """Test successful connection to BigCommerce API""" - self.mock_client_instance.get_products.return_value = [{"id": 1, "name": "Test Product"}] - response = self.handler.check_connection() - self.assertTrue(response.success) - self.mock_client_instance.get_products.assert_called_once_with(limit=1) - - def test_check_connection_failure(self): - """Test failed connection to BigCommerce API""" - self.mock_client_instance.get_products.side_effect = Exception("Connection failed") - response = self.handler.check_connection() - self.assertFalse(response.success) - self.assertIsNotNone(response.error_message) - - def test_get_tables(self): - """Test retrieving list of tables""" - result = self.handler.get_tables() - self.assertIsNotNone(result) - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - # Check that all expected tables are present - table_names = [row[0] for row in result.data_frame.values] - expected_tables = [ - "orders", - "products", - "customers", - "categories", - "pickups", - "promotions", - "wishlists", - "segments", - "brands", - ] - for table in expected_tables: - self.assertIn(table, table_names) - - def test_query_products(self): - """Test querying products table""" - mock_products = [ - { - "id": 1, - "name": "Product 1", - "type": "physical", - "sku": "SKU-001", - "price": "29.99", - "inventory_level": 100, - }, - { - "id": 2, - "name": "Product 2", - "type": "digital", - "sku": "SKU-002", - "price": "49.99", - "inventory_level": 50, - }, - ] - self.mock_client_instance.get_products.return_value = mock_products - - query = ast.Select(targets=[ast.Star()], from_table=ast.Identifier("products"), limit=ast.Constant(10)) - - self.handler.connect = MagicMock(return_value=self.mock_client_instance) - result = self.handler.query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 2) - - def test_query_orders(self): - """Test querying orders table""" - mock_orders = [ - {"id": 100, "customer_id": 1, "status": "completed", "total_inc_tax": 99.99, "date_created": "2024-01-15"}, - {"id": 101, "customer_id": 2, "status": "pending", "total_inc_tax": 149.99, "date_created": "2024-01-16"}, - ] - self.mock_client_instance.get_orders.return_value = mock_orders - - query = ast.Select(targets=[ast.Star()], from_table=ast.Identifier("orders"), limit=ast.Constant(10)) - - self.handler.connect = MagicMock(return_value=self.mock_client_instance) - result = self.handler.query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 2) - - def test_query_customers(self): - """Test querying customers table""" - mock_customers = [ - { - "id": 1, - "email": "customer1@example.com", - "first_name": "John", - "last_name": "Doe", - "customer_group_id": 0, - }, - { - "id": 2, - "email": "customer2@example.com", - "first_name": "Jane", - "last_name": "Smith", - "customer_group_id": 1, - }, - ] - self.mock_client_instance.get_customers.return_value = mock_customers - - query = ast.Select(targets=[ast.Star()], from_table=ast.Identifier("customers"), limit=ast.Constant(10)) - - self.handler.connect = MagicMock(return_value=self.mock_client_instance) - result = self.handler.query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 2) - - def test_query_with_filter(self): - """Test querying with WHERE clause""" - mock_products = [{"id": 1, "name": "Filtered Product", "price": 29.99}] - self.mock_client_instance.get_products.return_value = mock_products - - query = ast.Select( - targets=[ast.Star()], - from_table=ast.Identifier("products"), - where=ast.BinaryOperation("=", args=[ast.Identifier("id"), ast.Constant(1)]), - limit=ast.Constant(10), - ) - - self.handler.connect = MagicMock(return_value=self.mock_client_instance) - result = self.handler.query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertGreaterEqual(len(result.data_frame), 0) - - def test_connect_missing_parameters(self): - """Test connection with missing required parameters""" - handler = BigCommerceHandler( - "test_handler", - connection_data={ - "api_base": "https://api.bigcommerce.com/stores/test/" - # Missing access_token - }, - ) - - with self.assertRaises(ValueError) as context: - handler.connect() - - self.assertIn("Required parameters", str(context.exception)) - - def test_meta_columns(self): - """Test retrieving metadata columns""" - meta_columns = self.handler.meta_get_columns() - self.assertTrue(len(meta_columns.data_frame) > 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py b/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py index 723801413ce..fe76de29ff3 100644 --- a/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +++ b/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py @@ -80,7 +80,8 @@ def connect(self): ) credentials = google_sa_oauth2_manager.get_oauth2_credentials() - client = Client(project=self.connection_data["project_id"], credentials=credentials) + billing_project = self.connection_data.get("billing_project") or self.connection_data["project_id"] + client = Client(project=billing_project, credentials=credentials) self.is_connected = True self.connection = client return self.connection @@ -257,7 +258,9 @@ def check_connection(self) -> StatusResponse: connection.query("SELECT 1;", timeout=10, retry=DEFAULT_RETRY.with_deadline(10)) # Check if the dataset exists - connection.get_dataset(self.connection_data["dataset"]) + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] + dataset_ref = f"{dataset_project}.{self.connection_data['dataset']}" + connection.get_dataset(dataset_ref) response.success = True except (BadRequest, ValueError) as e: @@ -286,13 +289,13 @@ def native_query(self, query: str) -> Response: """ connection = self.connect() try: - job_config = QueryJobConfig( - default_dataset=f"{self.connection_data['project_id']}.{self.connection_data['dataset']}" - ) + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] + job_config = QueryJobConfig(default_dataset=f"{dataset_project}.{self.connection_data['dataset']}") query_job = connection.query(query, job_config=job_config) result = query_job.to_dataframe() self._record_query_stats(query_job) - if not result.empty: + has_table_result = isinstance(result, pd.DataFrame) and (not result.empty or len(result.columns) > 0) + if has_table_result: response = Response(RESPONSE_TYPE.TABLE, result) else: response = Response(RESPONSE_TYPE.OK) @@ -342,10 +345,11 @@ def get_tables(self) -> Response: # Get filtered table list based on configuration filtered_tables = self._get_filtered_tables() + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] # Build base query query = f""" SELECT table_name, table_schema, table_type - FROM `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` + FROM `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` WHERE table_type IN ('BASE TABLE', 'VIEW') """ @@ -380,9 +384,10 @@ def get_columns(self, table_name) -> Response: Raises: ValueError: If the 'table_name' is not a valid string. """ + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] query = f""" SELECT column_name AS Field, data_type as Type - FROM `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` + FROM `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` WHERE table_name = '{table_name}' """ result = self.native_query(query) @@ -402,6 +407,7 @@ def meta_get_tables(self, table_names: Optional[list] = None) -> Response: Returns: Response: A response object containing the metadata information. """ + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] query = f""" SELECT t.table_name, @@ -409,9 +415,9 @@ def meta_get_tables(self, table_names: Optional[list] = None) -> Response: t.table_type, st.row_count FROM - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` AS t - JOIN - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.__TABLES__` AS st + `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` AS t + LEFT JOIN + `{dataset_project}.{self.connection_data["dataset"]}.__TABLES__` AS st ON t.table_name = st.table_id WHERE @@ -468,6 +474,7 @@ def meta_get_columns(self, table_names: Optional[list] = None) -> Response: Returns: Response: A response object containing the column metadata. """ + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] query = f""" SELECT table_name, @@ -479,7 +486,7 @@ def meta_get_columns(self, table_names: Optional[list] = None) -> Response: ELSE FALSE END AS is_nullable FROM - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` + `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` """ # Apply connection-time filtering @@ -526,9 +533,10 @@ def meta_get_column_statistics_for_table(self, table_name: str, columns: list) - Response: A response object containing the column statistics. """ # Check column data types + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] column_types_query = f""" SELECT column_name, data_type - FROM `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` + FROM `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` WHERE table_name = '{table_name}' """ column_types_result = self.native_query(column_types_query) @@ -588,7 +596,7 @@ def chunked(lst, n): CAST(MAX(`{column}`) AS STRING) AS maximum_value, COUNT(DISTINCT `{column}`) AS distinct_values_count FROM - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.{table_name}` + `{dataset_project}.{self.connection_data["dataset"]}.{table_name}` """ ) else: @@ -604,7 +612,7 @@ def chunked(lst, n): CAST(NULL AS STRING) AS maximum_value, CAST(NULL AS INT64) AS distinct_values_count FROM - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.{table_name}` + `{dataset_project}.{self.connection_data["dataset"]}.{table_name}` """ ) @@ -648,16 +656,17 @@ def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response: Returns: Response: A response object containing the primary key information. """ + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] query = f""" SELECT tc.table_name, kcu.column_name, kcu.ordinal_position, - tc.constraint_name, + tc.constraint_name FROM - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc + `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc JOIN - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu + `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu ON tc.constraint_name = kcu.constraint_name WHERE @@ -710,6 +719,7 @@ def meta_get_foreign_keys(self, table_names: Optional[list] = None) -> Response: Returns: Response: A response object containing the foreign key information. """ + dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] query = f""" SELECT ccu.table_name AS parent_table_name, @@ -718,13 +728,13 @@ def meta_get_foreign_keys(self, table_names: Optional[list] = None) -> Response: kcu.column_name AS child_column_name, tc.constraint_name FROM - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc + `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc JOIN - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu + `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu ON tc.constraint_name = kcu.constraint_name JOIN - `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE` AS ccu + `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE` AS ccu ON tc.constraint_name = ccu.constraint_name WHERE diff --git a/mindsdb/integrations/handlers/bigquery_handler/connection_args.py b/mindsdb/integrations/handlers/bigquery_handler/connection_args.py index 36bfe184814..efc0c3ad81a 100644 --- a/mindsdb/integrations/handlers/bigquery_handler/connection_args.py +++ b/mindsdb/integrations/handlers/bigquery_handler/connection_args.py @@ -5,22 +5,19 @@ connection_args = OrderedDict( project_id={ - 'type': ARG_TYPE.STR, - 'description': 'The BigQuery project id.' + "type": ARG_TYPE.STR, + "description": "Default BigQuery project id (used for billing and dataset lookup if not overridden).", }, - dataset={ - 'type': ARG_TYPE.STR, - 'description': 'The BigQuery dataset name.' + billing_project={ + "type": ARG_TYPE.STR, + "description": "BigQuery project id to bill query jobs to (defaults to project_id).", }, + dataset_project={"type": ARG_TYPE.STR, "description": "Project id that owns the dataset (defaults to project_id)."}, + dataset={"type": ARG_TYPE.STR, "description": "The BigQuery dataset name."}, service_account_keys={ - 'type': ARG_TYPE.PATH, - 'description': 'Full path or URL to the service account JSON file', - 'secret': True - }, - service_account_json={ - 'type': ARG_TYPE.DICT, - 'description': 'Content of service account JSON file', - 'secret': True + "type": ARG_TYPE.PATH, + "description": "Full path or URL to the service account JSON file", + "secret": True, }, include_tables={ 'type': ARG_TYPE.STR, @@ -34,9 +31,9 @@ 'required': False, 'label': 'Exclude Tables' }, + service_account_json={"type": ARG_TYPE.DICT, "description": "Content of service account JSON file", "secret": True}, ) connection_args_example = OrderedDict( - project_id='tough-future-332513', - service_account_keys='/home/bigq/tough-future-332513.json' + project_id="tough-future-332513", service_account_keys="/home/bigq/tough-future-332513.json" ) diff --git a/mindsdb/integrations/handlers/binance_handler/README.md b/mindsdb/integrations/handlers/binance_handler/README.md deleted file mode 100644 index e4344e08c63..00000000000 --- a/mindsdb/integrations/handlers/binance_handler/README.md +++ /dev/null @@ -1,122 +0,0 @@ -# Binance API Handler - -This handler integrates with the [Binance API](https://binance-docs.github.io/apidocs/spot/en/#change-log) to make aggregate trade (kline) data available to use for model training and predictions. - -## Example: Forecast Cryptocurrency Prices - -To see how the Binance handler is used, let's walk through a simple example to create a time series model to predict the future price of Bitcoin (BTC) in terms of USDT. - -### Connect to the Binance API -We start by creating a database to connect to the Binance API. Currently, there is no need for an API key: - -``` -CREATE DATABASE my_binance -WITH - ENGINE = 'binance' - PARAMETERS = {}; -``` - -### Select Data -To see if the connection was successful, try searching for the most recent trade data. By default, aggregate data (klines) from the latest 1000 trading intervals with a length of 1m each are returned. - -``` -SELECT * -FROM my_binance.aggregated_trade_data -WHERE symbol = 'BTCUSDT'; -``` - -Each row should look like this: - -| symbol | open_time | open_price | high_price | low_price | close_price | volume | close_time | quote_asset_volume | number_of_trades | taker_buy_base_asset_volume | taker_buy_quote_asset_volume | -| ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ------------------ | ---------------- | --------------------------- | ---------------------------- | -| BTCUSDT | 1678338600| 21752.65000 | 21761.33000 | 21751.53000 | 21756.7000 | 103.8614100 | 1678338659.999 | 2259656.20520700 | 3655 | 55.25763000 | 1202219.60971860 - -where: -* symbol - Trading pair (BTC to USDT in the above example) -* open_time - Start time of interval in seconds since the Unix epoch (default interval is 1m) -* open_price - Price of base asset at beginning of trading interval -* high_price - Highest price of base asset during trading interval -* low_price - Lowest price of base asset during trading interval -* close_price - Price of base asset at end of trading interval -* volume - Total amount of base asset traded during interval -* close_time - End time of interval in seconds since the Unix epoch -* quote_asset_volume - Total amount of quote asset (USDT in above case) traded during interval -* number_of_trades - Total number of trades made during interval -* taker_buy_base_asset_volume - How much of the base asset volume is contributed by taker buy orders -* taker_buy_quote_asset_volume - How much of the quote asset volume is contributed by taker buy orders - - -You can customize open_time, close_time, and interval: - -``` -SELECT * -FROM my_binance.aggregated_trade_data -WHERE symbol = 'BTCUSDT' -AND open_time > '2023-01-01' -AND close_time < '2023-01-03 08:00:00' -AND interval = '1s' -LIMIT 10000; -``` - -Supported intervals are [listed here](https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data): -* 1s -* 1m -* 3m -* 5m -* 15m -* 30m -* 1h -* 2h -* 4h -* 6h -* 8h -* 12h -* 1d -* 3d -* 1w - -### Train a Model - -Now it's time to create a time series model using 10000 trading intervals in the past with duration 1m. - -``` -CREATE MODEL mindsdb.btc_forecast_model -FROM my_binance -( - SELECT * FROM aggregated_trade_data - WHERE symbol = 'BTCUSDT' - AND close_time < '2023-01-01' - AND interval = '1m' - LIMIT 10000 -) - -PREDICT open_price - -ORDER BY open_time -WINDOW 100 -HORIZON 10; -``` - -It may take a few minutes to complete. For more accuracy, you should increase the limit to be higher (e.g. 100,000) - -### Making Predictions - -First let's make a view for the most recent BTCUSDT aggregate trade data: - -``` -CREATE VIEW recent_btcusdt_data AS ( - SELECT * FROM my_binance.aggregated_trade_data - WHERE symbol = 'BTCUSDT' -) -``` - -Now let's predict the future price of BTC: - -``` -SELECT m.* -FROM recent_btcusdt_data AS t -JOIN mindsdb.btc_forecast_model AS m -WHERE m.open_time > LATEST -``` - -This should give you the predicted BTC price for the next 10 minutes (we set the horizon to 10) in terms of USDT. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/binance_handler/__about__.py b/mindsdb/integrations/handlers/binance_handler/__about__.py deleted file mode 100644 index 473b52ac92c..00000000000 --- a/mindsdb/integrations/handlers/binance_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Binance handler' -__package_name__ = 'mindsdb_binance_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the Binance API" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/binance_handler/__init__.py b/mindsdb/integrations/handlers/binance_handler/__init__.py deleted file mode 100644 index 13dac7fad0e..00000000000 --- a/mindsdb/integrations/handlers/binance_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .binance_handler import BinanceHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Binance' -name = 'binance' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/binance_handler/binance_handler.py b/mindsdb/integrations/handlers/binance_handler/binance_handler.py deleted file mode 100644 index 7a9651fccfe..00000000000 --- a/mindsdb/integrations/handlers/binance_handler/binance_handler.py +++ /dev/null @@ -1,145 +0,0 @@ -import pandas as pd -from typing import Dict - -from binance.spot import Spot - -from mindsdb.integrations.handlers.binance_handler.binance_tables import BinanceAggregatedTradesTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -_BASE_BINANCE_US_URL = 'https://api.binance.us' - -logger = log.getLogger(__name__) - - -class BinanceHandler(APIHandler): - """A class for handling connections and interactions with the Binance API. - - Attributes: - client (binance.spot.Spot): The `binance.spot.Spot` object for interacting with the Binance API. - api_key (str): API key, only required for user data endpoints. - api_secret (str): API secret, only required for user data endpoints. - is_connected (bool): Whether or not the API client is connected to Binance. - - """ - - def __init__(self, name: str = None, **kwargs): - """Registers all API tables and prepares the handler for an API connection. - - Args: - name: (str): The handler name to use - """ - super().__init__(name) - self.api_key = None - self.api_secret = None - - args = kwargs.get('connection_data', {}) - if 'api_key' in args: - self.api_key = args['api_key'] - if 'api_secret' in args: - self.api_secret = args['api_secret'] - - self.client = None - self.is_connected = False - - aggregated_trade_data = BinanceAggregatedTradesTable(self) - self._register_table('aggregated_trade_data', aggregated_trade_data) - - def connect(self) -> Spot: - """Creates a new Binance Spot API client if needed and sets it as the client to use for requests. - - Returns newly created Binance Spot API client, or current client if already set. - """ - if self.is_connected is True and self.client: - return self.client - - if self.api_key and self.api_secret: - self.client = Spot(key=self.api_key, secret=self.api_secret, base_url=_BASE_BINANCE_US_URL) - else: - self.client = Spot(base_url=_BASE_BINANCE_US_URL) - - self.is_connected = True - return self.client - - def check_connection(self) -> StatusResponse: - """Checks connection to Binance API by sending a ping request. - - Returns StatusResponse indicating whether or not the handler is connected. - """ - - response = StatusResponse(False) - - try: - client = self.connect() - client.ping() - response.success = True - - except Exception as e: - logger.error(f'Error connecting to Binance API: {e}!') - response.error_message = e - - self.is_connected = response.success - return response - - def _get_klines(self, params: Dict = None) -> pd.DataFrame: - """Gets aggregate trade data for a symbol based on given parameters - - Returns results as a pandas DataFrame. - - Args: - params (Dict): Trade data params (symbol, interval, limit, start_time, end_time) - """ - if 'symbol' not in params: - raise ValueError('Missing "symbol" param to fetch trade data for.') - if 'interval' not in params: - raise ValueError('Missing "interval" param (1m, 1d, etc).') - - client = self.connect() - symbol = params['symbol'] - interval = params['interval'] - limit = params['limit'] if 'limit' in params else BinanceAggregatedTradesTable.DEFAULT_AGGREGATE_TRADE_LIMIT - start_time = params['start_time'] if 'start_time' in params else None - end_time = params['end_time'] if 'end_time' in params else None - raw_klines = client.klines( - symbol, - interval, - limit=limit, - startTime=start_time, - endTime=end_time) - - open_time_i = 0 - close_time_i = 6 - for i in range(len(raw_klines)): - # To train we need timestamps to be in seconds since Unix epoch and Binance returns it in ms. - raw_klines[i][open_time_i] = int(raw_klines[i][open_time_i] / 1000) - raw_klines[i][close_time_i] = int(raw_klines[i][close_time_i] / 1000) - - df = pd.DataFrame(raw_klines) - df.insert(0, 'symbol', [symbol] * len(raw_klines), True) - # Remove last unnecessary column (unused API field) - if len(raw_klines) > 0: - num_cols = len(df.columns) - df = df.drop(df.columns[[num_cols - 1]], axis=1) - return df - - def native_query(self, query: str = None) -> Response: - ast = parse_sql(query) - return self.query(ast) - - def call_binance_api(self, method_name: str = None, params: Dict = None) -> pd.DataFrame: - """Calls the Binance API method with the given params. - - Returns results as a pandas DataFrame. - - Args: - method_name (str): Method name to call (e.g. klines) - params (Dict): Params to pass to the API call - """ - if method_name == 'klines': - return self._get_klines(params) - raise NotImplementedError('Method name {} not supported by Binance API Handler'.format(method_name)) diff --git a/mindsdb/integrations/handlers/binance_handler/binance_tables.py b/mindsdb/integrations/handlers/binance_handler/binance_tables.py deleted file mode 100644 index afa087f975a..00000000000 --- a/mindsdb/integrations/handlers/binance_handler/binance_tables.py +++ /dev/null @@ -1,174 +0,0 @@ -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.date_utils import interval_str_to_duration_ms, utc_date_str_to_timestamp_ms -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb_sql_parser import ast - -from concurrent.futures import ThreadPoolExecutor -from typing import Dict, List - -import pandas as pd -import time - - -class BinanceAggregatedTradesTable(APITable): - - # Default 1m intervals in aggregate data. - DEFAULT_AGGREGATE_TRADE_INTERVAL = '1m' - DEFAULT_AGGREGATE_TRADE_LIMIT = 1000 - # Binance Spot client has connection pool size of 10. - MAX_THREAD_POOL_WORKERS = 10 - - def _get_batch_klines(self, executor: ThreadPoolExecutor, total_results: int, params: Dict) -> pd.DataFrame: - """Gets aggregate trade data in batches and combines the results together. - - Returns all results as a pandas DataFrame. - - Args: - executor (ThreadPoolExecutor): Executor to use when mapping API calls as tasks. - total_results (int): Total number of results to fetch. - params (Dict): Overall request params to be split into batches. - """ - interval_duration_ms = interval_str_to_duration_ms(params['interval']) - if 'end_time' not in params: - # Default to get all klines before the current time. - overall_end_ms = int(time.time() * 1000) - else: - overall_end_ms = params['end_time'] - - if 'start_time' not in params: - total_duration_ms = interval_duration_ms * total_results - # Infer start time based on the interval and how many klines we need to fetch. - overall_start_ms = overall_end_ms - total_duration_ms - else: - overall_start_ms = params['start_time'] - - next_params = params.copy() - next_params['start_time'] = overall_start_ms - duration_per_api_call_ms = interval_duration_ms * BinanceAggregatedTradesTable.DEFAULT_AGGREGATE_TRADE_LIMIT - next_params['end_time'] = min(overall_end_ms, overall_start_ms + duration_per_api_call_ms) - all_params = [next_params.copy()] - results_so_far = BinanceAggregatedTradesTable.DEFAULT_AGGREGATE_TRADE_LIMIT - while next_params['end_time'] < overall_end_ms and results_so_far < total_results: - next_params['limit'] = min( - BinanceAggregatedTradesTable.DEFAULT_AGGREGATE_TRADE_LIMIT, - total_results - results_so_far - ) - next_params['start_time'] = next_params['end_time'] - next_params['end_time'] = min(overall_end_ms, next_params['start_time'] + duration_per_api_call_ms) - all_params.append(next_params.copy()) - results_so_far += next_params['limit'] - - aggregated_trade_subdatas = list(executor.map(lambda p: self.handler.call_binance_api(method_name='klines', params=p), all_params)) - if not aggregated_trade_subdatas: - return pd.DataFrame([]) - - aggregated_trade_data = aggregated_trade_subdatas[0] - for aggregated_trade_subdata in aggregated_trade_subdatas[1:]: - aggregated_trade_data = pd.concat([aggregated_trade_data, aggregated_trade_subdata]) - return aggregated_trade_data - - def _get_kline_params_from_conditions(self, conditions: List) -> Dict: - """Gets aggregate trade data API params from SQL WHERE conditions. - - Returns params to use for Binance API call to klines. - - Args: - conditions (List): List of individual SQL WHERE conditions. - """ - params = { - 'interval': BinanceAggregatedTradesTable.DEFAULT_AGGREGATE_TRADE_INTERVAL, - 'limit': BinanceAggregatedTradesTable.DEFAULT_AGGREGATE_TRADE_LIMIT - } - for op, arg1, arg2 in conditions: - if arg1 == 'interval': - if op != '=': - raise NotImplementedError - params['interval'] = arg2 - - elif arg1 == 'symbol': - if op != '=': - raise NotImplementedError - params['symbol'] = arg2 - interval_duration_ms = interval_str_to_duration_ms(params['interval']) - - for op, arg1, arg2 in conditions: - if arg1 == 'open_time': - utc_timestamp_ms = utc_date_str_to_timestamp_ms(arg2) - if op == '>': - params['start_time'] = utc_timestamp_ms - else: - raise NotImplementedError - continue - elif arg1 == 'close_time': - utc_timestamp_ms = utc_date_str_to_timestamp_ms(arg2) - if op == '<': - params['end_time'] = utc_timestamp_ms - interval_duration_ms - else: - raise NotImplementedError - - return params - - def select(self, query: ast.Select) -> pd.DataFrame: - """Selects data from the Binance API and returns it as a pandas DataFrame. - - Returns dataframe representing the Binance API results. - - Args: - query (ast.Select): Given SQL SELECT query - """ - conditions = extract_comparison_conditions(query.where) - params = self._get_kline_params_from_conditions(conditions) - - total_results = params['limit'] - if query.limit: - total_results = query.limit.value - params['limit'] = min(BinanceAggregatedTradesTable.DEFAULT_AGGREGATE_TRADE_LIMIT, query.limit.value) - - if total_results > BinanceAggregatedTradesTable.DEFAULT_AGGREGATE_TRADE_LIMIT: - # Max 1000 klines per API call so we need to combine multiple API calls. - with ThreadPoolExecutor(max_workers=BinanceAggregatedTradesTable.MAX_THREAD_POOL_WORKERS) as executor: - aggregated_trades_data = self._get_batch_klines(executor, total_results, params) - - else: - aggregated_trades_data = self.handler.call_binance_api( - method_name='klines', - params=params - ) - - # Only return the columns we need to. - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(aggregated_trades_data) == 0: - aggregated_trades_data = pd.DataFrame([], columns=columns) - else: - # Remove columns not part of select. - aggregated_trades_data.columns = self.get_columns() - for col in set(aggregated_trades_data.columns).difference(set(columns)): - aggregated_trades_data = aggregated_trades_data.drop(col, axis=1) - - return aggregated_trades_data - - def get_columns(self): - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - 'symbol', - 'open_time', - 'open_price', - 'high_price', - 'low_price', - 'close_price', - 'volume', - 'close_time', - 'quote_asset_volume', - 'number_of_trades', - 'taker_buy_base_asset_volume', - 'taker_buy_quote_asset_volume' - ] diff --git a/mindsdb/integrations/handlers/binance_handler/icon.svg b/mindsdb/integrations/handlers/binance_handler/icon.svg deleted file mode 100644 index 706dd8f56bb..00000000000 --- a/mindsdb/integrations/handlers/binance_handler/icon.svg +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/binance_handler/requirements.txt b/mindsdb/integrations/handlers/binance_handler/requirements.txt deleted file mode 100644 index af5e4ddae92..00000000000 --- a/mindsdb/integrations/handlers/binance_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -binance-connector \ No newline at end of file diff --git a/mindsdb/integrations/handlers/byom_handler/README.md b/mindsdb/integrations/handlers/byom_handler/README.md deleted file mode 100644 index 5186b0e45e1..00000000000 --- a/mindsdb/integrations/handlers/byom_handler/README.md +++ /dev/null @@ -1,54 +0,0 @@ - -## BYOM Handler - -### Http api: - -**POST** api/handlers/byom/ - -Uploaded files -- code - python file with model code -- modules - text file with requirements - -**Logic:** - -1. Create new ml engine -2. If virtualenv is installed on computer: - - try to create a new virtual environment using current python interpreter - - install all requirements for model - - if pandas is not in requirements - it is installed anyway - - install pyarrow (for sending dataframe between different pandas versions) -3. If not virtualenv is installed: - - try to install requirements to current environment -4. Try to import uploaded model and check predict and train methods -5. If not success: - - remove virtual environment if it was created - - remove ml engine - -Using: - -1. Add args argument to train and predict methods: -``` -class CustomPredictor(): - def train(self, df, target_col, args=None): - ... - - def predict(self, df, args=None): - ... -``` - -2. Passing args to model training -``` -create predictor pred -from files (select * from byom) -predict Time -using engine='uploaded_model', - param1=1, param2='2' -``` - -3. Passing args at predict -``` -select * from files.byom t -join pred3 p -using param1=1, param2='2' -``` - diff --git a/mindsdb/integrations/handlers/byom_handler/__about__.py b/mindsdb/integrations/handlers/byom_handler/__about__.py deleted file mode 100644 index 963d1d697ad..00000000000 --- a/mindsdb/integrations/handlers/byom_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB BYOM handler' -__package_name__ = 'mindsdb_byom_handler' -__version__ = '0.0.2' -__description__ = "MindsDB handler for BYOM" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/byom_handler/__init__.py b/mindsdb/integrations/handlers/byom_handler/__init__.py deleted file mode 100644 index 4f2d7df093e..00000000000 --- a/mindsdb/integrations/handlers/byom_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args -try: - from .byom_handler import BYOMHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'BYOM' -name = 'byom' -type = HANDLER_TYPE.ML -icon_path = "icon.svg" - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'connection_args' -] diff --git a/mindsdb/integrations/handlers/byom_handler/byom_handler.py b/mindsdb/integrations/handlers/byom_handler/byom_handler.py deleted file mode 100644 index 3b8f7c901f6..00000000000 --- a/mindsdb/integrations/handlers/byom_handler/byom_handler.py +++ /dev/null @@ -1,649 +0,0 @@ -"""BYOM: Bring Your Own Model - -env vars to contloll BYOM: - - MINDSDB_BYOM_ENABLED - can BYOM be used or not. Locally enabled by default. - - MINDSDB_BYOM_INHOUSE_ENABLED - enable or disable 'inhouse' BYOM usage. Locally enabled by default. - - MINDSDB_BYOM_DEFAULT_TYPE - [inhouse|venv] default byom type. Locally it is 'venv' by default. -""" - -import os -import re -import sys -import shutil -import pickle -import tarfile -import tempfile -import subprocess -from enum import Enum -from pathlib import Path -from datetime import datetime -from typing import Optional, Dict, Union - -import pandas as pd -from pandas.api import types as pd_types - -from mindsdb.utilities import log -from mindsdb.utilities.config import config -from mindsdb.utilities.fs import safe_extract -from mindsdb.interfaces.storage import db -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.integrations.libs.const import PREDICTOR_STATUS -from mindsdb.integrations.utilities.utils import format_exception_error -import mindsdb.utilities.profiler as profiler - - -from .proc_wrapper import ( - pd_decode, - pd_encode, - encode, - decode, - BYOM_METHOD, - import_string, - find_model_class, - check_module, -) -from .__about__ import __version__ - - -BYOM_TYPE = Enum("BYOM_TYPE", ["INHOUSE", "VENV"]) - -logger = log.getLogger(__name__) - - -class BYOMHandler(BaseMLEngine): - name = "byom" - - def __init__(self, model_storage, engine_storage, **kwargs) -> None: - # region check availability - if config["byom"]["enabled"] is not True: - raise RuntimeError("BYOM is disabled") - # endregion - - self.model_wrapper = None - - self.inhouse_model_wrapper = None - self.model_wrappers = {} - - # region read and save set default byom type - try: - self._default_byom_type = BYOM_TYPE.VENV - if os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE") is not None: - self._default_byom_type = BYOM_TYPE[os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE").upper()] - except KeyError: - logger.warning(f"Wrong value of env var MINDSDB_BYOM_DEFAULT_TYPE, {BYOM_TYPE.VENV} will be used") - self._default_byom_type = BYOM_TYPE.VENV - # endregion - - # region check if 'inhouse' BYOM is enabled - env_var = os.environ.get("MINDSDB_BYOM_INHOUSE_ENABLED") - if env_var is None: - self._inhouse_enabled = False if config.is_cloud else True - else: - self._inhouse_enabled = env_var.lower() in ("true", "1") - # endregion - - super().__init__(model_storage, engine_storage, **kwargs) - - @staticmethod - def normalize_engine_version(engine_version: Union[int, str, None]) -> int: - """Cast engine version to int, or return `1` if can not be casted - - Args: - engine_version (Union[int, str, None]): engine version - - Returns: - int: engine version - """ - if isinstance(engine_version, str): - try: - engine_version = int(engine_version) - except Exception: - engine_version = 1 - if isinstance(engine_version, int) is False: - engine_version = 1 - return engine_version - - @staticmethod - def create_validation(target: str, args: dict = None, **kwargs) -> None: - if isinstance(args, dict) is False: - return - using_args = args.get("using", {}) - engine_version = using_args.get("engine_version") - if engine_version is not None: - engine_version = BYOMHandler.normalize_engine_version(engine_version) - else: - connection_args = kwargs["handler_storage"].get_connection_args() - versions = connection_args.get("versions") - if isinstance(versions, dict): - engine_version = max([int(x) for x in versions.keys()]) - else: - engine_version = 1 - using_args["engine_version"] = engine_version - - def get_model_engine_version(self) -> int: - """Return current model engine version - - Returns: - int: engine version - """ - engine_version = self.model_storage.get_info()["learn_args"].get("using", {}).get("engine_version") - engine_version = BYOMHandler.normalize_engine_version(engine_version) - return engine_version - - def normalize_byom_type(self, byom_type: Optional[str]) -> BYOM_TYPE: - if byom_type is not None: - byom_type = BYOM_TYPE[byom_type.upper()] - else: - byom_type = self._default_byom_type - if byom_type == BYOM_TYPE.INHOUSE and self._inhouse_enabled is False: - raise Exception("'Inhouse' BYOM engine type can not be used") - return byom_type - - def _get_model_proxy(self, version=None): - if version is None: - version = 1 - if isinstance(version, str): - version = int(version) - version_mark = "" - if version > 1: - version_mark = f"_{version}" - version_str = str(version) - - self.engine_storage.fileStorage.pull() - try: - code = self.engine_storage.fileStorage.file_get(f"code{version_mark}") - modules_str = self.engine_storage.fileStorage.file_get(f"modules{version_mark}") - except FileNotFoundError: - raise Exception(f"Engine version '{version}' does not exists") - - if version_str not in self.model_wrappers: - connection_args = self.engine_storage.get_connection_args() - version_meta = connection_args["versions"][version_str] - - try: - engine_version_type = BYOM_TYPE[version_meta.get("type", self._default_byom_type.name).upper()] - except KeyError: - raise Exception("Unknown BYOM engine type") - - if engine_version_type == BYOM_TYPE.INHOUSE: - if self._inhouse_enabled is False: - raise Exception("'Inhouse' BYOM engine type can not be used") - if self.inhouse_model_wrapper is None: - self.inhouse_model_wrapper = ModelWrapperUnsafe( - code=code, - modules_str=modules_str, - engine_id=self.engine_storage.integration_id, - engine_version=version, - ) - self.model_wrappers[version_str] = self.inhouse_model_wrapper - elif engine_version_type == BYOM_TYPE.VENV: - if version_meta.get("venv_status") != "ready": - version_meta["venv_status"] = "creating" - self.engine_storage.update_connection_args(connection_args) - self.model_wrappers[version_str] = ModelWrapperSafe( - code=code, - modules_str=modules_str, - engine_id=self.engine_storage.integration_id, - engine_version=version, - ) - version_meta["venv_status"] = "ready" - self.engine_storage.update_connection_args(connection_args) - - return self.model_wrappers[version_str] - - def describe(self, attribute: Optional[str] = None) -> pd.DataFrame: - engine_version = self.get_model_engine_version() - mp = self._get_model_proxy(engine_version) - model_state = self.model_storage.file_get("model") - return mp.describe(model_state, attribute) - - def create(self, target, df=None, args=None, **kwargs): - using_args = args.get("using", {}) - engine_version = using_args.get("engine_version") - - model_proxy = self._get_model_proxy(engine_version) - model_state = model_proxy.train(df, target, args) - - self.model_storage.file_set("model", model_state) - - # TODO return columns? - - def convert_type(field_type): - if pd_types.is_integer_dtype(field_type): - return "integer" - elif pd_types.is_numeric_dtype(field_type): - return "float" - elif pd_types.is_datetime64_any_dtype(field_type): - return "datetime" - else: - return "categorical" - - columns = {target: convert_type(object)} - - self.model_storage.columns_set(columns) - - def predict(self, df, args=None): - pred_args = args.get("predict_params", {}) - - engine_version = pred_args.get("engine_version") - if engine_version is not None: - engine_version = int(engine_version) - else: - engine_version = self.get_model_engine_version() - - model_proxy = self._get_model_proxy(engine_version) - model_state = self.model_storage.file_get("model") - pred_df = model_proxy.predict(df, model_state, pred_args) - - return pred_df - - def create_engine(self, connection_args): - code_path = Path(connection_args["code"]) - self.engine_storage.fileStorage.file_set("code", code_path.read_bytes()) - - requirements_path = Path(connection_args["modules"]) - self.engine_storage.fileStorage.file_set("modules", requirements_path.read_bytes()) - - self.engine_storage.fileStorage.push() - - self.engine_storage.update_connection_args( - { - "handler_version": __version__, - "mode": connection_args.get("mode"), - "versions": { - "1": { - "code": code_path.name, - "requirements": requirements_path.name, - "type": self.normalize_byom_type(connection_args.get("type")).name.lower(), - } - }, - } - ) - - model_proxy = self._get_model_proxy() - try: - info = model_proxy.check(connection_args.get("mode")) - self.engine_storage.json_set("methods", info["methods"]) - - except Exception as e: - if hasattr(model_proxy, "remove_venv"): - model_proxy.remove_venv() - raise e - - def update_engine(self, connection_args: dict) -> None: - """Add new version of engine - - Args: - connection_args (dict): paths to code and requirements - """ - code_path = Path(connection_args["code"]) - requirements_path = Path(connection_args["modules"]) - - engine_connection_args = self.engine_storage.get_connection_args() - if isinstance(engine_connection_args, dict) is False or "handler_version" not in engine_connection_args: - engine_connection_args = { - "handler_version": __version__, - "versions": { - "1": { - "code": "code.py", - "requirements": "requirements.txt", - "type": self._default_byom_type.name.lower(), - } - }, - } - new_version = str(max([int(x) for x in engine_connection_args["versions"].keys()]) + 1) - - engine_connection_args["versions"][new_version] = { - "code": code_path.name, - "requirements": requirements_path.name, - "type": self.normalize_byom_type(connection_args.get("type")).name.lower(), - } - - self.engine_storage.fileStorage.file_set(f"code_{new_version}", code_path.read_bytes()) - - self.engine_storage.fileStorage.file_set(f"modules_{new_version}", requirements_path.read_bytes()) - self.engine_storage.fileStorage.push() - - self.engine_storage.update_connection_args(engine_connection_args) - - model_proxy = self._get_model_proxy(new_version) - try: - methods = model_proxy.check() - self.engine_storage.json_set("methods", methods) - - except Exception as e: - if hasattr(model_proxy, "remove_venv"): - model_proxy.remove_venv() - raise e - - def function_list(self): - return self.engine_storage.json_get("methods") - - def function_call(self, name, args): - mp = self._get_model_proxy() - return mp.func_call(name, args) - - def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - using_args = args.get("using", {}) - engine_version = using_args.get("engine_version") - - model_storage = self.model_storage - try: - base_predictor_id = args["base_model_id"] - base_predictor_record = db.Predictor.query.get(base_predictor_id) - if base_predictor_record.status != PREDICTOR_STATUS.COMPLETE: - raise Exception("Base model must be in status 'complete'") - - predictor_id = model_storage.predictor_id - predictor_record = db.Predictor.query.get(predictor_id) - - predictor_record.data = { - "training_log": "training" - } # TODO move to ModelStorage (don't work w/ db directly) - predictor_record.training_start_at = datetime.now() - predictor_record.status = PREDICTOR_STATUS.FINETUNING # TODO: parallel execution block - db.session.commit() - - model_proxy = self._get_model_proxy(engine_version) - model_state = self.base_model_storage.file_get("model") - model_state = model_proxy.finetune(df, model_state, args=args.get("using", {})) - - # region hack to speedup file saving - with profiler.Context("finetune-byom-write-file"): - dest_abs_path = model_storage.fileStorage.folder_path / "model" - with open(dest_abs_path, "wb") as fd: - fd.write(model_state) - model_storage.fileStorage.push(compression_level=0) - # endregion - - predictor_record.update_status = "up_to_date" - predictor_record.status = PREDICTOR_STATUS.COMPLETE - predictor_record.training_stop_at = datetime.now() - db.session.commit() - - except Exception as e: - logger.error("Unexpected error during BYOM finetune:", exc_info=True) - predictor_id = model_storage.predictor_id - predictor_record = db.Predictor.query.with_for_update().get(predictor_id) - error_message = format_exception_error(e) - predictor_record.data = {"error": error_message} - predictor_record.status = PREDICTOR_STATUS.ERROR - db.session.commit() - raise - - finally: - if predictor_record.training_stop_at is None: - predictor_record.training_stop_at = datetime.now() - db.session.commit() - - -class ModelWrapperUnsafe: - """Model wrapper that executes learn/predict in current process""" - - def __init__(self, code, modules_str, engine_id, engine_version: int): - self.module = import_string(code) - - model_instance = None - model_class = find_model_class(self.module) - if model_class is not None: - model_instance = model_class() - - self.model_instance = model_instance - - def train(self, df, target, args): - self.model_instance.train(df, target, args) - return pickle.dumps(self.model_instance.__dict__, protocol=5) - - def predict(self, df, model_state, args): - model_state = pickle.loads(model_state) - self.model_instance.__dict__ = model_state - try: - result = self.model_instance.predict(df, args) - except Exception: - result = self.model_instance.predict(df) - return result - - def finetune(self, df, model_state, args): - self.model_instance.__dict__ = pickle.loads(model_state) - - call_args = [df] - if args: - call_args.append(args) - - self.model_instance.finetune(df, args) - - return pickle.dumps(self.model_instance.__dict__, protocol=5) - - def describe(self, model_state, attribute: Optional[str] = None) -> pd.DataFrame: - if hasattr(self.model_instance, "describe"): - model_state = pickle.loads(model_state) - self.model_instance.__dict__ = model_state - return self.model_instance.describe(attribute) - return pd.DataFrame() - - def func_call(self, func_name, args): - func = getattr(self.module, func_name) - return func(*args) - - def check(self, mode: str = None): - methods = check_module(self.module, mode) - return methods - - -class ModelWrapperSafe: - """Model wrapper that executes learn/predict in venv""" - - def __init__(self, code, modules_str, engine_id, engine_version: int): - self.code = code - modules = self.parse_requirements(modules_str) - - self.config = config - self.is_cloud = config.is_cloud - - self.env_path = None - self.env_storage_path = None - self.prepare_env(modules, engine_id, engine_version) - - def prepare_env(self, modules, engine_id, engine_version: int): - try: - import virtualenv - - base_path = self.config.get("byom", {}).get("venv_path") - if base_path is None: - # create in root path - base_path = Path(self.config.paths["root"]) / "venvs" - else: - base_path = Path(base_path) - base_path.mkdir(parents=True, exist_ok=True) - - env_folder_name = f"env_{engine_id}" - if isinstance(engine_version, int) and engine_version > 1: - env_folder_name = f"{env_folder_name}_{engine_version}" - - self.env_storage_path = base_path / env_folder_name - if self.is_cloud: - bese_env_path = Path(tempfile.gettempdir()) / "mindsdb" / "venv" - bese_env_path.mkdir(parents=True, exist_ok=True) - self.env_path = bese_env_path / env_folder_name - tar_path = self.env_storage_path.with_suffix(".tar") - if self.env_path.exists() is False and tar_path.exists() is True: - with tarfile.open(tar_path) as tar: - safe_extract(tar, path=bese_env_path) - else: - self.env_path = self.env_storage_path - - if sys.platform in ("win32", "cygwin"): - exectable_folder_name = "Scripts" - else: - exectable_folder_name = "bin" - - pip_cmd = self.env_path / exectable_folder_name / "pip" - self.python_path = self.env_path / exectable_folder_name / "python" - - if self.env_path.exists(): - # already exists. it means requirements are already installed - return - - # create - logger.info(f"Creating new environment: {self.env_path}") - virtualenv.cli_run(["-p", sys.executable, str(self.env_path)]) - logger.info(f"Created new environment: {self.env_path}") - - if len(modules) > 0: - self.install_modules(modules, pip_cmd=pip_cmd) - except Exception: - # DANGER !!! VENV MUST BE CREATED - logger.info("Can't create virtual environment. venv module should be installed") - - if self.is_cloud: - raise - - self.python_path = Path(sys.executable) - - # try to install modules everytime - self.install_modules(modules, pip_cmd=pip_cmd) - - # fastest way to copy files if destination is NFS - if self.is_cloud and self.env_storage_path != self.env_path: - old_cwd = os.getcwd() - os.chdir(str(bese_env_path)) - tar_path = self.env_path.with_suffix(".tar") - with tarfile.open(name=str(tar_path), mode="w") as tar: - tar.add(str(self.env_path.name)) - os.chdir(old_cwd) - subprocess.run( - ["cp", "-R", "--no-preserve=mode,ownership", str(tar_path), str(base_path / tar_path.name)], - check=True, - shell=False, - ) - tar_path.unlink() - - def remove_venv(self): - if self.env_path is not None and self.env_path.exists(): - shutil.rmtree(str(self.env_path)) - - if self.is_cloud: - tar_path = self.env_storage_path.with_suffix(".tar") - tar_path.unlink() - - def parse_requirements(self, requirements): - # get requirements from string - # they should be located at the top of the file, before code - - pattern = "^[\w\\[\\]-]+[=!<>\s]*[\d\.]*[,=!<>\s]*[\d\.]*$" # noqa - modules = [] - for line in requirements.split(b"\n"): - line = line.decode().strip() - if line: - if re.match(pattern, line): - modules.append(line) - else: - raise Exception(f"Wrong requirement: {line}") - - is_pandas = any([m.lower().startswith("pandas") for m in modules]) - if not is_pandas: - modules.append("pandas>=2.0.0,<2.1.0") - modules.append("numpy<2.0.0") - - # for dataframe serialization - modules.append("pyarrow==19.0.0") - return modules - - def install_modules(self, modules, pip_cmd): - # install in current environment using pip - for module in modules: - logger.debug(f"BYOM install module: {module}") - p = subprocess.Popen([pip_cmd, "install", module], stderr=subprocess.PIPE) - p.wait() - if p.returncode != 0: - raise Exception(f"Problem with installing module {module}: {p.stderr.read()}") - - def _run_command(self, params): - logger.debug(f"BYOM run command: {params.get('method')}") - params_enc = encode(params) - - wrapper_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "proc_wrapper.py") - p = subprocess.Popen( - [str(self.python_path), wrapper_path], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - - p.stdin.write(params_enc) - p.stdin.close() - ret_enc = p.stdout.read() - - p.wait() - - try: - ret = decode(ret_enc) - except (pickle.UnpicklingError, EOFError): - raise RuntimeError(p.stderr.read()) - return ret - - def check(self, mode: str = None): - params = { - "method": BYOM_METHOD.CHECK.value, - "code": self.code, - "mode": mode, - } - return self._run_command(params) - - def train(self, df, target, args): - params = { - "method": BYOM_METHOD.TRAIN.value, - "code": self.code, - "df": None, - "to_predict": target, - "args": args, - } - if df is not None: - params["df"] = pd_encode(df) - - model_state = self._run_command(params) - return model_state - - def predict(self, df, model_state, args): - params = { - "method": BYOM_METHOD.PREDICT.value, - "code": self.code, - "model_state": model_state, - "df": pd_encode(df), - "args": args, - } - pred_df = self._run_command(params) - return pd_decode(pred_df) - - def finetune(self, df, model_state, args): - params = { - "method": BYOM_METHOD.FINETUNE.value, - "code": self.code, - "model_state": model_state, - "df": pd_encode(df), - "args": args, - } - - model_state = self._run_command(params) - return model_state - - def describe(self, model_state, attribute: Optional[str] = None) -> pd.DataFrame: - params = { - "method": BYOM_METHOD.DESCRIBE.value, - "code": self.code, - "model_state": model_state, - "attribute": attribute, - } - enc_df = self._run_command(params) - df = pd_decode(enc_df) - return df - - def func_call(self, func_name, args): - params = { - "method": BYOM_METHOD.FUNC_CALL.value, - "code": self.code, - "func_name": func_name, - "args": args, - } - result = self._run_command(params) - return result diff --git a/mindsdb/integrations/handlers/byom_handler/connection_args.py b/mindsdb/integrations/handlers/byom_handler/connection_args.py deleted file mode 100644 index 333d3bb59ce..00000000000 --- a/mindsdb/integrations/handlers/byom_handler/connection_args.py +++ /dev/null @@ -1,19 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - code={ - 'type': ARG_TYPE.PATH, - 'description': 'The path to model code' - }, - modules={ - 'type': ARG_TYPE.PATH, - 'description': 'The path to model requirements' - }, - mode={ - 'type': ARG_TYPE.STR, - 'description': 'Mode of byom hander. It can be "custom_function" for using it as container for functions' - } -) diff --git a/mindsdb/integrations/handlers/byom_handler/icon.svg b/mindsdb/integrations/handlers/byom_handler/icon.svg deleted file mode 100644 index 2494154d911..00000000000 --- a/mindsdb/integrations/handlers/byom_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/byom_handler/proc_wrapper.py b/mindsdb/integrations/handlers/byom_handler/proc_wrapper.py deleted file mode 100644 index 93f37d4bf44..00000000000 --- a/mindsdb/integrations/handlers/byom_handler/proc_wrapper.py +++ /dev/null @@ -1,229 +0,0 @@ -""" -Utility functions used in the 'Bring Your Own Model' (BYOM) engine. - -These functions interact with interfaces (stdin, stdout), python files, and the actual BYOM engine. - -In particular, they: - - Wrap and run python code in separate python proceess. - - Communicate with parent process throughout stdin/out using pickle to serialize objects. - - -The flow is as follows: - - 1. Receive module code, method with parameters and stored attributes from parent process - 2. A python class object is created from the code - 3. Class is instanced and filled with stored attributes - 4. A calls to the chosen method of the class is performed with any relevant parameters that were passed - 5. Response is generated, appropriately packaged and sent to stdout - 6. Exit -""" - -import io -import sys -import pickle -import inspect -from enum import Enum - -import pandas as pd - - -class BYOM_METHOD(Enum): - CHECK = 1 - TRAIN = 2 - PREDICT = 3 - FINETUNE = 4 - DESCRIBE = 5 - FUNC_CALL = 6 - - -def pd_encode(df): - return df.to_parquet(engine='pyarrow') - - -def pd_decode(encoded): - fd = io.BytesIO() - fd.write(encoded) - fd.seek(0) - return pd.read_parquet(fd, engine='pyarrow') - - -def encode(obj): - return pickle.dumps(obj, protocol=5) - - -def decode(encoded): - return pickle.loads(encoded) - - -def return_output(obj): - # read stdin - encoded = encode(obj) - with open(1, 'wb') as fd: - fd.write(encoded) - sys.exit(0) - - -def get_input(): - # write to stdout - with open(0, 'rb') as fd: - encoded = fd.read() - obj = decode(encoded) - return obj - - -def import_string(code, module_name='model'): - # import string as python module - - import types - module = types.ModuleType(module_name) - - exec(code, module.__dict__) - # sys.modules['my_module'] = module - return module - - -def find_model_class(module): - # find the first class that contains predict and train methods - for _, cls in inspect.getmembers(module, inspect.isclass): - if inspect.getmodule(cls) is not None: - # is imported class - continue - - funcs = [ - name - for name, _ in inspect.getmembers(cls, inspect.isfunction) - ] - if 'predict' in funcs and 'train' in funcs: - # found - return cls - - -def get_methods_info(module): - # get all methods and their types - methods = {} - for method_name, method in inspect.getmembers(module, inspect.isfunction): - - sig = inspect.signature(method) - input_params = [ - {'name': name, 'type': param.annotation.__name__} - for name, param in sig.parameters.items() - ] - methods[method_name] = { - 'input_params': input_params, - 'output_type': sig.return_annotation.__name__ - } - return methods - - -def check_module(module, mode): - # checks module and returns info - - methods = {} - if mode == 'custom_function': - methods = get_methods_info(module) - - else: - # is BYOM, check it. - model_class = find_model_class(module) - if model_class is None: - raise RuntimeError('Unable to find model class (it has to have `train` and `predict` methods)') - - # try to initialize - model_class() - return {'methods': methods} - - -def main(): - # replace print output to stderr - sys.stdout = sys.stderr - - params = get_input() - - method = BYOM_METHOD(params['method']) - code = params['code'] - - module = import_string(code) - - if method == BYOM_METHOD.FUNC_CALL: - func_name = params['func_name'] - args = params['args'] - - func = getattr(module, func_name) - return return_output(func(*args)) - - if method == BYOM_METHOD.CHECK: - - mode = params['mode'] - info = check_module(module, mode) - - return return_output(info) - - model_class = find_model_class(module) - - if method == BYOM_METHOD.TRAIN: - df = params['df'] - if df is not None: - df = pd_decode(df) - to_predict = params['to_predict'] - args = params['args'] - model = model_class() - - call_args = [df, to_predict] - if args: - call_args.append(args) - model.train(*call_args) - - # return model - data = model.__dict__ - - model_state = encode(data) - return_output(model_state) - - elif method == BYOM_METHOD.PREDICT: - model_state = params['model_state'] - df = pd_decode(params['df']) - args = params['args'] - - model = model_class() - model.__dict__ = decode(model_state) - - call_args = [df] - if args: - call_args.append(args) - res = model.predict(*call_args) - return_output(pd_encode(res)) - - elif method == BYOM_METHOD.FINETUNE: - model_state = params['model_state'] - df = pd_decode(params['df']) - args = params['args'] - - model = model_class() - model.__dict__ = decode(model_state) - - call_args = [df] - if args: - call_args.append(args) - - model.finetune(*call_args) - - # return model - data = model.__dict__ - model_state = encode(data) - return_output(model_state) - - elif method == BYOM_METHOD.DESCRIBE: - model_state = params['model_state'] - model = model_class() - model.__dict__ = decode(model_state) - try: - df = model.describe(params.get('attribute')) - except Exception: - return_output(pd_encode(pd.DataFrame())) - return_output(pd_encode(df)) - - raise NotImplementedError(method) - - -if __name__ == '__main__': - main() diff --git a/mindsdb/integrations/handlers/byom_handler/requirements.txt b/mindsdb/integrations/handlers/byom_handler/requirements.txt deleted file mode 100644 index a40486c0646..00000000000 --- a/mindsdb/integrations/handlers/byom_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -virtualenv -scikit-learn==1.5.2 diff --git a/mindsdb/integrations/handlers/cassandra_handler/README.md b/mindsdb/integrations/handlers/cassandra_handler/README.md deleted file mode 100644 index 2c35ce05f3f..00000000000 --- a/mindsdb/integrations/handlers/cassandra_handler/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Cassandra Handler - -This is the implementation of the Apache Cassandra handler for MindsDB. - -## Cassandra - -Cassandra is a free and open-source, distributed, wide-column store, NoSQL database management system designed to handle large amounts of data across many commodity servers, providing high availability with no single point of failure. https://cassandra.apache.org/_/index.html - -## Implementation - -ScyllaDB is API-compatible with Apache Cassandra so this handler just extends the ScyllaHandler and is using the python `scylla-driver` library. - -The required arguments to establish a connection are: - -- `host`: the host name or IP address of the Cassandra -- `port`: the port to use when connecting -- `user`: the user to authenticate. Optional; required only if authentication is enabled. -- `password`: the password to authenticate the user. Optional; required only if authentication is enabled. -- `keyspace`: the keyspace to connect to(top level container for tables) -- `protocol_version`: not required, default to 4 - -## Usage - -In order to make use of this handler and connect to a Cassandra server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE sc -WITH ENGINE = "cassandra", -PARAMETERS = { - "host": "127.0.0.1", - "port": "9043", - "user": "user", - "password": "pass", - "keyspace": "test_data", - "protocol_version": 4 - } -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM sc.example_table LIMIT 10; -``` diff --git a/mindsdb/integrations/handlers/cassandra_handler/__about__.py b/mindsdb/integrations/handlers/cassandra_handler/__about__.py deleted file mode 100644 index ae28d842e15..00000000000 --- a/mindsdb/integrations/handlers/cassandra_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Cassandra handler' -__package_name__ = 'mindsdb_cassandra_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for CassandraDB" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/cassandra_handler/__init__.py b/mindsdb/integrations/handlers/cassandra_handler/__init__.py deleted file mode 100644 index c68372fe613..00000000000 --- a/mindsdb/integrations/handlers/cassandra_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args -try: - from .cassandra_handler import CassandraHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'Apache Cassandra' -name = 'cassandra' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/cassandra_handler/cassandra_handler.py b/mindsdb/integrations/handlers/cassandra_handler/cassandra_handler.py deleted file mode 100644 index 5c1c7403021..00000000000 --- a/mindsdb/integrations/handlers/cassandra_handler/cassandra_handler.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.handlers.scylla_handler import Handler as ScyllaHandler -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response) - - -class CassandraHandler(ScyllaHandler): - """ - This handler handles connection and execution of the Cassandra statements. - """ - - name = 'cassandra' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - - def get_tables(self) -> Response: - """ - Get the list of tables in the connected Cassandra database. - - :return: List of table names. - """ - sql = "DESCRIBE TABLES" - result = self.native_query(sql) - df = result.data_frame - df = df.rename(columns={'name': 'table_name'}) - result.data_frame = df - return result diff --git a/mindsdb/integrations/handlers/cassandra_handler/connection_args.py b/mindsdb/integrations/handlers/cassandra_handler/connection_args.py deleted file mode 100644 index 2e6f832fcd6..00000000000 --- a/mindsdb/integrations/handlers/cassandra_handler/connection_args.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'User name', - 'required': True, - 'label': 'User', - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'Password', - 'required': True, - 'label': 'Password', - 'secret': True - }, - protocol_version={ - 'type': ARG_TYPE.INT, - 'description': 'is not required and defaults to 4.', - 'required': False, - 'label': 'Protocol version', - }, - host={ - 'type': ARG_TYPE.STR, - 'description': ' is the host name or IP address of the Cassandra database.', - 'required': True, - 'label': 'Host', - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Server port', - 'required': True, - 'label': 'Port', - }, - keyspace={ - 'type': ARG_TYPE.STR, - 'description': ' is the keyspace to connect, the top level container for tables.', - 'required': True, - 'label': 'Keyspace', - }, - secure_connect_bundle={ - 'type': ARG_TYPE.STR, - 'description': 'Path or URL to the secure connect bundle', - 'required': False, - 'label': 'Host', - }, -) diff --git a/mindsdb/integrations/handlers/cassandra_handler/icon.svg b/mindsdb/integrations/handlers/cassandra_handler/icon.svg deleted file mode 100644 index 0bb4d04721a..00000000000 --- a/mindsdb/integrations/handlers/cassandra_handler/icon.svg +++ /dev/null @@ -1,33 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/cassandra_handler/requirements.txt b/mindsdb/integrations/handlers/cassandra_handler/requirements.txt deleted file mode 100644 index 45c4777e7b7..00000000000 --- a/mindsdb/integrations/handlers/cassandra_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/scylla_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/cassandra_handler/tests/__init__.py b/mindsdb/integrations/handlers/cassandra_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/cassandra_handler/tests/test_cassandra_handler.py b/mindsdb/integrations/handlers/cassandra_handler/tests/test_cassandra_handler.py deleted file mode 100644 index 801995bf721..00000000000 --- a/mindsdb/integrations/handlers/cassandra_handler/tests/test_cassandra_handler.py +++ /dev/null @@ -1,39 +0,0 @@ -import unittest -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.handlers.cassandra_handler.cassandra_handler import CassandraHandler - - -class CassandraHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "127.0.0.1", - "port": "9043", - "user": "cassandra", - "password": "", - "keyspace": "test_data", - "protocol_version": 4 - } - } - cls.handler = CassandraHandler('test_cassandra_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.check_connection() - - def test_1_native_query_show_keyspaces(self): - dbs = self.handler.native_query("DESC KEYSPACES;") - assert dbs.type is not RESPONSE_TYPE.ERROR - - def test_2_get_tables(self): - tbls = self.handler.get_tables() - assert tbls.type is not RESPONSE_TYPE.ERROR - - def test_3_describe_table(self): - described = self.handler.get_columns("home_rentals") - assert described.type is RESPONSE_TYPE.TABLE - - def test_4_select_query(self): - query = "SELECT * FROM home_rentals WHERE 'id'='3712'" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE diff --git a/mindsdb/integrations/handlers/chromadb_handler/__init__.py b/mindsdb/integrations/handlers/chromadb_handler/__init__.py index 9c5a069c83f..613aa3f0647 100644 --- a/mindsdb/integrations/handlers/chromadb_handler/__init__.py +++ b/mindsdb/integrations/handlers/chromadb_handler/__init__.py @@ -1,10 +1,12 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE +from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE from .__about__ import __description__ as description from .__about__ import __version__ as version from .connection_args import connection_args, connection_args_example + try: from .chromadb_handler import ChromaDBHandler as Handler + import_error = None except Exception as e: Handler = None @@ -13,6 +15,7 @@ title = "ChromaDB" name = "chromadb" type = HANDLER_TYPE.DATA +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB icon_path = "icon.png" __all__ = [ @@ -22,6 +25,7 @@ "type", "title", "description", + "support_level", "connection_args", "connection_args_example", "import_error", diff --git a/mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py b/mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py index 61a5b439d12..32d0e566b00 100644 --- a/mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +++ b/mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py @@ -215,17 +215,22 @@ def select( include = ["metadatas", "documents", "embeddings"] - # check if embedding vector filter is present - vector_filter = ( - [] - if conditions is None - else [condition for condition in conditions if condition.column == TableField.EMBEDDINGS.value] - ) + # Identify Search Intent + vector_filter = None + content_filter = None - if len(vector_filter) > 0: - vector_filter = vector_filter[0] - else: - vector_filter = None + if conditions is not None: + # Embeddings + v_filters = [c for c in conditions if c.column == TableField.EMBEDDINGS.value] + if v_filters: + vector_filter = v_filters[0] + + # Semantic Search + c_filters = [c for c in conditions if c.column == TableField.CONTENT.value] + if c_filters: + content_filter = c_filters[0] + + # ID Filtering ids_include = [] ids_exclude = [] @@ -242,14 +247,26 @@ def select( elif condition.op == FilterOperator.NOT_IN: ids_exclude.extend(condition.value) - if vector_filter is not None: - # similarity search + # Trigger search if Vector OR Content is present + if vector_filter is not None or content_filter is not None: + # Similarity search query_payload = { "where": filters, - "query_embeddings": vector_filter.value if vector_filter is not None else None, "include": include + ["distances"], } + # Handle Vector Search + if vector_filter: + query_payload["query_embeddings"] = vector_filter.value + + # Handle Text Search + if content_filter: + val = content_filter.value + if isinstance(val, list): + query_payload["query_texts"] = val + else: + query_payload["query_texts"] = [val] + if limit is not None: if len(ids_include) == 0 and len(ids_exclude) == 0: query_payload["n_results"] = limit @@ -265,7 +282,7 @@ def select( embeddings = result["embeddings"][0] else: - # general get query + # general get query (Exact Match) result = collection.get( ids=ids_include or None, where=filters, @@ -279,7 +296,6 @@ def select( embeddings = result["embeddings"] distances = None - # project based on columns payload = { TableField.ID.value: ids, TableField.CONTENT.value: documents, @@ -290,7 +306,7 @@ def select( if columns is not None: payload = {column: payload[column] for column in columns if column != TableField.DISTANCE.value} - # always include distance + # Include distance distance_filter = None distance_col = TableField.DISTANCE.value if distances is not None: diff --git a/mindsdb/integrations/handlers/chromadb_handler/settings.py b/mindsdb/integrations/handlers/chromadb_handler/settings.py index 2b669ed75a8..279c404384e 100644 --- a/mindsdb/integrations/handlers/chromadb_handler/settings.py +++ b/mindsdb/integrations/handlers/chromadb_handler/settings.py @@ -14,7 +14,7 @@ class ChromaHandlerConfig(BaseModel): host: str = None port: str = None password: str = None - distance: str = 'cosine' + distance: str = "cosine" class Config: extra = "forbid" @@ -27,13 +27,9 @@ def check_param_typos(cls, values: Any) -> Any: expected_params = cls.model_fields.keys() for key in values.keys(): if key not in expected_params: - close_matches = difflib.get_close_matches( - key, expected_params, cutoff=0.4 - ) + close_matches = difflib.get_close_matches(key, expected_params, cutoff=0.4) if close_matches: - raise ValueError( - f"Unexpected parameter '{key}'. Did you mean '{close_matches[0]}'?" - ) + raise ValueError(f"Unexpected parameter '{key}'. Did you mean '{close_matches[0]}'?") else: raise ValueError(f"Unexpected parameter '{key}'.") return values @@ -56,8 +52,7 @@ def check_config(cls, values: Any) -> Any: if persist_directory and (host or port): raise ValueError( - f"For {vector_store} handler - if persistence_folder is provided, " - f"host, port should not be provided." + f"For {vector_store} handler - if persistence_folder is provided, host, port should not be provided." ) return values diff --git a/mindsdb/integrations/handlers/chromadb_handler/tests/test_chromadb_handler.py b/mindsdb/integrations/handlers/chromadb_handler/tests/test_chromadb_handler.py new file mode 100644 index 00000000000..d3e5d330d16 --- /dev/null +++ b/mindsdb/integrations/handlers/chromadb_handler/tests/test_chromadb_handler.py @@ -0,0 +1,83 @@ +import unittest +from unittest.mock import Mock, patch +import pandas as pd +from mindsdb.integrations.handlers.chromadb_handler.chromadb_handler import ( + ChromaDBHandler, + TableField, +) + + +class MockCondition: + def __init__(self, column, op, value): + self.column = column + self.op = op + self.value = value + + +class TestChromaHandler(unittest.TestCase): + def setUp(self): + self.handler = ChromaDBHandler(name="test_chroma", connection_data={}, handler_storage=Mock()) + + # INSERT + @patch("mindsdb.integrations.handlers.chromadb_handler.chromadb_handler.ChromaDBHandler.connect") + def test_insert_calls_upsert(self, mock_connect): + mock_client = Mock() + mock_collection = Mock() + mock_client.get_or_create_collection.return_value = mock_collection + self.handler._client = mock_client + self.handler.is_connected = True + + df = pd.DataFrame( + { + TableField.CONTENT.value: ["Cat Photo"], + TableField.EMBEDDINGS.value: [[0.9, 0.1, 0.1]], + TableField.ID.value: ["img_1"], + TableField.METADATA.value: [{"author": "Sriram"}], + } + ) + self.handler.insert("my_gallery", df) + + call_args = mock_collection.upsert.call_args[1] + self.assertEqual(call_args["embeddings"], [[0.9, 0.1, 0.1]]) + + # SELECT + @patch("mindsdb.integrations.handlers.chromadb_handler.chromadb_handler.ChromaDBHandler.disconnect") + @patch("mindsdb.integrations.handlers.chromadb_handler.chromadb_handler.ChromaDBHandler.connect") + def test_select_semantic_search(self, mock_connect, mock_disconnect): + # Mock System + mock_client = Mock() + mock_collection = Mock() + mock_client.get_collection.return_value = mock_collection + + self.handler._client = mock_client + self.handler.is_connected = True + + # Mock Return Data + mock_result = { + "ids": [["id1"]], + "documents": [["Dog"]], + "metadatas": [[{}]], + "embeddings": [[[0.1, 0.2]]], + "distances": [[0.5]], + } + mock_collection.query.return_value = mock_result + mock_collection.get.return_value = mock_result + + conditions = [MockCondition(column=TableField.CONTENT.value, op="=", value="Dog")] + + self.handler.select("my_gallery", conditions=conditions) + + # Verification + if not mock_collection.query.called: + self.fail("CRITICAL: The handler used .get() (Exact Match) instead of .query() (Semantic Search)!") + + call_args = mock_collection.query.call_args[1] + + if "query_texts" not in call_args: + self.fail("CRITICAL: The handler called .query() but forgot 'query_texts'!") + + self.assertEqual(call_args["query_texts"], ["Dog"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/mindsdb/integrations/handlers/ckan_handler/README.md b/mindsdb/integrations/handlers/ckan_handler/README.md deleted file mode 100644 index 2f0c51fb4ca..00000000000 --- a/mindsdb/integrations/handlers/ckan_handler/README.md +++ /dev/null @@ -1,78 +0,0 @@ -## CKAN Integration handler - -This handler facilitates integration with [CKAN](https://ckan.org/). -an open-source data catalog platform for managing and publishing open data. CKAN organizes datasets and stores data in its [DataStore](http://docs.ckan.org/en/2.11/maintaining/datastore.html).To retrieve data from CKAN, the [CKAAPI](https://github.com/ckan/ckanapi) must be used. - -# Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect SAP HANA to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -The CKAN handler is included with MindsDB by default, so no additional installation is required. - -## Configuration - -To use the CKAN handler, you need to provide the URL of the CKAN instance you want to connect to. You can do this by setting the `CKAN_URL` environment variable. For example: - -```sql -CREATE DATABASE ckan_datasource -WITH ENGINE = 'ckan', -PARAMETERS = { - "url": "https://your-ckan-instance-url.com", - "api_key": "your-api-key-if-required" -}; -``` - -> **_NOTE:_** Some CKAN instances will require you to provide an API Token. You can create one in the CKAN user panel. - -## Usage - -The CKAN handler provides three main tables: - -- `datasets`: Lists all datasets in the CKAN instance. -- `resources`: Lists all resources metadata across all packages. -- `datastore`: Allows querying individual datastore resources. - -## Example Queries - -1. List all datasets: - - ```sql - SELECT * FROM `your-datasource`.datasets; - ``` - -2. List all resources: - - ```sql - SELECT * FROM `your-datasource`.resources ; - ``` - -3. Query a specific datastore resource: - - ```sql - SELECT * FROM `your-datasource`.datastore WHERE resource_id = 'your-resource-id'; - ``` - -Replace `your-resource-id-here` with the actual resource ID you want to query. - -## Querying Large Resources - -The CKAN handler supports automatic pagination when querying datastore resources. This allows you to retrieve large datasets without worrying about API limits. - -You can still use the `LIMIT` clause to limit the number of rows returned by the query. For example: - -```sql -SELECT * FROM ckan_datasource.datastore -WHERE resource_id = 'your-resource-id-here' -LIMIT 1000; -``` - -## Limitations - -- The handler currently supports read operations only. Write operations are not supported. -- Performance may vary depending on the size of the CKAN instance and the complexity of your queries. -- The handler may not work with all CKAN instances, especially those with custom configurations. -- The handler does not support all CKAN API features. Some advanced features may not be available. -- The datastore search will return limited records up to 32000. Please refer to the [CKAN API](https://docs.ckan.org/en/2.11/maintaining/datastore.html#ckanext.datastore.logic.action.datastore_search_sql) documentation for more information. diff --git a/mindsdb/integrations/handlers/ckan_handler/__about__.py b/mindsdb/integrations/handlers/ckan_handler/__about__.py deleted file mode 100644 index e4630c10a94..00000000000 --- a/mindsdb/integrations/handlers/ckan_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB CKAN handler' -__package_name__ = 'mindsdb_ckan_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for CKAN" -__author__ = 'Konstantin Sivakov' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/ckan_handler/__init__.py b/mindsdb/integrations/handlers/ckan_handler/__init__.py deleted file mode 100644 index f6e4f7e946c..00000000000 --- a/mindsdb/integrations/handlers/ckan_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .ckan_handler import CkanHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'CKAN' -name = 'ckan' -type = HANDLER_TYPE.DATA -icon_path = 'icon.png' - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/ckan_handler/ckan_handler.py b/mindsdb/integrations/handlers/ckan_handler/ckan_handler.py deleted file mode 100644 index 0dd4ae2a426..00000000000 --- a/mindsdb/integrations/handlers/ckan_handler/ckan_handler.py +++ /dev/null @@ -1,313 +0,0 @@ -import pandas as pd -from ckanapi import RemoteCKAN - -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb.integrations.libs.response import ( - HandlerResponse, - HandlerStatusResponse, - RESPONSE_TYPE, -) -from mindsdb_sql_parser import ast -from mindsdb.utilities import log -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - -logger = log.getLogger(__name__) - - -class DatasetsTable(APITable): - ''' - Datasets table contains information about CKAN datasets. - This table is used to list all datasets available in CKAN that have datastore active resources. - ''' - - def select(self, query: ast.Select) -> pd.DataFrame: - conditions = extract_comparison_conditions(query.where) if query.where else [] - limit = query.limit.value if query.limit else 1000 - packages = self.list(conditions, limit) - return pd.DataFrame(packages) - - def list(self, conditions=None, limit=1000): - self.handler.connect() - package_list = self.handler.call_ckan_api("package_search", {"rows": limit}) - packages = package_list.get("results", []) - - data = [] - # Get only datastore active resources - for pkg in packages: - datastore_active_resources = [ - r for r in pkg.get("resources", []) if r.get("datastore_active") - ] - data.append( - { - "id": pkg.get("id"), - "name": pkg.get("name"), - "title": pkg.get("title"), - "num_resources": len(pkg.get("resources", [])), - "num_datastore_active_resources": len(datastore_active_resources), - } - ) - - return pd.DataFrame(data) - - def get_columns(self): - return [ - "id", - "name", - "title", - "num_resources", - "num_datastore_active_resources", - ] - - -class ResourceIDsTable(APITable): - ''' - ResourceIDs table contains information about CKAN resources. - This table is used to list all resources available in CKAN that are datastore active. - ''' - - def select(self, query: ast.Select) -> pd.DataFrame: - conditions = extract_comparison_conditions(query.where) if query.where else [] - limit = query.limit.value if query.limit else 1000 - - resources = self.list(conditions, limit) - return pd.DataFrame(resources) - - def list(self, conditions=None, limit=1000): - self.handler.connect() - package_list = self.handler.call_ckan_api("package_search", {"rows": limit}) - packages = package_list.get("results", []) - - data = [] - for package in packages: - for resource in package.get("resources", []): - # Get only datastore active resources - if resource.get("datastore_active"): - data.append( - { - "id": resource.get("id"), - "package_id": package.get("id"), - "name": resource.get("name"), - "format": resource.get("format"), - "url": resource.get("url"), - "datastore_active": resource.get("datastore_active"), - } - ) - if len(data) >= limit: - break - if len(data) >= limit: - break - - return pd.DataFrame(data) - - def get_columns(self): - return [ - "id", - "package_id", - "name", - "format", - "url", - "datastore_active", - ] - - -class DatastoreTable(APITable): - ''' - Datastore table is used to query CKAN datastore resources. - This table is used to query data from CKAN datastore resources. - It is using the datastore_search_sql API to execute SQL queries on CKAN datastore resources. - ''' - - def select(self, query: ast.Select) -> pd.DataFrame: - conditions = extract_comparison_conditions(query.where) if query.where else [] - resource_id = self.extract_resource_id(conditions) - - if resource_id: - return self.execute_resource_query(query, resource_id) - else: - message = "Please provide a resource_id in your query. Example: SELECT * FROM datastore WHERE resource_id = 'your_resource_id'" - df = pd.DataFrame({"message": [message]}) - return df - - def execute_resource_query( - self, query: ast.Select, resource_id: str - ) -> pd.DataFrame: - sql_query = self.ast_to_sql(query, resource_id) - result = self.handler.call_ckan_api("datastore_search_sql", {"sql": sql_query}) - - records = result.get("records", []) - - df = pd.DataFrame(records) - - df = df.loc[:, ~df.columns.str.startswith("_")] - - return df - - def ast_to_sql(self, query: ast.Select, resource_id: str) -> str: - sql_parts = [ - f"SELECT {self.render_columns(query.targets)}", - f'FROM "{resource_id}"', - ] - - # Handle WHERE clause - where_conditions = [] - if query.where: - where_conditions = self.extract_where_conditions(query.where) - - where_conditions = [ - cond - for cond in where_conditions - if not ( - isinstance(cond, ast.BinaryOperation) - and cond.args[0].parts[-1] == "resource_id" - ) - ] - - if where_conditions: - sql_parts.append( - f'WHERE {" AND ".join(self.render_where(cond) for cond in where_conditions)}' - ) - - # Handle LIMIT - if query.limit: - sql_parts.append(f"LIMIT {query.limit.value}") - - return " ".join(sql_parts) - - def render_columns(self, targets): - if not targets or (len(targets) == 1 and isinstance(targets[0], ast.Star)): - return "*" - return ", ".join(self.render_column(target) for target in targets) - - def render_column(self, target): - if isinstance(target, ast.Identifier): - return f'"{target.parts[-1]}"' - # Handle other types of targets as needed - return str(target) - - def extract_where_conditions(self, where): - if isinstance(where, ast.BinaryOperation) and where.op == "and": - return self.extract_where_conditions( - where.args[0] - ) + self.extract_where_conditions(where.args[1]) - return [where] - - def render_where(self, where): - if isinstance(where, ast.BinaryOperation): - left = self.render_where(where.args[0]) - right = self.render_where(where.args[1]) - - if where.op == "like": - return f"{left} ILIKE {right}" - elif where.op in ["=", ">", "<", ">=", "<=", "<>"]: - return f"{left} {where.op} {right}" - # Add more operators as needed - - elif isinstance(where, ast.Constant): - return ( - f"'{where.value}'" if isinstance(where.value, str) else str(where.value) - ) - - elif isinstance(where, ast.Identifier): - return f'"{where.parts[-1]}"' - - # Handle other types of WHERE conditions as needed - return str(where) - - def extract_resource_id(self, conditions): - for condition in conditions: - if isinstance(condition, list) and len(condition) == 3: - op, col, val = condition - if col == "resource_id" and op == "=": - return val - return None - - def get_columns(self): - return [field["id"] for field in self.fields] - - -class CkanHandler(APIHandler): - name = "ckan" - - def __init__(self, name=None, **kwargs): - super().__init__(name) - self.connection = None - self.is_connected = False - self.connection_args = kwargs.get("connection_data", {}) - - self.datasets_table = DatasetsTable(self) - self.resources_table = ResourceIDsTable(self) - self.datastore_table = DatastoreTable(self) - - self._register_table("datasets", self.datasets_table) - self._register_table("resources", self.resources_table) - self._register_table("datastore", self.datastore_table) - - def connect(self): - if self.is_connected: - return self.connection - - url = self.connection_args.get("url") - api_key = self.connection_args.get("api_key") - if not url: - raise ValueError("CKAN URL is required") - - try: - self.connection = RemoteCKAN(url, apikey=api_key) - self.is_connected = True - logger.info(f"Successfully connected to CKAN at {url}") - except Exception as e: - logger.error(f"Error connecting to CKAN: {e}") - raise ConnectionError(f"Failed to connect to CKAN: {e}") - - return self.connection - - def check_connection(self) -> HandlerStatusResponse: - try: - self.connect() - return HandlerStatusResponse(success=True) - except Exception as e: - logger.error(f"Error checking connection: {e}") - return HandlerStatusResponse(success=False, error_message=str(e)) - - def call_ckan_api(self, method_name: str, params: dict): - connection = self.connect() - method = getattr(connection.action, method_name) - - try: - result = method(**params) - return result - except Exception as e: - logger.error(f"Error calling CKAN API: {e}") - raise RuntimeError(f"Failed to call CKAN API: {e}") - - def native_query(self, query: str) -> HandlerResponse: - method, params = self.parse_native_query(query) - try: - result = self.call_ckan_api(method, params) - if isinstance(result, list): - df = pd.DataFrame(result) - elif isinstance(result, dict): - df = pd.DataFrame([result]) - else: - df = pd.DataFrame([{"result": result}]) - return HandlerResponse(RESPONSE_TYPE.TABLE, df) - except Exception as e: - logger.error(f"Error executing native query: {e}") - return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e)) - - @staticmethod - def parse_native_query(query: str): - parts = query.split(":") - if len(parts) != 2: - raise ValueError( - "Invalid query format. Expected 'method_name:param1=value1,param2=value2'" - ) - method = parts[0].strip() - params = {} - if parts[1].strip(): - param_pairs = parts[1].split(",") - for pair in param_pairs: - key, value = pair.split("=") - params[key.strip()] = value.strip() - - return method, params diff --git a/mindsdb/integrations/handlers/ckan_handler/connection_args.py b/mindsdb/integrations/handlers/ckan_handler/connection_args.py deleted file mode 100644 index 95b89336fce..00000000000 --- a/mindsdb/integrations/handlers/ckan_handler/connection_args.py +++ /dev/null @@ -1,22 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - url={ - "type": ARG_TYPE.STR, - "description": "The URI-Like connection string to the CKAN server. If provided, it will override the other connection arguments.", - "required": True, - "label": "URL", - }, - api_key={ - "type": ARG_TYPE.STR, - "description": "The API key used to authenticate with the CKAN server. For CKAN 2.10+ API tokens are supported.", - "required": False, - "label": "API Key/Token", - }, -) - - -connection_args_example = OrderedDict(url="https://data.gov.au/data/", api_key="my_api_key") diff --git a/mindsdb/integrations/handlers/ckan_handler/icon.png b/mindsdb/integrations/handlers/ckan_handler/icon.png deleted file mode 100644 index ee1c08d3964..00000000000 Binary files a/mindsdb/integrations/handlers/ckan_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/ckan_handler/requirements.txt b/mindsdb/integrations/handlers/ckan_handler/requirements.txt deleted file mode 100644 index 5a237d414fe..00000000000 --- a/mindsdb/integrations/handlers/ckan_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -ckanapi \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ckan_handler/tests/__init__.py b/mindsdb/integrations/handlers/ckan_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/ckan_handler/tests/test_ckan_handler.py b/mindsdb/integrations/handlers/ckan_handler/tests/test_ckan_handler.py deleted file mode 100644 index 1692da31930..00000000000 --- a/mindsdb/integrations/handlers/ckan_handler/tests/test_ckan_handler.py +++ /dev/null @@ -1,156 +0,0 @@ -import unittest -from unittest.mock import patch, MagicMock -from mindsdb.integrations.handlers.ckan_handler.ckan_handler import CkanHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb_sql_parser import ast - - -class CkanHandlerTest(unittest.TestCase): - def setUp(self): - self.handler = CkanHandler('test_ckan_handler', connection_data={ - "url": "http://mock-ckan-url.com", - "api_key": "mock_api_key" - }) - self.patcher = patch('mindsdb.integrations.handlers.ckan_handler.ckan_handler.RemoteCKAN') - self.mock_ckan = self.patcher.start() - self.mock_ckan_instance = MagicMock() - self.mock_ckan.return_value = self.mock_ckan_instance - - def tearDown(self): - self.patcher.stop() - - def test_check_connection(self): - self.mock_ckan_instance.action.site_read.return_value = True - response = self.handler.check_connection() - self.assertTrue(response.success) - - def test_get_tables(self): - self.mock_ckan_instance.action.package_list.return_value = [ - "package1", - "package2", - ] - result = self.handler.get_tables() - self.assertIsNotNone(result) - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - - def test_query_package_ids(self): - mock_packages = [ - { - "id": "pkg1", - "name": "Package 1", - "title": "Test Package 1", - "num_resources": 2, - }, - { - "id": "pkg2", - "name": "Package 2", - "title": "Test Package 2", - "num_resources": 1, - }, - ] - self.mock_ckan_instance.action.package_search.return_value = { - "results": mock_packages - } - - query = ast.Select( - targets=[ast.Star()], - from_table=ast.Identifier("package_ids"), - limit=ast.Constant(10), - ) - result = self.handler.query(query) - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 2) - - def test_query_resource_ids(self): - mock_packages = [ - { - "id": "pkg1", - "resources": [ - { - "id": "res1", - "name": "Resource 1", - "format": "CSV", - "datastore_active": True, - }, - { - "id": "res2", - "name": "Resource 2", - "format": "XSLX", - "datastore_active": False, - }, - ], - }, - { - "id": "pkg2", - "resources": [ - { - "id": "res3", - "name": "Resource 3", - "format": "CSV", - "datastore_active": True, - } - ], - }, - ] - self.mock_ckan_instance.action.package_search.return_value = { - "results": mock_packages - } - - query = ast.Select( - targets=[ast.Star()], - from_table=ast.Identifier("resource_ids"), - limit=ast.Constant(10), - ) - result = self.handler.query(query) - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 2) - - def test_query_datastore_without_resource_id(self): - query = ast.Select( - targets=[ast.Star()], - from_table=ast.Identifier("datastore"), - limit=ast.Constant(10), - ) - result = self.handler.query(query) - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 1) - self.assertIn("message", result.data_frame.columns) - - def test_query_datastore_with_resource_id(self): - mock_records = [ - {"id": 1, "name": "Record 1"}, - {"id": 2, "name": "Record 2"}, - {"id": 3, "name": "Record 3"}, - ] - self.mock_ckan_instance.action.datastore_search.return_value = { - "records": mock_records - } - - query = ast.Select( - targets=[ast.Star()], - from_table=ast.Identifier("datastore"), - where=ast.BinaryOperation( - "=", - args=[ast.Identifier("resource_id"), ast.Constant("test_resource_id")], - ), - limit=ast.Constant(10), - ) - result = self.handler.query(query) - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 3) - - def test_native_query(self): - self.mock_ckan_instance.action.package_list.return_value = [ - "package1", - "package2", - "package3", - ] - - query = "package_list:" - result = self.handler.native_query(query) - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 3) - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/clickhouse_handler/README.md b/mindsdb/integrations/handlers/clickhouse_handler/README.md deleted file mode 100644 index 699e4ab86f6..00000000000 --- a/mindsdb/integrations/handlers/clickhouse_handler/README.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: ClickHouse -sidebarTitle: ClickHouse ---- - -This documentation describes the integration of MindsDB with [ClickHouse](https://clickhouse.com/docs/en/intro), a high-performance, column-oriented SQL database management system (DBMS) for online analytical processing (OLAP). -The integration allows MindsDB to access data from ClickHouse and enhance ClickHouse with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB [locally via Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or use [MindsDB Cloud](https://cloud.mindsdb.com/). -2. To connect ClickHouse to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to ClickHouse from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/clickhouse_handler) as an engine. - -```sql -CREATE DATABASE clickhouse_conn -WITH ENGINE = 'clickhouse', -PARAMETERS = { - "host": "127.0.0.1", - "port": "8443", - "user": "root", - "password": "mypass", - "database": "test_data", - "protocol" : "https" - } -``` - -Required connection parameters include the following: - -* `host`: is the hostname or IP address of the ClickHouse server. -* `port`: is the TCP/IP port of the ClickHouse server. -* `user`: is the username used to authenticate with the ClickHouse server. -* `password`: is the password to authenticate the user with the ClickHouse server. -* `database`: defaults to `default`. It is the database name to use when connecting with the ClickHouse server. -* `protocol`: defaults to `native`. It is an optional parameter. Its supported values are `native`, `http` and `https`. - -## Usage - -The following usage examples utilize the connection to ClickHouse made via the `CREATE DATABASE` statement and named `clickhouse_conn`. - -Retrieve data from a specified table by providing the integration and table name. - -```sql -SELECT * -FROM clickhouse_conn.table_name -LIMIT 10; -``` - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the ClickHouse database. -* **Checklist**: - 1. Ensure that the ClickHouse server is running and accessible - 2. Confirm that host, port, user, and password are correct. Try a direct MySQL connection. - 3. Test the network connection between the MindsDB host and the ClickHouse server. - - - -`Slow Connection Initialization` - -* **Symptoms**: Connecting to the ClickHouse server takes an exceptionally long time, or connections hang without completing -* **Checklist**: - 1. Ensure that you are using the appropriate protocol (http, https, or native) for your ClickHouse setup. Misconfigurations here can lead to significant delays. - 2. Ensure that firewalls or security groups (in cloud environments) are properly configured to allow traffic on the necessary ports (as 8123 for HTTP or 9000 for native). - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces, reserved words or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/mindsdb/integrations/handlers/clickhouse_handler/__about__.py b/mindsdb/integrations/handlers/clickhouse_handler/__about__.py deleted file mode 100644 index 463db8470a2..00000000000 --- a/mindsdb/integrations/handlers/clickhouse_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB ClickHouse handler' -__package_name__ = 'mindsdb_clickhouse_handler' -__version__ = '0.0.2' -__description__ = "MindsDB handler for ClickHouse" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022 MindsDB' diff --git a/mindsdb/integrations/handlers/clickhouse_handler/__init__.py b/mindsdb/integrations/handlers/clickhouse_handler/__init__.py deleted file mode 100644 index fa29b4f0f43..00000000000 --- a/mindsdb/integrations/handlers/clickhouse_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .clickhouse_handler import ClickHouseHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'ClickHouse' -name = 'clickhouse' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/clickhouse_handler/clickhouse_handler.py b/mindsdb/integrations/handlers/clickhouse_handler/clickhouse_handler.py deleted file mode 100644 index feda48c1323..00000000000 --- a/mindsdb/integrations/handlers/clickhouse_handler/clickhouse_handler.py +++ /dev/null @@ -1,167 +0,0 @@ -from urllib.parse import quote, urlencode - -import pandas as pd -from sqlalchemy import create_engine -from sqlalchemy.exc import SQLAlchemyError -from clickhouse_sqlalchemy.drivers.base import ClickHouseDialect -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - -logger = log.getLogger(__name__) - - -class ClickHouseHandler(DatabaseHandler): - """ - This handler handles connection and execution of the ClickHouse statements. - """ - - name = "clickhouse" - - def __init__(self, name, connection_data, **kwargs): - super().__init__(name) - self.dialect = "clickhouse" - self.connection_data = connection_data - self.renderer = SqlalchemyRender(ClickHouseDialect) - self.is_connected = False - self.protocol = connection_data.get("protocol", "native") - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """ - Establishes a connection to a ClickHouse server using SQLAlchemy. - - Raises: - SQLAlchemyError: If an error occurs while connecting to the database. - - Returns: - Connection: A SQLAlchemy Connection object to the ClickHouse database. - """ - if self.is_connected: - return self.connection - - protocol = "clickhouse+native" if self.protocol == "native" else "clickhouse+http" - host = quote(self.connection_data["host"]) - port = self.connection_data["port"] - user = quote(self.connection_data["user"]) - password = quote(self.connection_data["password"]) - database = quote(self.connection_data["database"]) - verify = self.connection_data.get("verify", True) - url = f"{protocol}://{user}:{password}@{host}:{port}/{database}" - # This is not redundunt. Check https://clickhouse-sqlalchemy.readthedocs.io/en/latest/connection.html#http - - params = {} - if self.protocol == "https": - params["protocol"] = "https" - if verify is False: - params["verify"] = "false" - if params: - url = f"{url}?{urlencode(params)}" - - try: - engine = create_engine(url) - connection = engine.raw_connection() - self.is_connected = True - self.connection = connection - except SQLAlchemyError as e: - logger.error(f"Error connecting to ClickHouse {self.connection_data['database']}, {e}!") - self.is_connected = False - raise - - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the ClickHouse. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = not self.is_connected - - try: - connection = self.connect() - cur = connection.cursor() - try: - cur.execute("select 1;") - finally: - cur.close() - response.success = True - except SQLAlchemyError as e: - logger.error(f"Error connecting to ClickHouse {self.connection_data['database']}, {e}!") - response.error_message = str(e) - self.is_connected = False - - if response.success is True and need_to_close: - self.disconnect() - - return response - - def native_query(self, query: str) -> Response: - """ - Executes a SQL query and returns the result. - - Args: - query (str): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - - connection = self.connect() - cur = connection.cursor() - try: - cur.execute(query) - result = cur.fetchall() - if result: - response = Response(RESPONSE_TYPE.TABLE, pd.DataFrame(result, columns=[x[0] for x in cur.description])) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except SQLAlchemyError as e: - logger.error(f"Error running query: {query} on {self.connection_data['database']}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - connection.rollback() - finally: - cur.close() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement with eliminated rows that dont satisfy the WHERE condition - """ - query_str = self.renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Get a list with all of the tabels in ClickHouse db - """ - q = f"SHOW TABLES FROM {self.connection_data['database']}" - result = self.native_query(q) - df = result.data_frame - - if df is not None: - result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) - - return result - - def get_columns(self, table_name) -> Response: - """ - Show details about the table - """ - q = f"DESCRIBE {table_name}" - result = self.native_query(q) - return result diff --git a/mindsdb/integrations/handlers/clickhouse_handler/connection_args.py b/mindsdb/integrations/handlers/clickhouse_handler/connection_args.py deleted file mode 100644 index e53e9d82366..00000000000 --- a/mindsdb/integrations/handlers/clickhouse_handler/connection_args.py +++ /dev/null @@ -1,54 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - protocol={ - "type": ARG_TYPE.STR, - "description": "The protocol to query clickhouse. Supported: native, http, https. Default: native", - "required": False, - "label": "Protocol", - }, - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the ClickHouse server.", - "required": True, - "label": "User", - }, - database={ - "type": ARG_TYPE.STR, - "description": "The database name to use when connecting with the ClickHouse server.", - "required": True, - "label": "Database name", - }, - host={ - "type": ARG_TYPE.STR, - "description": "The host name or IP address of the ClickHouse server. NOTE: use '127.0.0.1' instead of 'localhost' to connect to local server.", - "required": True, - "label": "Host", - }, - port={ - "type": ARG_TYPE.INT, - "description": "The TCP/IP port of the ClickHouse server. Must be an integer.", - "required": True, - "label": "Port", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the ClickHouse server.", - "required": True, - "label": "Password", - "secret": True, - }, - verify={ - "type": ARG_TYPE.BOOL, - "description": "Controls certificate verification in https protocol. Possible choices: true/false. Default is true.", - "required": False, - "label": "SSL Verification", - }, -) - -connection_args_example = OrderedDict( - protocol="native", host="127.0.0.1", port=9000, user="root", password="password", database="database", verify=True -) diff --git a/mindsdb/integrations/handlers/clickhouse_handler/icon.svg b/mindsdb/integrations/handlers/clickhouse_handler/icon.svg deleted file mode 100644 index e8e486bc132..00000000000 --- a/mindsdb/integrations/handlers/clickhouse_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/clickhouse_handler/requirements.txt b/mindsdb/integrations/handlers/clickhouse_handler/requirements.txt deleted file mode 100644 index 08d1b56f89b..00000000000 --- a/mindsdb/integrations/handlers/clickhouse_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -clickhouse-sqlalchemy>=0.3.1 diff --git a/mindsdb/integrations/handlers/clickhouse_handler/tests/__init__.py b/mindsdb/integrations/handlers/clickhouse_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/clickhouse_handler/tests/test_clickhouse_handler.py b/mindsdb/integrations/handlers/clickhouse_handler/tests/test_clickhouse_handler.py deleted file mode 100644 index 28b6663deac..00000000000 --- a/mindsdb/integrations/handlers/clickhouse_handler/tests/test_clickhouse_handler.py +++ /dev/null @@ -1,41 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.clickhouse_handler.clickhouse_handler import ClickHouseHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class PostgresHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - connection_data = { - "host": "localhost", - "port": "9000", - "user": "root", - "password": "pass", - "database": "test_data" - } - cls.handler = ClickHouseHandler('test_clickhouse_handler', connection_data) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_show_dbs(self): - result = self.handler.native_query("SHOW DATABASES;") - assert result.type is not RESPONSE_TYPE.ERROR - - def test_2_wrong_native_query_returns_error(self): - result = self.handler.native_query("SHOW DATABASE1S;") - assert result.type is RESPONSE_TYPE.ERROR - - def test_3_select_query(self): - query = 'SELECT * FROM hdi' - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_get_tables(self): - tbls = self.handler.get_tables() - assert tbls.type is not RESPONSE_TYPE.ERROR - - def test_5_describe_table(self): - described = self.handler.get_columns("hdi") - print('described', described) - assert described.type is RESPONSE_TYPE.TABLE diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/README.md b/mindsdb/integrations/handlers/cloud_spanner_handler/README.md deleted file mode 100644 index 8268b0453e1..00000000000 --- a/mindsdb/integrations/handlers/cloud_spanner_handler/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# Cloud SpannerHandler -This is the implementation of the Cloud Spanner handler for MindsDB. - -## Cloud Spanner -Cloud Spanner is a fully managed, mission-critical, relational database service that offers transactional consistency at global scale, automatic, synchronous replication for high availability. -Cloud spanner supports two SQL dialects: GoogleSQL (ANSI 2011 with extensions) and PostgreSQL. - -## Implementation -This handler was implemented using the `google-cloud-spanner` python client library. - -The arguments to establish a connection are: - -* `instance_id`: the instance identifier. -* `database_id`: the datbase identifier. -* `project`: the identifier of the project that owns the resources. -* `credentials`: a stringified GCP service account key JSON. - - -## Usage -In order to make use of this handler and connect to a Cloud Spanner database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE cloud_spanner_datasource -WITH -engine='cloud_spanner', -parameters={ - "instance_id":"my-instance", - "database_id":"example-id", - "project":"my-project", - "dialect": "postgres" -- optional, default is 'googlesql' - "credentials":"{...}" -}; -``` - -Now, you can use this established connection to query your database as follows: -```sql -SELECT * FROM cloud_spanner_datasource.my_table; -``` - -> **NOTE** : Cloud Spanner supports PostgreSQL syntax and also Google's GoogleSQL dialect. But, not all PostgresSQL dialect features are supported. Find the list of such features below. -> - Change streams -> - GoogleSQL `JSON` type (PostgreSQL-dialect databases support the PostgreSQL JSONB type.) -> - `SELECT DISTINCT` (`DISTINCT` is supported in aggregate functions.) -> - `FULL JOIN` with `USING` -> - Query optimizer versioning -> - Optimizer statistics package versioning -> - `ORDER BY`, `LIMIT`, and `OFFSET` in `UNION`,`EXCEPT`, or `DISTINCT` statements -> - The following columns in `SPANNER_SYS` statistics tables: -> - Transaction statistics: `TOTAL_LATENCY_DISTRIBUTION` and `OPERATIONS_BY_TABLE` -> - Query statistics: `LATENCY_DISTRIBUTION` -> - Lock Statistics: `SAMPLE_LOCK_REQUESTS` diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/__about__.py b/mindsdb/integrations/handlers/cloud_spanner_handler/__about__.py deleted file mode 100644 index 187790a8131..00000000000 --- a/mindsdb/integrations/handlers/cloud_spanner_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Cloud Spanner handler' -__package_name__ = 'mindsdb_cloud_spanner_handler' -__version__ = '0.0.1' -__description__ = 'MindsDB handler for Cloud Spanner' -__author__ = 'Kamil Tyborowski' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022 - mindsdb' diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/__init__.py b/mindsdb/integrations/handlers/cloud_spanner_handler/__init__.py deleted file mode 100644 index 1172d04e14d..00000000000 --- a/mindsdb/integrations/handlers/cloud_spanner_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .cloud_spanner_handler import CloudSpannerHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Google Cloud Spanner' -name = 'cloud_spanner' -type = HANDLER_TYPE.DATA -icon_path = 'icon.png' - -__all__ = [ - 'Handler', - 'version', - 'name', - 'type', - 'title', - 'description', - 'connection_args', - 'connection_args_example', - 'import_error', - 'icon_path', -] diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/cloud_spanner_handler.py b/mindsdb/integrations/handlers/cloud_spanner_handler/cloud_spanner_handler.py deleted file mode 100644 index b0c5e83ff6d..00000000000 --- a/mindsdb/integrations/handlers/cloud_spanner_handler/cloud_spanner_handler.py +++ /dev/null @@ -1,219 +0,0 @@ -import json - -from google.oauth2 import service_account -from google.cloud.spanner_dbapi.connection import connect, Connection -from google.cloud.sqlalchemy_spanner import SpannerDialect - -import pandas as pd -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import CreateTable, Function -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class CloudSpannerHandler(DatabaseHandler): - """This handler handles connection and execution of the Cloud Spanner statements.""" - - name = 'cloud_spanner' - - def __init__(self, name: str, **kwargs): - super().__init__(name) - self.parser = parse_sql - self.connection_data = kwargs.get('connection_data') - self.dialect = self.connection_data.get('dialect', 'googlesql') - - if self.dialect == 'postgres': - self.renderer = SqlalchemyRender('postgres') - else: - self.renderer = SqlalchemyRender(SpannerDialect) - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> Connection: - """Connect to a Cloud Spanner database. - - Returns: - Connection: The database connection. - """ - - if self.is_connected is True: - return self.connection - - args = { - 'database_id': self.connection_data.get('database_id'), - 'instance_id': self.connection_data.get('instance_id'), - 'project': self.connection_data.get('project'), - 'credentials': self.connection_data.get('credentials'), - } - - args['credentials'] = service_account.Credentials.from_service_account_info( - json.loads(args['credentials']) - ) - self.connection = connect(**args) - self.is_connected = True - - return self.connection - - def disconnect(self): - """Close the database connection.""" - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """Check the connection to the Cloud Spanner database. - - Returns: - StatusResponse: Connection success status and error message if an error occurs. - """ - - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error( - f'Error connecting to Cloud Spanner {self.connection_data["database_id"]}, {e}!' - ) - response.error_message = str(e) - finally: - if response.success is True and self.is_connected: - self.disconnect() - if response.success is False and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: str) -> Response: - """Execute a SQL query. - - Args: - query (str): The SQL query to execute. - - Returns: - Response: The query result. - """ - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - - # The cursor description check indicates if there are any results. - # This is required as spanner_dbapi will fail on a fetchall() call on an empty cursor. - if cursor.description: - result = cursor.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, columns=[x[0] for x in cursor.description] - ), - ) - else: - response = Response(RESPONSE_TYPE.OK) - - connection.commit() - except Exception as e: - logger.error( - f'Error running query: {query} on {self.connection_data["database_id"]}!' - ) - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - cursor.close() - if self.is_connected: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """Render and execute a SQL query. - - Args: - query (ASTNode): The SQL query. - - Returns: - Response: The query result. - """ - - # check primary key for table: - if isinstance(query, CreateTable) and query.columns is not None: - id_col = None - has_primary = False - for col in query.columns: - if col.name.lower() == 'id': - id_col = col - if col.is_primary_key: - has_primary = True - # if no other primary keys use id - if not has_primary and id_col: - id_col.is_primary_key = True - id_col.default = Function('GENERATE_UUID', args=[]) - - query_str = self.renderer.get_string(query, with_failback=True) - - return self.native_query(query_str) - - def get_tables(self) -> Response: - """Get a list of all the tables in the database. - - Returns: - Response: Names of the tables in the database. - """ - - query = ''' - SELECT - t.table_name - FROM - information_schema.tables AS t - WHERE - t.table_schema = '' - ''' - result = self.native_query(query) - df = result.data_frame - - if df is not None: - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - - return result - - def get_columns(self, table_name: str) -> Response: - """Get details about a table. - - Args: - table_name (str): Name of the table to retrieve details of. - - Returns: - Response: Details of the table. - """ - - query = f''' - SELECT - t.column_name, - t.spanner_type, - t.is_nullable - FROM - information_schema.columns AS t - WHERE - t.table_name = '{table_name}' - ''' - return self.native_query(query) diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/connection_args.py b/mindsdb/integrations/handlers/cloud_spanner_handler/connection_args.py deleted file mode 100644 index 1a1378a74d1..00000000000 --- a/mindsdb/integrations/handlers/cloud_spanner_handler/connection_args.py +++ /dev/null @@ -1,33 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - instance_id={ - 'type': ARG_TYPE.STR, - 'description': 'The Cloud Spanner instance identifier.', - }, - database_id={ - 'type': ARG_TYPE.STR, - 'description': 'The Cloud Spanner database indentifier.', - }, - project={ - 'type': ARG_TYPE.STR, - 'description': 'The Cloud Spanner project indentifier.', - }, - dialect={ - 'type': ARG_TYPE.STR, - 'description': 'Dialect of the database', - "required": False, - }, - credentials={ - 'type': ARG_TYPE.STR, - 'description': 'The Google Cloud Platform service account key in the JSON format.', - 'secret': True - }, -) - -connection_args_example = OrderedDict( - instance_id='test-instance', datbase_id='example-db', project='your-project-id' -) diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/icon.png b/mindsdb/integrations/handlers/cloud_spanner_handler/icon.png deleted file mode 100644 index 61db5d97238..00000000000 Binary files a/mindsdb/integrations/handlers/cloud_spanner_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/requirements.txt b/mindsdb/integrations/handlers/cloud_spanner_handler/requirements.txt deleted file mode 100644 index c8fb3415443..00000000000 --- a/mindsdb/integrations/handlers/cloud_spanner_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -google-cloud-spanner -sqlalchemy-spanner -sqlparse>=0.5.4 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/tests/__init__.py b/mindsdb/integrations/handlers/cloud_spanner_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py b/mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py deleted file mode 100644 index 98c45edf323..00000000000 --- a/mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest -from mindsdb.api.executor.data_types.response_type import ( - RESPONSE_TYPE, -) -from mindsdb.integrations.handlers.cloud_spanner_handler.cloud_spanner_handler import ( - CloudSpannerHandler, -) - - -class CloudSpannerHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = {'connection_data': {'database_id': 'example-db', 'instance_id': 'test-instance', 'project': 'your-project-id'}} - cls.handler = CloudSpannerHandler('test_cloud_spanner_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - self.handler.check_connection() - - def test_2_create_table(self): - res = self.handler.query('CREATE TABLE integers(i INT64) PRIMARY KEY (i)') - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_insert_into_table(self): - res = self.handler.query('INSERT INTO integers (i) VALUES (42)') - assert res.type is not RESPONSE_TYPE.ERROR - - def test_4_select(self): - res = self.handler.query('SELECT * FROM integers') - assert res.type is RESPONSE_TYPE.TABLE - - def test_5_describe_table(self): - res = self.handler.get_columns('integers') - assert res.type is RESPONSE_TYPE.TABLE - - def test_6_drop_table(self): - res = self.handler.query('DROP TABLE integers') - assert res.type is not RESPONSE_TYPE.ERROR - - def test_7_get_tables(self): - res = self.handler.get_tables() - assert res.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/README.md b/mindsdb/integrations/handlers/cloud_sql_handler/README.md deleted file mode 100644 index ee367c88929..00000000000 --- a/mindsdb/integrations/handlers/cloud_sql_handler/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# Google Cloud SQL Handler - -This is the implementation of the Google Cloud SQL handler for MindsDB. - -## Google Cloud SQL -Fully managed relational database service for MySQL, PostgreSQL, and SQL Server with rich extension collections, configuration flags, and developer ecosystems. -
-https://cloud.google.com/sql - -## Implementation -This handler was implemented using the existing MindsDB handlers for MySQL, PostgreSQL and SQL Server. - -The required arguments to establish a connection are, -* `host`: the host name or IP address of the Google Cloud SQL instance. -* `port`: the TCP/IP port of the Google Cloud SQL instance. -* `user`: the username used to authenticate with the Google Cloud SQL instance. -* `password`: the password to authenticate the user with the Google Cloud SQL instance. -* `database`: the database name to use when connecting with the Google Cloud SQL instance. -* `db_engine`: the database engine of the Google Cloud SQL instance. This can take one of three values: 'mysql', 'postgresql' or 'mssql'. - -## Usage -In order to make use of this handler and connect to a Google Cloud SQL MySQL instance in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE cloud_sql_mysql_datasource -WITH ENGINE = 'cloud_sql', -PARAMETERS = { - "db_engine": "mysql", - "host": "53.170.61.16", - "port": 3306, - "user": "admin", - "password": "password", - "database": "example_db" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM cloud_sql_mysql_datasource.example_tbl -~~~~ - -Similar commands can be used to establish a connection and query Google Cloud SQL PostgreSQL and SQL Server instances, -~~~~sql -CREATE DATABASE cloud_sql_postgres_datasource -WITH ENGINE = 'cloud_sql', -PARAMETERS = { - "db_engine": "postgresql", - "host": "53.170.61.17", - "port": 5432, - "user": "postgres", - "password": "password", - "database": "example_db " -}; - -SELECT * FROM cloud_sql_postgres_datasource.example_tbl -~~~~ - -~~~~sql -CREATE DATABASE cloud_sql_mssql_datasource -WITH ENGINE = 'cloud_sql', -PARAMETERS = { - "db_engine": "mssql", - "host": "53.170.61.18", - "port": 1433, - "user": "postgres", - "password": "password", - "database": "example_db " -}; - -SELECT * FROM cloud_sql_mssql_datasource.example_tbl -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/__about__.py b/mindsdb/integrations/handlers/cloud_sql_handler/__about__.py deleted file mode 100644 index 6eb87bfbb02..00000000000 --- a/mindsdb/integrations/handlers/cloud_sql_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'Google Cloud SQL handler' -__package_name__ = 'mindsdb_cloud_sql_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Google Cloud SQL" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/__init__.py b/mindsdb/integrations/handlers/cloud_sql_handler/__init__.py deleted file mode 100644 index 2b788a278d0..00000000000 --- a/mindsdb/integrations/handlers/cloud_sql_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .cloud_sql_handler import CloudSQLHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Google Cloud SQL' -name = 'cloud_sql' -type = HANDLER_TYPE.DATA -icon_path = 'icon.png' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py b/mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py deleted file mode 100644 index 63a870c37ed..00000000000 --- a/mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py +++ /dev/null @@ -1,122 +0,0 @@ -from typing import Optional - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse -) - -from mindsdb.integrations.handlers.mysql_handler.mysql_handler import MySQLHandler -from mindsdb.integrations.handlers.postgres_handler.postgres_handler import PostgresHandler -from mindsdb.integrations.handlers.mssql_handler.mssql_handler import SqlServerHandler - - -class CloudSQLHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Google Cloud SQL statements. - """ - name = 'cloud_sql' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.dialect = 'cloud_sql' - self.connection_data = connection_data - self.kwargs = kwargs - - if self.connection_data['db_engine'] == 'mysql': - self.db = MySQLHandler( - name=name + 'mysql', - connection_data={key: self.connection_data[key] for key in self.connection_data if key != 'db_engine'} - ) - elif self.connection_data['db_engine'] == 'postgresql': - self.db = PostgresHandler( - name=name + 'postgresql', - connection_data={key: self.connection_data[key] for key in self.connection_data if key != 'db_engine'} - ) - elif self.connection_data['db_engine'] == 'mssql': - self.db = SqlServerHandler( - name=name + 'mssql', - connection_data={key: self.connection_data[key] for key in self.connection_data if key != 'db_engine'} - ) - else: - raise Exception("The database engine should be either MySQL, PostgreSQL or SQL Server!") - - def __del__(self): - self.db.__del__() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - return self.db.connect() - - def disconnect(self): - """ - Close any existing connections. - """ - - return self.db.disconnect() - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - return self.db.check_connection() - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - return self.db.native_query(query) - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - return self.db.query(query) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - return self.db.get_tables() - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - return self.db.get_columns(table_name) diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/connection_args.py b/mindsdb/integrations/handlers/cloud_sql_handler/connection_args.py deleted file mode 100644 index e649213a191..00000000000 --- a/mindsdb/integrations/handlers/cloud_sql_handler/connection_args.py +++ /dev/null @@ -1,41 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Google Cloud SQL instance.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Google Cloud SQL instance.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Google Cloud SQL instance.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Google Cloud SQL instance.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Google Cloud SQL instance. Must be an integer.' - }, - db_engine={ - 'type': ARG_TYPE.STR, - 'description': "The database engine of the Google Cloud SQL instance. This can take one of three values: 'mysql', 'postgresql' or 'mssql'." - } -) - -connection_args_example = OrderedDict( - db_engine='mysql', - host='53.170.61.16', - port=3306, - user='root', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/icon.png b/mindsdb/integrations/handlers/cloud_sql_handler/icon.png deleted file mode 100644 index 55fd2db8f2b..00000000000 Binary files a/mindsdb/integrations/handlers/cloud_sql_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/requirements.txt b/mindsdb/integrations/handlers/cloud_sql_handler/requirements.txt deleted file mode 100644 index 1a722aa2942..00000000000 --- a/mindsdb/integrations/handlers/cloud_sql_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt --r mindsdb/integrations/handlers/mssql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/tests/__init__.py b/mindsdb/integrations/handlers/cloud_sql_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/tests/test_cloud_sql_mssql_handler.py b/mindsdb/integrations/handlers/cloud_sql_handler/tests/test_cloud_sql_mssql_handler.py deleted file mode 100644 index f9e8b7d23f8..00000000000 --- a/mindsdb/integrations/handlers/cloud_sql_handler/tests/test_cloud_sql_mssql_handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.cloud_sql_handler.cloud_sql_handler import CloudSQLHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class CloudSQLMSSQLHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "", - "port": 1433, - "user": "root", - "password": "", - "database": "public", - "db_engine": "mssql" - } - cls.handler = CloudSQLHandler('test_cloud_sql_mssql_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM person" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('person') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/cloud_sql_handler/tests/test_cloud_sql_mysql_handler.py b/mindsdb/integrations/handlers/cloud_sql_handler/tests/test_cloud_sql_mysql_handler.py deleted file mode 100644 index d09d9c8809c..00000000000 --- a/mindsdb/integrations/handlers/cloud_sql_handler/tests/test_cloud_sql_mysql_handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.cloud_sql_handler.cloud_sql_handler import CloudSQLHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class CloudSQLMySQLHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "", - "port": 3306, - "user": "root", - "password": "", - "database": "public", - "db_engine": "mysql" - } - cls.handler = CloudSQLHandler('test_cloud_sql_mysql_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM person" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('person') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/cockroach_handler/README.md b/mindsdb/integrations/handlers/cockroach_handler/README.md deleted file mode 100644 index e947cb4ea16..00000000000 --- a/mindsdb/integrations/handlers/cockroach_handler/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# CockroachDB Handler - -This is the implementation of the CockroachDB handler for MindsDB. - -## CockroachDB - -CockroachDB was architected for complex, high performant distributed writes and delivers scale-out read capability. CockroachDB delivers simple relational SQL transactions and obscures complexity away from developers. CockroachDB is wire-compatible with PostgreSQL and provides a familiar, easy interface for developers. For more info check https://www.cockroachlabs.com/docs/ - -## Implementation - -Since, CockroachDB is wire-compatible with PostgreSQL this implementation was pretty straight-forward by just extending PostgreSQL handler. - -The required arguments to establish a connection are: - -* `host`: the host name or IP address of the CockroachDB -* `database`: the name of the database to connect to -* `user`: the user to authenticate with -* `port`: the port to use when connecting -* `password`: the password to authenticate the user - -## Usage - -In order to make use of this handler and connect to a CockroachDB server in MindsDB, the following syntax can be used, - -```sql -CREATE DATABASE cockroachdb -WITH -engine='cockroachdb', -parameters={ - "host": "localhost", - "database": "dbname", - "user": "admin", - "password": "password", - "port": "5432" -}; -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM cockroachdb.public.db; -``` diff --git a/mindsdb/integrations/handlers/cockroach_handler/__about__.py b/mindsdb/integrations/handlers/cockroach_handler/__about__.py deleted file mode 100644 index 8a8666cb49e..00000000000 --- a/mindsdb/integrations/handlers/cockroach_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Cockroach handler' -__package_name__ = 'mindsdb_cockroach_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for CockroachDB" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/cockroach_handler/__init__.py b/mindsdb/integrations/handlers/cockroach_handler/__init__.py deleted file mode 100644 index 59695911335..00000000000 --- a/mindsdb/integrations/handlers/cockroach_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .cockroach_handler import CockroachHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'CockroachDB' -name = 'cockroachdb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', - 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/cockroach_handler/cockroach_handler.py b/mindsdb/integrations/handlers/cockroach_handler/cockroach_handler.py deleted file mode 100644 index 3fd088bdce5..00000000000 --- a/mindsdb/integrations/handlers/cockroach_handler/cockroach_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -from mindsdb.integrations.handlers.postgres_handler.postgres_handler import PostgresHandler - - -class CockroachHandler(PostgresHandler): - """ - This handler handles connection and execution of the Cockroachdb statements. - """ - name = 'cockroachdb' - - def __init__(self, name=None, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/cockroach_handler/icon.svg b/mindsdb/integrations/handlers/cockroach_handler/icon.svg deleted file mode 100644 index 3a24677d4ab..00000000000 --- a/mindsdb/integrations/handlers/cockroach_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/cockroach_handler/tests/__init__.py b/mindsdb/integrations/handlers/cockroach_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/cockroach_handler/tests/test_cockroachdb_handler.py b/mindsdb/integrations/handlers/cockroach_handler/tests/test_cockroachdb_handler.py deleted file mode 100644 index 35052d2f117..00000000000 --- a/mindsdb/integrations/handlers/cockroach_handler/tests/test_cockroachdb_handler.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.cockroach_handler.cockroach_handler import CockroachHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class CockroachHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": "5432", - "user": "mindsdb", - "password": "mindsdb", - "database": "postgres" - } - } - cls.handler = CockroachHandler('test_cockroach_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_describe_table(self): - described = self.handler.describe_table("test_mdb") - assert described['type'] is not RESPONSE_TYPE.ERROR - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables['type'] is not RESPONSE_TYPE.ERROR - - def test_4_select_query(self): - query = "SELECT * FROM data.test_mdb WHERE 'id'='1'" - result = self.handler.query(query) - assert result['type'] is RESPONSE_TYPE.TABLE diff --git a/mindsdb/integrations/handlers/coinbase_handler/README.md b/mindsdb/integrations/handlers/coinbase_handler/README.md deleted file mode 100644 index dee72fa5ae9..00000000000 --- a/mindsdb/integrations/handlers/coinbase_handler/README.md +++ /dev/null @@ -1,105 +0,0 @@ -# CoinBase API Handler - -This handler integrates with the [CoinBase API](https://docs.cloud.coinbase.com/sign-in-with-coinbase/docs/api-users) to make aggregate trade data available to use for model training and predictions. - -## Example: Forecast Cryptocurrency Prices - -To see how the CoinBase handler is used, let's walk through a simple example to create a time series model to predict the future price of Bitcoin (BTC) in terms of USDT. - -### Connect to the CoinBase API -We start by creating a database to connect to the CoinBase API. - -``` -CREATE DATABASE my_coinbase -WITH - ENGINE = 'coinbase', - PARAMETERS = { - "api_key": "1234", - "api_secret": "***", - "api_passphrase": "***" - }; -``` - -### Select Data -To see if the connection was successful, try searching for the most recent trade data. By default, aggregate data from the latest 1000 trading intervals with a length of 1m each are returned. - -``` -SELECT * -FROM my_coinbase.coinbase_candle_data -WHERE symbol = 'BTC-USD'; -``` - -Each row should look like this: - -| symbol | low | high | open | close | volume | timestamp | timestamp_iso | -| ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | - BTC-USD 34070.49 34091.25 34088.72 34073.8 1.94718722 1698499500 2023-10-28T09:25:00-04:00 - - where: -* symbol - Trading pair (BTC to USDT in the above example) -* low - Lowest price of base asset during trading interval -* high - Highest price of base asset during trading interval -* open - Price of base asset at beginning of trading interval -* close - Price of base asset at end of trading interval -* volume - Total amount of base asset traded during interval -* timestamp - End time of interval in seconds since the Unix epoch -* timestamp_iso - End time of interval in seconds since the Unix epoch in ISO format - -You can customize symbol and interval: - -``` -SELECT * -FROM my_coinbase.coinbase_candle_data -WHERE symbol = 'BTC-USD' -AND interval = 300; -``` - -Supported intervals are [listed here](https://docs.cloud.coinbase.com/exchange/reference/exchangerestapi_getproductcandles): -* 60 -* 300 -* 900 -* 3600 -* 21600 -* 86400 - -### Train a Model - -Now it's time to create a time series model using 10000 trading intervals in the past with duration 1m. - -``` -CREATE MODEL mindsdb.coinbase_btc_forecast_model -FROM my_coinbase -( - SELECT * FROM coinbase_candle_data - WHERE symbol = 'BTC-USD' - AND interval = 300 -) - -PREDICT open - -ORDER BY timestamp -WINDOW 20 -HORIZON 10; -``` - -### Making Predictions - -First let's make a view for the most recent BTC-USD aggregate trade data: - -``` -CREATE VIEW mindsdb.recent_coinbase_data AS ( - SELECT * FROM my_coinbase.coinbase_candle_data - WHERE symbol = 'BTC-USD' -) -``` - -Now let's predict the future price of BTC: - -``` -SELECT m.* -FROM mindsdb.recent_coinbase_data AS t -JOIN mindsdb.coinbase_btc_forecast_model AS m -WHERE m.timestamp > LATEST -``` - -This should give you the predicted BTC price for the next interval (we set the horizon to 10) in terms of USD. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/coinbase_handler/__about__.py b/mindsdb/integrations/handlers/coinbase_handler/__about__.py deleted file mode 100644 index b1dcddaa5b8..00000000000 --- a/mindsdb/integrations/handlers/coinbase_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB CoinBase handler' -__package_name__ = 'mindsdb_coinbase_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the CoinBase API" -__author__ = 'Bustedware LLC' -__github__ = 'https://github.com/bustedware' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/coinbase_handler/__init__.py b/mindsdb/integrations/handlers/coinbase_handler/__init__.py deleted file mode 100644 index 7a990f1e8cc..00000000000 --- a/mindsdb/integrations/handlers/coinbase_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .coinbase_handler import CoinBaseHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'Coinbase' -name = 'coinbase' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/coinbase_handler/coinbase_handler.py b/mindsdb/integrations/handlers/coinbase_handler/coinbase_handler.py deleted file mode 100644 index 8f74dd47d9b..00000000000 --- a/mindsdb/integrations/handlers/coinbase_handler/coinbase_handler.py +++ /dev/null @@ -1,138 +0,0 @@ -import time -import hmac -import base64 -import hashlib -import datetime -from typing import Dict - -import pandas as pd -import requests - -from mindsdb.integrations.handlers.coinbase_handler.coinbase_tables import CoinBaseAggregatedTradesTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) - -from mindsdb_sql_parser import parse_sql - -_BASE_COINBASE_US_URL = 'https://api.exchange.coinbase.com' - - -class CoinBaseHandler(APIHandler): - """A class for handling connections and interactions with the CoinBase API. - - Attributes: - api_key (str): API key - api_secret (str): API secret - is_connected (bool): Whether or not the API client is connected to CoinBase. - - """ - - def __init__(self, name: str = None, **kwargs): - """Registers all API tables and prepares the handler for an API connection. - - Args: - name: (str): The handler name to use - """ - super().__init__(name) - self.api_key = None - self.api_secret = None - self.api_passphrase = None - - args = kwargs.get('connection_data', {}) - if 'api_key' in args: - self.api_key = args['api_key'] - if 'api_secret' in args: - self.api_secret = args['api_secret'] - if 'api_passphrase' in args: - self.api_passphrase = args['api_passphrase'] - self.client = None - self.is_connected = False - - coinbase_candle_data = CoinBaseAggregatedTradesTable(self) - self._register_table('coinbase_candle_data', coinbase_candle_data) - - def connect(self): - """Creates a new CoinBase API client if needed and sets it as the client to use for requests. - - Returns newly created CoinBase API client, or current client if already set. - """ - self.is_connected = True - return self.client - - def check_connection(self) -> StatusResponse: - """Checks connection to CoinBase API by sending a ping request. - - Returns StatusResponse indicating whether or not the handler is connected. - """ - response = StatusResponse(True) - self.is_connected = response.success - return response - - # symbol: BTC-USD - # granularity 60, 300, 900, 3600, 21600, 86400 - def get_coinbase_candle(self, symbol: str, granularity: int) -> pd.DataFrame: - jdocs = [] - current_time = datetime.datetime.now() - start_time = current_time - datetime.timedelta(seconds=granularity) - start_time_iso = start_time.isoformat().split(".")[0] + "-04:00" - path = "/products/" + symbol + "/candles?granularity=" + str(granularity) + "&start=" + start_time_iso - headers = self.generate_api_headers("GET", path) - url = _BASE_COINBASE_US_URL + path - response = requests.get(url, headers=headers) - candles = response.json() - for candle in candles: - dt = datetime.datetime.fromtimestamp(candle[0], None).isoformat() - low, high, open, close, volume = candle[1:] - jdoc = {"symbol": symbol, "low": low, "high": high, "open": open, "close": close, "volume": volume, "timestamp": candle[0], "timestamp_iso": dt} - jdocs.append(jdoc) - return pd.DataFrame(jdocs) - - def _get_candle(self, params: Dict = None) -> pd.DataFrame: - """Gets aggregate trade data for a symbol based on given parameters - - Returns results as a pandas DataFrame. - - Args: - params (Dict): Trade data params (symbol, interval) - """ - if 'symbol' not in params: - raise ValueError('Missing "symbol" param to fetch trade data for.') - if 'interval' not in params: - raise ValueError('Missing "interval" param (60, 300, 900, 3600, 21600, 86400).') - - candle = self.get_coinbase_candle(params['symbol'], int(params['interval'])) - return candle - - def native_query(self, query: str = None) -> Response: - ast = parse_sql(query) - return self.query(ast) - - def generate_api_headers(self, method: str, path: str) -> dict: - timestamp = str(int(time.time())) - message = timestamp + method + path - signature = base64.b64encode(hmac.new(base64.b64decode(self.api_secret), str.encode(message), hashlib.sha256).digest()) - headers = { - "Content-Type": "application/json", - "CB-ACCESS-SIGN": signature, - "CB-ACCESS-KEY": self.api_key, - "CB-ACCESS-TIMESTAMP": timestamp, - "CB-VERSION": "2015-04-08", - "CB-ACCESS-PASSPHRASE": self.api_passphrase - } - return headers - - def call_coinbase_api(self, method_name: str = None, params: Dict = None) -> pd.DataFrame: - """Calls the CoinBase API method with the given params. - - Returns results as a pandas DataFrame. - - Args: - method_name (str): Method name to call - params (Dict): Params to pass to the API call - """ - if method_name == 'get_candle': - return self._get_candle(params) - raise NotImplementedError('Method name {} not supported by CoinBase API Handler'.format(method_name)) diff --git a/mindsdb/integrations/handlers/coinbase_handler/coinbase_tables.py b/mindsdb/integrations/handlers/coinbase_handler/coinbase_tables.py deleted file mode 100644 index 5d3c0b2e842..00000000000 --- a/mindsdb/integrations/handlers/coinbase_handler/coinbase_tables.py +++ /dev/null @@ -1,56 +0,0 @@ -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb_sql_parser import ast - -import pandas as pd - - -class CoinBaseAggregatedTradesTable(APITable): - - DEFAULT_INTERVAL = 60 - DEFAULT_SYMBOL = 'BTC-USD' - - def select(self, query: ast.Select) -> pd.DataFrame: - """Selects data from the CoinBase API and returns it as a pandas DataFrame. - - Returns dataframe representing the CoinBase API results. - - Args: - query (ast.Select): Given SQL SELECT query - """ - conditions = extract_comparison_conditions(query.where) - - params = { - 'interval': CoinBaseAggregatedTradesTable.DEFAULT_INTERVAL, - 'symbol': CoinBaseAggregatedTradesTable.DEFAULT_SYMBOL, - } - for op, arg1, arg2 in conditions: - if arg1 == 'interval': - if op != '=': - raise NotImplementedError - params['interval'] = arg2 - - elif arg1 == 'symbol': - if op != '=': - raise NotImplementedError - params['symbol'] = arg2 - - coinbase_candle_data = self.handler.call_coinbase_api( - method_name='get_candle', - params=params - ) - - return coinbase_candle_data - - def get_columns(self): - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - 'symbol', - 'low', - 'high', - 'open', - 'close', - 'volume', - 'timestamp', - 'current_time' - ] diff --git a/mindsdb/integrations/handlers/coinbase_handler/connection_args.py b/mindsdb/integrations/handlers/coinbase_handler/connection_args.py deleted file mode 100644 index 3ea8e13603e..00000000000 --- a/mindsdb/integrations/handlers/coinbase_handler/connection_args.py +++ /dev/null @@ -1,34 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - 'type': ARG_TYPE.STR, - 'description': 'API Key For Connecting to CoinBase API.', - 'required': True, - 'label': 'API Key', - 'secret': True - }, - api_secret={ - 'type': ARG_TYPE.PWD, - 'description': 'API Secret For Connecting to CoinBase API.', - 'required': True, - 'label': 'API Secret', - 'secret': True - }, - api_passphrase={ - 'type': ARG_TYPE.PWD, - 'description': 'API Passphrase.', - 'required': True, - 'label': 'API Passphrase', - 'secret': True - }, -) - -connection_args_example = OrderedDict( - api_key='public_key', - api_secret='secret_key', - api_passphrase='passphrase' -) diff --git a/mindsdb/integrations/handlers/coinbase_handler/icon.svg b/mindsdb/integrations/handlers/coinbase_handler/icon.svg deleted file mode 100644 index 02092d22290..00000000000 --- a/mindsdb/integrations/handlers/coinbase_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/coinbase_handler/requirements.txt b/mindsdb/integrations/handlers/coinbase_handler/requirements.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/confluence_handler/README.md b/mindsdb/integrations/handlers/confluence_handler/README.md deleted file mode 100644 index 1037828a358..00000000000 --- a/mindsdb/integrations/handlers/confluence_handler/README.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Confluence -sidebarTitle: Confluence ---- - -This documentation describes the integration of MindsDB with [Confluence](https://www.atlassian.com/software/confluence), a popular collaboration and documentation tool developed by Atlassian. -The integration allows MindsDB to access data from Confluence and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to Confluence from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/confluence_handler) as an engine. - -```sql -CREATE DATABASE confluence_datasource -WITH - ENGINE = 'confluence', - PARAMETERS = { - "api_base": "https://example.atlassian.net", - "username": "john.doe@example.com", - "password": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" - }; -``` - -Required connection parameters include the following: - -* `api_base`: The base URL for your Confluence instance/server. -* `username`: The email address associated with your Confluence account. -* `password`: The API token generated for your Confluence account. - - -Refer this [guide](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/) for instructions on how to create API tokens for your account. - - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM confluence_datasource.table_name -LIMIT 10; -``` - - -The above example utilize `confluence_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Supported Tables - -* `spaces`: The table containing information about the spaces in Confluence. -* `pages`: The table containing information about the pages in Confluence. -* `blogposts`: The table containing information about the blog posts in Confluence. -* `whiteboards`: The table containing information about the whiteboards in Confluence. -* `databases`: The table containing information about the databases in Confluence. -* `tasks`: The table containing information about the tasks in Confluence. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/confluence_handler/__about__.py b/mindsdb/integrations/handlers/confluence_handler/__about__.py deleted file mode 100644 index 734b7d6f391..00000000000 --- a/mindsdb/integrations/handlers/confluence_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Confluence handler" -__package_name__ = "mindsdb_confluence_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Confluence" -__author__ = "Balaji Seetharaman" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/confluence_handler/confluence_api_client.py b/mindsdb/integrations/handlers/confluence_handler/confluence_api_client.py deleted file mode 100644 index 2ce6dce173d..00000000000 --- a/mindsdb/integrations/handlers/confluence_handler/confluence_api_client.py +++ /dev/null @@ -1,176 +0,0 @@ -from typing import List - -import requests - - -class ConfluenceAPIClient: - def __init__(self, url: str, username: str, password: str): - self.url = url - self.username = username - self.password = password - self.session = requests.Session() - self.session.auth = (self.username, self.password) - self.session.headers.update({"Accept": "application/json"}) - - def get_spaces( - self, - ids: List[int] = None, - keys: List[str] = None, - space_type: str = None, - status: str = None, - sort_condition: str = None, - limit: int = None, - ): - url = f"{self.url}/wiki/api/v2/spaces" - params = { - "description-format": "view", - } - if ids: - params["ids"] = ids - if keys: - params["keys"] = keys - if space_type: - params["type"] = space_type - if status: - params["status"] = status - if sort_condition: - params["sort"] = sort_condition - if limit: - params["limit"] = limit - - return self._paginate(url, params) - - def get_pages( - self, - page_ids: List[int] = None, - space_ids: List[int] = None, - statuses: List[str] = None, - title: str = None, - sort_condition: str = None, - limit: int = None, - ) -> List[dict]: - url = f"{self.url}/wiki/api/v2/pages" - params = { - "body-format": "storage", - } - if page_ids: - params["id"] = page_ids - if space_ids: - params["space-id"] = space_ids - if statuses: - params["status"] = statuses - if title: - params["title"] = title - if sort_condition: - params["sort"] = sort_condition - if limit: - params["limit"] = limit - - return self._paginate(url, params) - - def get_blogposts( - self, - post_ids: List[int] = None, - space_ids: List[str] = None, - statuses: List[str] = None, - title: str = None, - sort_condition: str = None, - limit: int = None, - ) -> List[dict]: - url = f"{self.url}/wiki/api/v2/blogposts" - params = { - "body-format": "storage", - } - if post_ids: - params["id"] = post_ids - if space_ids: - params["space-id"] = space_ids - if statuses: - params["status"] = statuses - if title: - params["title"] = title - if sort_condition: - params["sort"] = sort_condition - if limit: - params["limit"] = limit - - return self._paginate(url, params) - - def get_whiteboard_by_id(self, whiteboard_id: int) -> dict: - url = f"{self.url}/wiki/api/v2/whiteboards/{whiteboard_id}" - - return self._make_request("GET", url) - - def get_database_by_id(self, database_id: int) -> dict: - url = f"{self.url}/wiki/api/v2/databases/{database_id}" - - return self._make_request("GET", url) - - def get_tasks( - self, - task_ids: List[int] = None, - space_ids: List[str] = None, - page_ids: List[str] = None, - blogpost_ids: List[str] = None, - created_by_ids: List[str] = None, - assigned_to_ids: List[str] = None, - completed_by_ids: List[str] = None, - status: str = None, - limit: int = None, - ) -> List[dict]: - url = f"{self.url}/wiki/api/v2/tasks" - params = { - "body-format": "storage", - } - if task_ids: - params["id"] = task_ids - if space_ids: - params["space-id"] = space_ids - if page_ids: - params["page-id"] = page_ids - if blogpost_ids: - params["blogpost-id"] = blogpost_ids - if created_by_ids: - params["created-by"] = created_by_ids - if assigned_to_ids: - params["assigned-to"] = assigned_to_ids - if completed_by_ids: - params["completed-by"] = completed_by_ids - if status: - params["status"] = status - if limit: - params["limit"] = limit - - return self._paginate(url, params) - - def _paginate(self, url: str, params: dict = None) -> List[dict]: - results = [] - response = self._make_request("GET", url, params) - results.extend(response["results"]) - - while response["_links"].get("next"): - next_url = response["_links"].get("next") - next_params = {} - if params: - next_params.update(params) - if "cursor=" in next_url: - # cursor= is 7 characters long - cursor_start = next_url.find("cursor=") + 7 - cursor_value = next_url[cursor_start:] - if "&" in cursor_value: - cursor_value = cursor_value.split("&")[0] - next_params["cursor"] = cursor_value - response = self._make_request("GET", url, next_params) - else: - response = self._make_request("GET", next_url) - results.extend(response["results"]) - - return results - - def _make_request(self, method: str, url: str, params: dict = None, data: dict = None) -> dict: - response = self.session.request(method, url, params=params, json=data) - - if response.status_code != 200: - raise Exception(f"Request failed with status code {response.status_code}: {response.text}") - - return response.json() diff --git a/mindsdb/integrations/handlers/confluence_handler/confluence_handler.py b/mindsdb/integrations/handlers/confluence_handler/confluence_handler.py deleted file mode 100644 index d1af184b9a5..00000000000 --- a/mindsdb/integrations/handlers/confluence_handler/confluence_handler.py +++ /dev/null @@ -1,102 +0,0 @@ -from typing import Any, Dict - -from mindsdb.integrations.handlers.confluence_handler.confluence_api_client import ConfluenceAPIClient -from mindsdb.integrations.handlers.confluence_handler.confluence_tables import ( - ConfluenceBlogPostsTable, - ConfluenceDatabasesTable, - ConfluencePagesTable, - ConfluenceSpacesTable, - ConfluenceTasksTable, - ConfluenceWhiteboardsTable, -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class ConfluenceHandler(APIHandler): - """ - This handler handles the connection and execution of SQL statements on Confluence. - """ - - name = "confluence" - - def __init__(self, name: str, connection_data: Dict, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (str): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the Confluence API. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - self.cache_thread_safe = True - - self._register_table("spaces", ConfluenceSpacesTable(self)) - self._register_table("pages", ConfluencePagesTable(self)) - self._register_table("blogposts", ConfluenceBlogPostsTable(self)) - self._register_table("whiteboards", ConfluenceWhiteboardsTable(self)) - self._register_table("databases", ConfluenceDatabasesTable(self)) - self._register_table("tasks", ConfluenceTasksTable(self)) - - def connect(self) -> ConfluenceAPIClient: - """ - Establishes a connection to the Confluence API. - - Raises: - ValueError: If the required connection parameters are not provided. - - Returns: - atlassian.confluence.Confluence: A connection object to the Confluence API. - """ - if self.is_connected is True: - return self.connection - - if not all( - key in self.connection_data and self.connection_data.get(key) - for key in ["api_base", "username", "password"] - ): - raise ValueError( - "Required parameters (api_base, username, password) must be provided and should not be empty." - ) - - self.connection = ConfluenceAPIClient( - url=self.connection_data.get("api_base"), - username=self.connection_data.get("username"), - password=self.connection_data.get("password"), - ) - - self.is_connected = True - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Confluence API. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - - try: - connection = self.connect() - connection.get_spaces(limit=1) - response.success = True - except Exception as e: - logger.error(f"Error connecting to Confluence API: {e}!") - response.error_message = e - - self.is_connected = response.success - - return response diff --git a/mindsdb/integrations/handlers/confluence_handler/confluence_tables.py b/mindsdb/integrations/handlers/confluence_handler/confluence_tables.py deleted file mode 100644 index 7fad771cda8..00000000000 --- a/mindsdb/integrations/handlers/confluence_handler/confluence_tables.py +++ /dev/null @@ -1,706 +0,0 @@ -from typing import List - -import pandas as pd - -from mindsdb.integrations.handlers.confluence_handler.confluence_api_client import ConfluenceAPIClient -from mindsdb.integrations.libs.api_handler import APIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, SortColumn -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class ConfluenceSpacesTable(APIResource): - """ - The table abstraction for the 'spaces' resource of the Confluence API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs, - ): - """ - Executes a parsed SELECT SQL query on the 'spaces' resource of the Confluence API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - spaces = [] - client: ConfluenceAPIClient = self.handler.connect() - - ids, keys, space_type, status = None, None, None, None - for condition in conditions: - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - ids = [condition.value] - - elif condition.op == FilterOperator.IN: - ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'id'.") - - condition.applied = True - - if condition.column == "key": - if condition.op == FilterOperator.EQUAL: - keys = [condition.value] - - elif condition.op == FilterOperator.IN: - keys = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'key'.") - - condition.applied = True - - if condition.column == "type": - if condition.op == FilterOperator.EQUAL: - space_type = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'type'.") - - condition.applied = True - - if condition.column == "status": - if condition.op == FilterOperator.EQUAL: - status = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'status'.") - - condition.applied = True - - sort_condition = None - if sort: - for sort_column in sort: - if sort_column.column in ["id", "key", "name"]: - if sort_column.ascending: - sort_condition = sort_column.column - - else: - sort_condition = f"-{sort_column.column}" - - sort_column.applied = True - break - - spaces = client.get_spaces( - ids=ids, keys=keys, space_type=space_type, status=status, sort_condition=sort_condition, limit=limit - ) - - spaces_df = pd.json_normalize(spaces, sep="_") - spaces_df = spaces_df[self.get_columns()] - - return spaces_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'spaces' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'spaces' resource. - """ - return [ - "id", - "key", - "name", - "type", - "description_view_representation", - "description_view_value", - "status", - "authorId", - "createdAt", - "homepageId", - "_links_webui", - "currentActiveAlias", - ] - - -class ConfluencePagesTable(APIResource): - """ - The table abstraction for the 'pages' resource of the Confluence API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs, - ): - """ - Executes a parsed SELECT SQL query on the 'pages' resource of the Confluence API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - pages = [] - client: ConfluenceAPIClient = self.handler.connect() - - page_ids, space_ids, statuses, title = None, None, None, None - for condition in conditions: - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - page_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - page_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'page_id'.") - - condition.applied = True - - if condition.column == "spaceId": - if condition.op == FilterOperator.EQUAL: - space_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - space_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'spaceId'.") - - condition.applied = True - - if condition.column == "status": - if condition.op == FilterOperator.EQUAL: - statuses = [condition.value] - - elif condition.op == FilterOperator.IN: - statuses = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'status'.") - - condition.applied = True - - if condition.column == "title": - if condition.op == FilterOperator.EQUAL: - title = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'title'.") - - condition.applied = True - - sort_condition = None - if sort: - for sort_column in sort: - if sort_column.column in ["id", "title", "createdAt"]: - sort_condition = sort_column.column if sort_column.column != "createdAt" else "created-date" - if not sort_column.ascending: - sort_condition = f"-{sort_condition}" - - sort_column.applied = True - break - - pages = client.get_pages( - page_ids=page_ids, - space_ids=space_ids, - statuses=statuses, - title=title, - sort_condition=sort_condition, - limit=limit, - ) - - pages_df = pd.json_normalize(pages, sep="_") - pages_df = pages_df[self.get_columns()] - - return pages_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'pages' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'pages' resource. - """ - return [ - "id", - "status", - "title", - "spaceId", - "parentId", - "parentType", - "position", - "authorId", - "ownerId", - "lastOwnerId", - "createdAt", - "version_createdAt", - "version_message", - "version_number", - "version_minorEdit", - "version_authorId", - "body_storage_representation", - "body_storage_value", - "_links_webui", - "_links_editui", - "_links_tinyui", - ] - - -class ConfluenceBlogPostsTable(APIResource): - """ - The table abstraction for the 'blogposts' resource of the Confluence API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs, - ): - """ - Executes a parsed SELECT SQL query on the 'blogposts' resource of the Confluence API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - blogposts = [] - client: ConfluenceAPIClient = self.handler.connect() - - post_ids, space_ids, statuses, title = None, None, None, None - for condition in conditions: - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - post_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - post_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'id'.") - - condition.applied = True - - if condition.column == "spaceId": - if condition.op == FilterOperator.EQUAL: - space_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - space_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'spaceKey'.") - - condition.applied = True - - if condition.column == "status": - if condition.op == FilterOperator.EQUAL: - statuses = [condition.value] - - elif condition.op == FilterOperator.IN: - statuses = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'status'.") - - condition.applied = True - - if condition.column == "title": - if condition.op == FilterOperator.EQUAL: - title = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'title'.") - - condition.applied = True - - sort_condition = None - if sort: - for sort_column in sort: - if sort_column.column in ["id", "title", "createdAt"]: - sort_condition = sort_column.column if sort_column.column != "createdAt" else "created-date" - if not sort_column.ascending: - sort_condition = f"-{sort_condition}" - - sort_column.applied = True - break - - blogposts = client.get_blogposts( - post_ids=post_ids, - space_ids=space_ids, - statuses=statuses, - title=title, - sort_condition=sort_condition, - limit=limit, - ) - - blogposts_df = pd.json_normalize(blogposts, sep="_") - blogposts_df = blogposts_df[self.get_columns()] - - return blogposts_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'blogposts' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'blogposts' resource. - """ - return [ - "id", - "status", - "title", - "spaceId", - "authorId", - "createdAt", - "version_createdAt", - "version_message", - "version_number", - "version_minorEdit", - "version_authorId", - "body_storage_representation", - "body_storage_value", - "_links_webui", - "_links_editui", - "_links_tinyui", - ] - - -class ConfluenceWhiteboardsTable(APIResource): - """ - The table abstraction for the 'whiteboards' resource of the Confluence API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs, - ): - """ - Executes a parsed SELECT SQL query on the 'whiteboards' resource of the Confluence API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - whiteboards = [] - client: ConfluenceAPIClient = self.handler.connect() - - whiteboard_ids = None - for condition in conditions: - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - whiteboard_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - whiteboard_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'id'.") - - condition.applied = True - - if not whiteboard_ids: - raise ValueError("The 'id' column must be provided in the WHERE clause.") - - whiteboards = [client.get_whiteboard_by_id(whiteboard_id) for whiteboard_id in whiteboard_ids] - - whiteboards_df = pd.json_normalize(whiteboards, sep="_") - whiteboards_df = whiteboards_df[self.get_columns()] - - return whiteboards_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'whiteboards' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'whiteboards' resource. - """ - return [ - "id", - "type", - "status", - "title", - "parentId", - "parentType", - "position", - "authorId", - "ownerId", - "createdAt", - "version_createdAt", - "version_message", - "version_number", - "version_minorEdit", - "version_authorId", - ] - - -class ConfluenceDatabasesTable(APIResource): - """ - The table abstraction for the 'databases' resource of the Confluence API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs, - ): - """ - Executes a parsed SELECT SQL query on the 'databases' resource of the Confluence API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - databases = [] - client: ConfluenceAPIClient = self.handler.connect() - - database_ids = None - for condition in conditions: - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - database_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - database_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'id'.") - - condition.applied = True - - if not database_ids: - raise ValueError("The 'id' column must be provided in the WHERE clause.") - - databases = [client.get_database_by_id(database_id) for database_id in database_ids] - - databases_df = pd.json_normalize(databases, sep="_") - databases_df = databases_df[self.get_columns()] - - return databases_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'databases' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'databases' resource. - """ - return [ - "id", - "type", - "status", - "title", - "parentId", - "parentType", - "position", - "authorId", - "ownerId", - "createdAt", - "version_createdAt", - "version_message", - "version_number", - "version_minorEdit", - "version_authorId", - ] - - -class ConfluenceTasksTable(APIResource): - """ - The table abstraction for the 'tasks' resource of the Confluence API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs, - ): - """ - Executes a parsed SELECT SQL query on the 'tasks' resource of the Confluence API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - tasks = [] - client: ConfluenceAPIClient = self.handler.connect() - - task_ids = None - space_ids = None - page_ids = None - blogpost_ids = None - created_by_ids = None - assigned_to_ids = None - completed_by_ids = None - status = None - - for condition in conditions: - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - task_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - task_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'id'.") - - condition.applied = True - - if condition.column == "spaceId": - if condition.op == FilterOperator.EQUAL: - space_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - space_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'spaceId'.") - - condition.applied = True - - if condition.column == "pageId": - if condition.op == FilterOperator.EQUAL: - page_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - page_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'pageId'.") - - condition.applied = True - - if condition.column == "blogPostId": - if condition.op == FilterOperator.EQUAL: - blogpost_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - blogpost_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'blogPostId'.") - - condition.applied = True - - if condition.column == "createdBy": - if condition.op == FilterOperator.EQUAL: - created_by_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - created_by_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'createdBy'.") - - condition.applied = True - - if condition.column == "assignedTo": - if condition.op == FilterOperator.EQUAL: - assigned_to_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - assigned_to_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'assignedTo'.") - - condition.applied = True - - if condition.column == "completedBy": - if condition.op == FilterOperator.EQUAL: - completed_by_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - completed_by_ids = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'completedBy'.") - - condition.applied = True - - if condition.column == "status": - if condition.op == FilterOperator.EQUAL: - status = condition.value - - else: - raise ValueError(f"Unsupported operator '{condition.op}' for column 'status'.") - - condition.applied = True - - tasks = client.get_tasks( - task_ids=task_ids, - space_ids=space_ids, - page_ids=page_ids, - blogpost_ids=blogpost_ids, - created_by_ids=created_by_ids, - assigned_to_ids=assigned_to_ids, - completed_by_ids=completed_by_ids, - status=status, - limit=limit, - ) - tasks_df = pd.json_normalize(tasks, sep="_") - - # Each task will have either a 'pageId' or 'blogPostId' but not both. - # In situations where they are all from the same resource, the other column will be empty. - # We will fill the empty column with None to ensure consistency. - for column in ["pageId", "blogPostId"]: - if column not in tasks_df.columns: - tasks_df[column] = None - - tasks_df = tasks_df[self.get_columns()] - - return tasks_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'tasks' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'tasks' resource. - """ - return [ - "id", - "localId", - "spaceId", - "pageId", - "blogPostId", - "status", - "body_storage_representation", - "body_storage_value", - "createdBy", - "assignedTo", - "completedBy", - "createdAt", - "updatedAt", - "dueAt", - "completedAt", - ] diff --git a/mindsdb/integrations/handlers/confluence_handler/connection_args.py b/mindsdb/integrations/handlers/confluence_handler/connection_args.py deleted file mode 100644 index 52734cda9bf..00000000000 --- a/mindsdb/integrations/handlers/confluence_handler/connection_args.py +++ /dev/null @@ -1,32 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_base={ - "type": ARG_TYPE.URL, - "description": "The base URL of the Confluence instance/server.", - "label": "Base URL", - "required": True - }, - username={ - "type": ARG_TYPE.STR, - "description": "The username for the Confluence account.", - "label": "Username", - "required": True - }, - password={ - "type": ARG_TYPE.STR, - "description": "The API token for the Confluence account.", - "label": "Password", - "required": True, - "secret": True - } -) - -connection_args_example = OrderedDict( - api_base="https://marios.atlassian.net/", - username="your_username", - password="access_token" -) diff --git a/mindsdb/integrations/handlers/confluence_handler/icon.svg b/mindsdb/integrations/handlers/confluence_handler/icon.svg deleted file mode 100644 index a93cf859ca0..00000000000 --- a/mindsdb/integrations/handlers/confluence_handler/icon.svg +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/couchbase_handler/README.md b/mindsdb/integrations/handlers/couchbase_handler/README.md deleted file mode 100644 index ce83f31b6c5..00000000000 --- a/mindsdb/integrations/handlers/couchbase_handler/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Couchbase Handler - -This is the implementation of the Couchbase handler for MindsDB. - -## Implementation - -This handler was implemented using the `couchbase` library, the Python driver for Couchbase. - -The required arguments to establish a connection are: - -* `connection_string`: the connection string for the endpoint of the Couchbase server -* `bucket`: the bucket name to use when connecting with the Couchbase server -* `user`: the user to authenticate with the Couchbase server -* `password`: the password to authenticate the user with the Couchbase server -* `scope`: scopes are a level of data organization within a bucket. If omitted, will default to `_default` - -Note: The connection string expects either the couchbases:// or couchbase:// protocol. - -If you are using Couchbase Capella, you can find the `connection_string` under the Connect tab. -It will also be required to whitelist the machine(s) that will be running MindsDB and database credentials will need to be created for the user. These steps can also be taken under the Connect tab. - -## Usage - -In order to make use of this handler and connect to a Couchbase server in MindsDB, the following syntax can be used. Note, the example uses the default `travel-sample` bucket which can be enabled from the couchbase UI with pre-defined scope and documents. - -```sql -CREATE DATABASE couchbase_datasource -WITH -engine='couchbase', -parameters={ - "connection_string": "couchbase://localhost", - "bucket":"travel-sample", - "user": "admin", - "password": "password", - "scope": "inventory" -}; -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM couchbase_datasource.airport -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/couchbase_handler/__about__.py b/mindsdb/integrations/handlers/couchbase_handler/__about__.py deleted file mode 100644 index a7b89dcb1f5..00000000000 --- a/mindsdb/integrations/handlers/couchbase_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Couchbase handler" -__package_name__ = "mindsdb_couchbase_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Couchbase" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/couchbase_handler/__init__.py b/mindsdb/integrations/handlers/couchbase_handler/__init__.py deleted file mode 100644 index 6d55cc5633f..00000000000 --- a/mindsdb/integrations/handlers/couchbase_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .couchbase_handler import CouchbaseHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "Couchbase" -name = "couchbase" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/couchbase_handler/connection_args.py b/mindsdb/integrations/handlers/couchbase_handler/connection_args.py deleted file mode 100644 index acdbfa2a3bc..00000000000 --- a/mindsdb/integrations/handlers/couchbase_handler/connection_args.py +++ /dev/null @@ -1,31 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the Couchbase server.", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the Couchbase server.", - 'secret': True - }, - bucket={ - "type": ARG_TYPE.STR, - "description": "The database/bucket name to use when connecting with the Couchbase server.", - }, - connection_string={ - "type": ARG_TYPE.STR, - "description": "the Connection string to specify the cluster endpoint.", - }, - scope={ - "type": ARG_TYPE.STR, - "description": 'The scope use in the query context in Couchbase server. If blank, scope will be "_default".', - }, -) -connection_args_example = OrderedDict( - connection_string="couchbase://localhost", user="root", password="password", bucket="bucket" -) diff --git a/mindsdb/integrations/handlers/couchbase_handler/couchbase_handler.py b/mindsdb/integrations/handlers/couchbase_handler/couchbase_handler.py deleted file mode 100644 index 94afd2e861b..00000000000 --- a/mindsdb/integrations/handlers/couchbase_handler/couchbase_handler.py +++ /dev/null @@ -1,208 +0,0 @@ -from datetime import timedelta - -import pandas as pd -from couchbase.auth import PasswordAuthenticator -from couchbase.cluster import Cluster -from couchbase.exceptions import UnAmbiguousTimeoutException -from couchbase.options import ClusterOptions -from couchbase.exceptions import KeyspaceNotFoundException, CouchbaseException - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - - -logger = log.getLogger(__name__) - - -class CouchbaseHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Couchbase statements. - """ - - name = "couchbase" - DEFAULT_TIMEOUT_SECONDS = 5 - - def __init__(self, name, **kwargs): - super().__init__(name) - self.connection_data = kwargs.get("connection_data") - - self.scope = self.connection_data.get("scope") or "_default" - - self.bucket_name = self.connection_data.get("bucket") - self.cluster = None - - self.is_connected = False - - def connect(self): - """ - Set up connections required by the handler. - - Returns: - The connected cluster. - """ - if self.is_connected: - return self.cluster - - auth = PasswordAuthenticator( - self.connection_data.get("user"), - self.connection_data.get("password"), - # NOTE: If using SSL/TLS, add the certificate path. - # We strongly reccomend this for production use. - # cert_path=cert_path - ) - - options = ClusterOptions(auth) - - conn_str = self.connection_data.get("connection_string") - # wan_development is used to avoid latency issues while connecting to Couchbase over the internet - options.apply_profile('wan_development') - # connect to the cluster - cluster = Cluster( - conn_str, - options, - ) - - try: - # wait until the cluster is ready for use - cluster.wait_until_ready(timedelta(seconds=self.DEFAULT_TIMEOUT_SECONDS)) - self.is_connected = cluster.connected - self.cluster = cluster - except UnAmbiguousTimeoutException: - self.is_connected = False - raise - - return self.cluster - - def disconnect(self): - """Close any existing connections - Should switch self.is_connected. - """ - if self.is_connected is False: - return - self.is_connected = self.cluster.connected - return - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the Couchbase bucket - :return: success status and error message if error occurs - """ - result = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - cluster = self.connect() - result.success = cluster.connected - except UnAmbiguousTimeoutException as e: - logger.error( - f'Error connecting to Couchbase {self.connection_data["bucket"]}, {e}!' - ) - result.error_message = str(e) - - if result.success is True and need_to_close: - self.disconnect() - if result.success is False and self.is_connected is True: - self.is_connected = False - return result - - def native_query(self, query: str) -> Response: - """Execute a raw query against Couchbase. - - Args: - query (str): Raw Couchbase query. - - Returns: - HandlerResponse containing query results. - """ - self.connect() - bucket = self.cluster.bucket(self.bucket_name) - cb = bucket.scope(self.scope) - - data = {} - try: - for collection in cb.query(query): - for collection_name, row in collection.items(): - if isinstance(row, dict): - for k, v in row.items(): - data.setdefault(k, []).append(v) - else: - for k, v in collection.items(): - data.setdefault(k, []).append(v) - - response = Response( - RESPONSE_TYPE.TABLE, pd.DataFrame(data) if data else RESPONSE_TYPE.OK - ) - except CouchbaseException as e: - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e.error_context.first_error_message), - ) - - return response - - def query(self, query: ASTNode) -> Response: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - return self.native_query(query.to_string()) - - def get_tables(self) -> Response: - """ - Get a list of collections in database - """ - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - unique_collections = set() - for scope in bucket.collections().get_all_scopes(): - for collection in scope.collections: - unique_collections.add(collection.name) - collections = list(unique_collections) - df = pd.DataFrame(collections, columns=["TABLE_NAME"]) - response = Response(RESPONSE_TYPE.TABLE, df) - - return response - - def get_columns(self, table_name) -> Response: - """Returns a list of entity columns - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse: shoud have same columns as information_schema.columns - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html) - Column 'COLUMN_NAME' is mandatory, other is optional. Hightly - recomended to define also 'DATA_TYPE': it should be one of - python data types (by default it str). - """ - - response = Response(False) - - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - cb = bucket.scope(self.scope) - - try: - q = f"SELECT * FROM `{table_name}` limit 1" - row_iter = cb.query(q) - data = [] - for row in row_iter: - for k, v in row[table_name].items(): - data.append([k, type(v).__name__]) - df = pd.DataFrame(data, columns=["Field", "Type"]) - response = Response(RESPONSE_TYPE.TABLE, df) - except KeyspaceNotFoundException as e: - response = Response( - RESPONSE_TYPE.ERROR, - error_message=f"Error: {e.error_context.first_error_message}", - ) - - return response diff --git a/mindsdb/integrations/handlers/couchbase_handler/icon.svg b/mindsdb/integrations/handlers/couchbase_handler/icon.svg deleted file mode 100644 index e3a87c34b92..00000000000 --- a/mindsdb/integrations/handlers/couchbase_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/couchbase_handler/requirements.txt b/mindsdb/integrations/handlers/couchbase_handler/requirements.txt deleted file mode 100644 index 9a154260511..00000000000 --- a/mindsdb/integrations/handlers/couchbase_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -couchbase==4.3.1 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/couchbase_handler/tests/__init__.py b/mindsdb/integrations/handlers/couchbase_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/couchbase_handler/tests/test_couchbase_handler.py b/mindsdb/integrations/handlers/couchbase_handler/tests/test_couchbase_handler.py deleted file mode 100644 index 7337b9db7be..00000000000 --- a/mindsdb/integrations/handlers/couchbase_handler/tests/test_couchbase_handler.py +++ /dev/null @@ -1,39 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.couchbase_handler.couchbase_handler import ( - CouchbaseHandler, -) -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class CouchbaseHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - connection_data = { - "host": "192.168.33.10", - "user": "admin", - "password": "00154abs", - "bucket": "bag-bucket", - "scope": "test-scope", # This is optinal, but if ommited will default to _default. - } - cls.kwargs = dict(connection_data=connection_data) - cls.handler = CouchbaseHandler("test_couchbase_handler", **cls.kwargs) - - def test_0_connect(self): - self.handler.check_connection() - - def test_1_get_tables(self): - tbls = self.handler.get_tables() - assert tbls.type is not RESPONSE_TYPE.ERROR - - def test_2_get_column(self): - tbls = self.handler.get_columns("onsale") - assert tbls.type is not RESPONSE_TYPE.ERROR - - def test_3_native_query_select(self): - tbls = self.handler.native_query("SELECT * FROM onsale") - assert tbls.type is not RESPONSE_TYPE.ERROR - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/couchbasevector_handler/README.md b/mindsdb/integrations/handlers/couchbasevector_handler/README.md deleted file mode 100644 index eec6f5ffe6b..00000000000 --- a/mindsdb/integrations/handlers/couchbasevector_handler/README.md +++ /dev/null @@ -1,103 +0,0 @@ -# Couchbase Vectore Store Handler - -This is the implementation of the Couchbase Vector store data handler for MindsDB. - -## Implementation - -In order to make use of this handler and connect to a Couchbase server in MindsDB, the following syntax can be used. Note, that the example uses the default `travel-sample` bucket which can be enabled from the couchbase UI with pre-defined scope and documents. - -```sql -CREATE DATABASE couchbase_vectorsource -WITH -engine='couchbasevector', -parameters={ - "connection_string": "couchbase://localhost", - "bucket": "travel-sample", - "user": "admin", - "password": "password", - "scope": "inventory" -}; -``` - -This handler is implemented using the `couchbase` library, the Python driver for Couchbase. - -The required arguments to establish a connection are as follows: -* `connection_string`: the connection string for the endpoint of the Couchbase server -* `bucket`: the bucket name to use when connecting with the Couchbase server -* `user`: the user to authenticate with the Couchbase server -* `password`: the password to authenticate the user with the Couchbase server -* `scope`: scopes are a level of data organization within a bucket. If omitted, will default to `_default` - -Note: The connection string expects either the couchbases:// or couchbase:// protocol. - - -If you are using Couchbase Capella, you can find the connection_string under the Connect tab. -It will also be required to whitelist the machine(s) that will be running MindsDB and database credentials will need to be created for the user. These steps can also be taken under the Connect tab. - - -## Usage - -Now, you can use the established connection to create a collection (or table in the context of MindsDB) in Couchbase and insert data into it: - -### Creating tables - -Now, you can use the established connection to create a collection (or table in the context of MindsDB) in Couchbase and insert data into it: - -```sql -CREATE TABLE couchbase_vectorsource.test_embeddings ( - SELECT embeddings - FROM mysql_datasource.test_embeddings -); -``` - - -`mysql_datasource` is another MindsDB data source that has been created by connecting to a MySQL database. The `test_embeddings` table in the `mysql_datasource` data source contains the embeddings that we want to store in Couchbase. - - -### Querying and searching - -You can query your collection (table) as shown below: - -```sql -SELECT * -FROM couchbase_vectorsource.test_embeddings; -``` - -To filter the data in your collection (table) by metadata, you can use the following query: - -```sql -SELECT * -FROM couchbase_vectorsource.test_embeddings -WHERE id = "some_id"; - -``` - -To perform a vector search, the following query can be used: - -```sql -SELECT * -FROM couchbase_vectorsource.test_embeddings -WHERE embeddings = ( - SELECT embeddings - FROM mysql_datasource.test_embeddings - LIMIT 1 -); -``` - -### Deleting records - -You can delete documents using `DELETE` just like in SQL. - - -```sql -DELETE FROM couchbase_vectorsource.test_embeddings -WHERE `metadata.test` = 'test1'; -``` - -### Dropping connection - -To drop the connection, use this command - -```sql -DROP DATABASE couchbase_vectorsource; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/couchbasevector_handler/__about__.py b/mindsdb/integrations/handlers/couchbasevector_handler/__about__.py deleted file mode 100644 index 57e1e1c6967..00000000000 --- a/mindsdb/integrations/handlers/couchbasevector_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Couchbase Vector handler" -__package_name__ = "mindsdb_couchbasevector_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Couchbase as a Vector Store" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/couchbasevector_handler/__init__.py b/mindsdb/integrations/handlers/couchbasevector_handler/__init__.py deleted file mode 100644 index 67ead5066b1..00000000000 --- a/mindsdb/integrations/handlers/couchbasevector_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .couchbasevector_handler import CouchbaseVectorHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "CouchbaseVector" -name = "couchbasevector" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/couchbasevector_handler/connection_args.py b/mindsdb/integrations/handlers/couchbasevector_handler/connection_args.py deleted file mode 100644 index 6781f9afe03..00000000000 --- a/mindsdb/integrations/handlers/couchbasevector_handler/connection_args.py +++ /dev/null @@ -1,31 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the Couchbase server.", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the Couchbase server.", - 'secret': True - }, - bucket={ - "type": ARG_TYPE.STR, - "description": "The database/bucket name to use when connecting with the Couchbase server.", - }, - connection_string={ - "type": ARG_TYPE.STR, - "description": "the Connection string to specify the cluster endpoint.", - }, - scope={ - "type": ARG_TYPE.STR, - "description": 'The scope use in the query context in Couchbase server. If blank, scope will be "_default".', - }, -) -connection_args_example = OrderedDict( - connection_string="couchbase://localhost", user="root", password="password", bucket="bucket", scope="example_scope" -) diff --git a/mindsdb/integrations/handlers/couchbasevector_handler/couchbasevector_handler.py b/mindsdb/integrations/handlers/couchbasevector_handler/couchbasevector_handler.py deleted file mode 100644 index 7a1c29e7021..00000000000 --- a/mindsdb/integrations/handlers/couchbasevector_handler/couchbasevector_handler.py +++ /dev/null @@ -1,472 +0,0 @@ -import ast -from datetime import timedelta -import uuid - -import pandas as pd -from couchbase.auth import PasswordAuthenticator -from couchbase.cluster import Cluster -from couchbase.exceptions import UnAmbiguousTimeoutException -from couchbase.options import ClusterOptions -from couchbase.exceptions import CouchbaseException -from typing import List, Union - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - TableField, - VectorStoreHandler, -) - - -logger = log.getLogger(__name__) - - -class CouchbaseVectorHandler(VectorStoreHandler): - """ - This handler handles connection and execution of the Couchbase statements. - """ - - name = "couchbasevector" - DEFAULT_TIMEOUT_SECONDS = 5 - - def __init__(self, name, **kwargs): - super().__init__(name) - self.connection_data = kwargs.get("connection_data") - - self.scope = self.connection_data.get("scope") or "_default" - - self.bucket_name = self.connection_data.get("bucket") - self.cluster = None - - self.is_connected = False - - def connect(self): - """ - Set up connections required by the handler. - - Returns: - The connected cluster. - """ - if self.is_connected: - return self.cluster - - auth = PasswordAuthenticator( - self.connection_data.get("user"), - self.connection_data.get("password"), - # NOTE: If using SSL/TLS, add the certificate path. - # We strongly reccomend this for production use. - # cert_path=cert_path - ) - - options = ClusterOptions(auth) - - conn_str = self.connection_data.get("connection_string") - # wan_development is used to avoid latency issues while connecting to Couchbase over the internet - options.apply_profile("wan_development") - # connect to the cluster - cluster = Cluster( - conn_str, - options, - ) - - try: - # wait until the cluster is ready for use - cluster.wait_until_ready( - timedelta(seconds=self.DEFAULT_TIMEOUT_SECONDS) - ) - self.is_connected = cluster.connected - self.cluster = cluster - except UnAmbiguousTimeoutException: - self.is_connected = False - raise - - return self.cluster - - def disconnect(self): - """Close any existing connections - Should switch self.is_connected. - """ - if self.is_connected is False: - return - self.is_connected = self.cluster.connected - return - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the Couchbase bucket - :return: success status and error message if error occurs - """ - result = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - cluster = self.connect() - result.success = cluster.connected - except UnAmbiguousTimeoutException as e: - logger.error( - f'Error connecting to Couchbase {self.connection_data["bucket"]}, {e}!' - ) - result.error_message = str(e) - - if result.success is True and need_to_close: - self.disconnect() - if result.success is False and self.is_connected is True: - self.is_connected = False - return result - - def _translate_conditions( - self, conditions: List[FilterCondition] - ) -> Union[dict, None]: - """ - Translate filter conditions to a dictionary - """ - if conditions is None: - return {} - - return { - condition.column: { - "op": condition.op.value, - "value": condition.value, - } - for condition in conditions - } - - def _construct_full_after_from_query( - self, - where_query: str, - limit_query: str, - offset_query: str, - search_query: str, - ) -> str: - - return f"{where_query} {search_query} {limit_query} {offset_query} " - - def _construct_where_query(self, filter_conditions=None): - """ - Construct where querys from filter conditions - """ - if filter_conditions is None: - return "" - - where_querys = [] - metadata_conditions = { - key: value - for key, value in filter_conditions.items() - if not key.startswith(TableField.EMBEDDINGS.value) - } - for key, value in metadata_conditions.items(): - if value["op"].lower() == "in": - values = list(repr(i) for i in value["value"]) - value["value"] = "({})".format(", ".join(values)) - else: - value["value"] = repr(value["value"]) - where_querys.append(f'{key} {value["op"]} {value["value"]}') - - if len(where_querys) > 1: - return f"WHERE {' AND '.join(where_querys)}" - elif len(where_querys) == 1: - return f"WHERE {where_querys[0]}" - else: - return "" - - def _construct_search_query( - self, table_name: str, field: str, vector: list, k: int, condition: str - ): - """ - Construct a SEARCH query for KNN - :param table_name: Name of the table - :param field: The field on which to perform the search (e.g., embeddings) - :param vector: The vector to search against - :param k: The number of nearest neighbors to return, default: 2 - :return: The SEARCH query as a string - """ - k_value = k if k is not None else 2 - search_query = f""" - {condition} SEARCH({table_name}, {{ - "fields": ["*"], - "query": {{ - "match_none": "" - }}, - "knn": [ - {{ - "k": {k_value}, - "field": "{field}", - "vector": {vector} - }} - ] - }}) - """ - return search_query.strip() - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> pd.DataFrame: - filter_conditions = self._translate_conditions(conditions) - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - scope = bucket.scope(self.scope) - documents, metadatas, embeddings = [], [], [] - - vector_filter = ( - next( - ( - condition - for condition in conditions - if condition.column == TableField.EMBEDDINGS.value - ), - None, - ) - if conditions - else None - ) - limit_query = f"LIMIT {limit}" if limit else "" - offset_query = f"OFFSET {offset}" if offset else "" - if vector_filter: - vector = vector_filter.value - if not isinstance(vector, list): - vector = ast.literal_eval(vector) - - where_query = self._construct_where_query(filter_conditions) - if where_query == "": - search_query = self._construct_search_query( - table_name, - TableField.EMBEDDINGS.value, - vector_filter.value, - limit, - "WHERE", - ) - else: - search_query = self._construct_search_query( - table_name, - TableField.EMBEDDINGS.value, - vector_filter.value, - limit, - "AND", - ) - after_from_query = self._construct_full_after_from_query( - where_query, limit_query, offset_query, search_query - ) - - if columns is None: - targets = "id, content, embeddings, metadata" - else: - targets = ", ".join(columns) - query = f"SELECT SEARCH_SCORE() AS score, {targets} FROM {table_name} {after_from_query}" - try: - result = scope.query(query) - except CouchbaseException as e: - raise Exception(f"Error while executing query: '{e}'") - - # Process results - ids, documents, distances = [], [], [] - for hit in result.rows(): - ids.append(hit.get("id", "")) - documents.append(hit.get("content", "")) - embeddings.append(hit.get("embeddings", [])) - metadatas.append(hit.get("metadata", {})) - distances.append(hit.get("score", "")) - else: - - where_query = self._construct_where_query(filter_conditions) - after_from_query = self._construct_full_after_from_query( - where_query, limit_query, offset_query, "" - ) - - if columns is None: - targets = "id, content, embeddings, metadata" - else: - targets = ", ".join(columns) - - query = f"SELECT {targets} FROM {table_name} {after_from_query}" - try: - result = scope.query(query) - except CouchbaseException as e: - raise Exception(f"Error while executing query: '{e}'") - - ids = [] - documents = [] - for hit in result.rows(): - ids.append(hit.get("id", "")) - documents.append(hit.get("content", "")) - embeddings.append(hit.get("embeddings", [])) - metadatas.append(hit.get("metadata", {})) - - distances = None - - # Prepare the payload - payload = { - TableField.ID.value: ids, - TableField.CONTENT.value: [doc for doc in documents], - TableField.METADATA.value: [doc for doc in metadatas], - TableField.EMBEDDINGS.value: [doc for doc in embeddings], - } - if columns: - payload = { - column: payload[column] - for column in columns - if column in payload - } - if distances is not None: - payload[TableField.DISTANCE.value] = distances - return pd.DataFrame(payload) - - def insert(self, table_name: str, data: pd.DataFrame) -> Response: - """ - Insert data into Couchbase. - """ - - data.dropna(axis=1, inplace=True) - # Convert DataFrame to list of dictionaries - records = data.to_dict(orient="records") - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - scope = bucket.scope(self.scope) - collection = scope.collection(table_name) - - for record in records: - doc_id = record.get(TableField.ID.value, str(uuid.uuid4())) - document = {TableField.ID.value: doc_id} - - if TableField.CONTENT.value in record: - document[TableField.CONTENT.value] = record[ - TableField.CONTENT.value - ] - - if TableField.EMBEDDINGS.value in record: - document[TableField.EMBEDDINGS.value] = record[ - TableField.EMBEDDINGS.value - ] - if not isinstance(document[TableField.EMBEDDINGS.value], list): - document[TableField.EMBEDDINGS.value] = ast.literal_eval( - document[TableField.EMBEDDINGS.value] - ) - - if TableField.METADATA.value in record: - document[TableField.METADATA.value] = record[ - TableField.METADATA.value - ] - document_key = f"{table_name}::{doc_id}" - - collection.upsert(document_key, document) - return Response(resp_type=RESPONSE_TYPE.OK) - - def upsert(self, table_name: str, data: pd.DataFrame): - return self.insert(table_name, data) - - def update( - self, - table_name: str, - data: pd.DataFrame, - key_columns: List[str] = None, - ): - """ - Update data in Couchbase. - """ - # Convert DataFrame to list of dictionaries - records = data.to_dict(orient="records") - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - scope = bucket.scope(self.scope) - collection = scope.collection(table_name) - try: - for record in records: - doc_id = record.get(TableField.ID.value) - if doc_id: - existing_doc = self.collection.get(doc_id) - if existing_doc: - updated_doc = existing_doc.content - updated_doc.update(record) - collection.replace(doc_id, updated_doc) - except CouchbaseException as e: - raise Exception(f"Error while updating document: '{e}'") - - def delete( - self, table_name: str, conditions: List[FilterCondition] = None - ): - """ - Delete documents in Couchbase based on conditions. - """ - filter_conditions = self._translate_conditions(conditions) - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - scope = bucket.scope(self.scope) - where_query = self._construct_where_query(filter_conditions) - - query = f"DELETE FROM {table_name} {where_query}" - try: - _ = scope.query(query) - except CouchbaseException as e: - raise Exception( - f"Error while performing delete query index: '{e}'" - ) - - def create_table(self, table_name: str, if_not_exists=True): - """ - In Couchbase, tables are represented as collections within a bucket. - This method creates a new collection if it doesn't exist. - """ - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - try: - bucket.collections().create_collection( - scope_name=self.scope, collection_name=table_name - ) - except Exception as e: - raise Exception(f"Error while creating table: '{e}'") - - def drop_table(self, table_name: str, if_exists=True) -> Response: - """ - Drop a collection in Couchbase. - """ - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - scope = bucket.scope(self.scope) - _ = scope.collection(table_name) - try: - bucket.collections().drop_collection(table_name) - return Response(resp_type=RESPONSE_TYPE.OK) - except Exception as e: - return Response(resp_type=RESPONSE_TYPE.ERROR, error_message=e) - - def get_tables(self) -> Response: - """ - Get the list of collections in the Couchbase bucket. - """ - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - collections = bucket.collections().get_all_scopes() - collection_names = [ - coll.name for scope in collections for coll in scope.collections - ] - collections_df = pd.DataFrame( - columns=["table_name"], data=collection_names - ) - return Response( - resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_df - ) - - def get_columns(self, table_name: str) -> Response: - """ - Get the columns (fields) of a Couchbase collection. - """ - try: - cluster = self.connect() - bucket = cluster.bucket(self.bucket_name) - scope = bucket.scope(self.scope) - _ = scope.collection(table_name) - except Exception: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Table {table_name} does not exist!", - ) - return super().get_columns(table_name) diff --git a/mindsdb/integrations/handlers/couchbasevector_handler/icon.svg b/mindsdb/integrations/handlers/couchbasevector_handler/icon.svg deleted file mode 100644 index e3a87c34b92..00000000000 --- a/mindsdb/integrations/handlers/couchbasevector_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/couchbasevector_handler/requirements.txt b/mindsdb/integrations/handlers/couchbasevector_handler/requirements.txt deleted file mode 100644 index 9a154260511..00000000000 --- a/mindsdb/integrations/handlers/couchbasevector_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -couchbase==4.3.1 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/couchbasevector_handler/tests/__init__.py b/mindsdb/integrations/handlers/couchbasevector_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/crate_handler/README.md b/mindsdb/integrations/handlers/crate_handler/README.md deleted file mode 100644 index b715ebe25a4..00000000000 --- a/mindsdb/integrations/handlers/crate_handler/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Crate DB Handler - -This is the implementation of the Crate DB handler for MindsDB. - -## Crate DB -CrateDB is a distributed SQL database management system that integrates a fully searchable document-oriented data store. It is open-source, written in Java, based on a shared-nothing architecture, and designed for high scalability. CrateDB includes components from Trino, Lucene, Elasticsearch and Netty. - - -## Implementation -This handler was implemented using the `crate`, a Python library that allows you to use Python code to run SQL commands on Crate DB. - -The required arguments to establish a connection are, -* `user`: username associated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which connection is to be made. -* `schema_name`: schema name to get tables. - - _Note : DefaulT value of schema_name is 'doc'_ - -## Usage -In order to make use of this handler and connect to Crate DB in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE crate_datasource -WITH -engine='crate', -parameters={ - "user":"crate", - "password":"", - "host":"127.0.0.1", - "port":4200, - "schema_name":"doc" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM crate_datasource.demo; -~~~~ diff --git a/mindsdb/integrations/handlers/crate_handler/__about__.py b/mindsdb/integrations/handlers/crate_handler/__about__.py deleted file mode 100644 index b5f509144d2..00000000000 --- a/mindsdb/integrations/handlers/crate_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Crate DB handler" -__package_name__ = "mindsdb_cratedb_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Crate DB" -__author__ = "Parthiv Makwana" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/crate_handler/__init__.py b/mindsdb/integrations/handlers/crate_handler/__init__.py deleted file mode 100644 index 9927647b7af..00000000000 --- a/mindsdb/integrations/handlers/crate_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .crate_handler import CrateHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "CrateDB" -name = "crate" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/crate_handler/crate_handler.py b/mindsdb/integrations/handlers/crate_handler/crate_handler.py deleted file mode 100644 index 0cd17c61f89..00000000000 --- a/mindsdb/integrations/handlers/crate_handler/crate_handler.py +++ /dev/null @@ -1,222 +0,0 @@ -from collections import OrderedDict -from typing import Optional -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.integrations.libs.const import ( - HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE, -) - - -import pandas as pd -from crate import client as db -from sqlalchemy_cratedb import dialect - -logger = log.getLogger(__name__) - - -class CrateHandler(DatabaseHandler): - name = "crate" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """Initialize the handler - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.kwargs = kwargs - self.parser = parse_sql - self.dialect = "crate" - self.user = connection_data["user"] - self.password = connection_data["password"] - self.schemaName = connection_data.get("schema_name", "doc") - self.host = connection_data["host"] - self.port = connection_data["port"] - - self.connection = None - self.is_connected = False - - def connect(self): - """Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - Connection Object - """ - if self.is_connected: - return self.connection - - is_local = self.host.startswith("localhost") or self.host == "127.0.0.1" - - try: - # Build URL based on connection type - protocol = "http" if is_local else "https" - url = f"{protocol}://{self.user}:{self.password}@{self.host}:{self.port}" - - # Connect with appropriate settings based on connection type - self.connection = db.connect( - url, - timeout=30, - # Only verify SSL for cloud connections - verify_ssl_cert=not is_local, - ) - - self.is_connected = True - except Exception as e: - logger.error(f"Error while connecting to CrateDB: {e}") - - return self.connection - - def disconnect(self): - """Close any existing connections - Should switch self.is_connected. - """ - - if self.is_connected is False: - return - try: - self.connection.close() - self.is_connected = False - except Exception as e: - logger.error(f"Error while disconnecting to CrateDB, {e}") - - return - - def check_connection(self) -> StatusResponse: - """Check connection to the handler - Returns: - HandlerStatusResponse - """ - - responseCode = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - responseCode.success = True - except Exception as e: - logger.error(f"Error connecting to CrateDB, {e}!") - responseCode.error_message = str(e) - finally: - if responseCode.success is True and need_to_close: - self.disconnect() - if responseCode.success is False and self.is_connected is True: - self.is_connected = False - - return responseCode - - def native_query(self, query: str) -> StatusResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - etc) - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - conn = self.connect() - cur = conn.cursor() - try: - cur.execute(query) - if cur.rowcount: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame(result, columns=[x[0] for x in cur.description]), - ) - else: - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f"Error running query: {query} on CrateDB!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - cur.close() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - renderer = SqlalchemyRender(dialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """Return list of entities - Return list of entities that will be accesible as tables. - Returns: - HandlerResponse: shoud have same columns as information_schema.tables - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html) - Column 'TABLE_NAME' is mandatory, other is optional. - """ - - q = f"SHOW TABLES FROM {self.schemaName};" - result = self.native_query(q) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """Returns a list of entity columns - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse: shoud have same columns as information_schema.columns - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html) - Column 'COLUMN_NAME' is mandatory, other is optional. Hightly - recomended to define also 'DATA_TYPE': it should be one of - python data types (by default it str). - """ - - q = f"SHOW COLUMNS FROM {table_name};" - result = self.native_query(q) - return result - - -connection_args = OrderedDict( - host={ - "type": ARG_TYPE.STR, - "description": "The host name or IP address of the CrateDB server/database.", - }, - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the CrateDB server.", - }, - password={ - "type": ARG_TYPE.STR, - "description": "The password to authenticate the user with the CrateDB server.", - }, - port={ - "type": ARG_TYPE.INT, - "description": "Specify port to connect CrateDB server", - }, - schemaName={ - "type": ARG_TYPE.STR, - "description": 'Specify the schema name. Note: It is optional DEFAULT is "doc"', - }, -) - -connection_args_example = OrderedDict( - host="127.0.0.1", - port="4200", - password="", - user="crate", -) diff --git a/mindsdb/integrations/handlers/crate_handler/icon.svg b/mindsdb/integrations/handlers/crate_handler/icon.svg deleted file mode 100644 index 6a713cf0c50..00000000000 --- a/mindsdb/integrations/handlers/crate_handler/icon.svg +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/crate_handler/requirements.txt b/mindsdb/integrations/handlers/crate_handler/requirements.txt deleted file mode 100644 index 97ec2bbe8bd..00000000000 --- a/mindsdb/integrations/handlers/crate_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -crate -sqlalchemy-cratedb -urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/crate_handler/tests/__init__.py b/mindsdb/integrations/handlers/crate_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/crate_handler/tests/test_crate_handler.py b/mindsdb/integrations/handlers/crate_handler/tests/test_crate_handler.py deleted file mode 100644 index 1300f54d19e..00000000000 --- a/mindsdb/integrations/handlers/crate_handler/tests/test_crate_handler.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.crate_handler.crate_handler import CrateHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class CrateHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "127.0.0.1", - "port": 4200, - "user": "crate", - "password": "", - } - cls.handler = CrateHandler("test_crate_handler", cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS PREM;") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_create_table(self): - res = self.handler.query("CREATE TABLE IF NOT EXISTS PREM (Premi varchar(50));") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_insert_table(self): - res = self.handler.query("INSERT INTO PREM VALUES('Radha <3 Krishna');") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_4_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_5_select_query(self): - query = "SELECT * FROM PREM;" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE or RESPONSE_TYPE.OK - - def test_6_check_connection(self): - self.handler.check_connection() - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/d0lt_handler/README.md b/mindsdb/integrations/handlers/d0lt_handler/README.md deleted file mode 100644 index 461f1aced40..00000000000 --- a/mindsdb/integrations/handlers/d0lt_handler/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# D0lt Handler - -This is the implementation of the D0lt handler for MindsDB. - -## Dolt is Git for Data! -Dolt is a single-node and embedded DBMS that incorporates Git-style versioning as a first-class entity. Dolt behaves like Git where it is a content addressable local database where the main objects are tables instead of files. In Dolt, a user creates a database locally. The database contains tables that can be read and updated using SQL. Similar to Git, writes are staged until the user issues a commit. Upon commit, the writes are appended to permanent storage. - -Branch/merge semantics are supported allowing for the tables to evolve at a different pace for multiple users. This allows for loose collaboration on data as well as multiple views on the same core data. Merge conflicts are detected for schema and data conflicts. Data conflicts are cell-based, not line-based. Remote repositories allow for cooperation among repository instances. Clone, push, and pull semantics are all available. - -## Implementation -This handler was implemented using the `mysql-connector`, a Python library that allows you to use Python code to run SQL commands on D0lt Database. - -The required arguments to establish a connection are, -* `user`: username associated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `database`: Database name to be connected - - -## Usage -In order to make use of this handler and connect to D0lt in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE d0lt_datasource -WITH -engine='d0lt', -parameters={ - "user":"root", - "password":"", - "host":"127.0.0.1", - "port":3306, - "database":"information_schema" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM D0lt_datasource.TEST; -~~~~ diff --git a/mindsdb/integrations/handlers/d0lt_handler/__about__.py b/mindsdb/integrations/handlers/d0lt_handler/__about__.py deleted file mode 100644 index ad8d966fd33..00000000000 --- a/mindsdb/integrations/handlers/d0lt_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB D0lt handler" -__package_name__ = "mindsdb_d0lt_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for D0lt" -__author__ = "Parthiv Makwana" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/d0lt_handler/__init__.py b/mindsdb/integrations/handlers/d0lt_handler/__init__.py deleted file mode 100644 index 6dc5ddc4322..00000000000 --- a/mindsdb/integrations/handlers/d0lt_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .d0lt_handler import ( - D0ltHandler as Handler, - ) - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "D0lt" -name = "d0lt" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/d0lt_handler/d0lt_handler.py b/mindsdb/integrations/handlers/d0lt_handler/d0lt_handler.py deleted file mode 100644 index 13c3403dfe5..00000000000 --- a/mindsdb/integrations/handlers/d0lt_handler/d0lt_handler.py +++ /dev/null @@ -1,12 +0,0 @@ -from mindsdb.integrations.handlers.matrixone_handler.matrixone_handler import MatrixOneHandler - - -class D0ltHandler(MatrixOneHandler): - """ - This handler handles connection and execution of the SQL statements With D0lt. - """ - - name = "d0lt" - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/d0lt_handler/icon.svg b/mindsdb/integrations/handlers/d0lt_handler/icon.svg deleted file mode 100644 index 9f4e636ef51..00000000000 --- a/mindsdb/integrations/handlers/d0lt_handler/icon.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/d0lt_handler/requirements.txt b/mindsdb/integrations/handlers/d0lt_handler/requirements.txt deleted file mode 100644 index 3ccd29487cc..00000000000 --- a/mindsdb/integrations/handlers/d0lt_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/matrixone_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/d0lt_handler/tests/__init__.py b/mindsdb/integrations/handlers/d0lt_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/d0lt_handler/tests/test_d0lt_handler.py b/mindsdb/integrations/handlers/d0lt_handler/tests/test_d0lt_handler.py deleted file mode 100644 index 9efd69ce2fe..00000000000 --- a/mindsdb/integrations/handlers/d0lt_handler/tests/test_d0lt_handler.py +++ /dev/null @@ -1,53 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.d0lt_handler.d0lt_handler import D0ltHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class D0ltHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "localhost", - "port": 3306, - "user": "root", - "password": "", - "database": "getting_started", - "ssl": False, - } - cls.handler = D0ltHandler("test_mysql_handler", connection_data=cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS PREM;") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_create_table(self): - res = self.handler.query("CREATE TABLE IF NOT EXISTS PREM (Premi varchar(50));") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_insert_table(self): - res = self.handler.query("INSERT INTO PREM VALUES('Radha <3 Krishna');") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_4_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_5_select_query(self): - query = "SELECT * FROM PREM;" - result = self.handler.native_query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_6_get_columns(self): - result = self.handler.get_columns("PREM") - assert result.type is not RESPONSE_TYPE.ERROR - - def test_7_check_connection(self): - self.handler.check_connection() - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/databend_handler/README.md b/mindsdb/integrations/handlers/databend_handler/README.md deleted file mode 100644 index 339cfb86538..00000000000 --- a/mindsdb/integrations/handlers/databend_handler/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Databend Handler - -This is the implementation of the Databend handler for MindsDB. - -## Databend -Databend is a modern cloud data warehouse that empowers your object storage for real-time analytics. -
-https://databend.rs/ - -## Implementation -This handler was implemented using `databend-sqlalchemy` library, the Databend dialect for SQLAlchemy. - -The required arguments to establish a connection are, -* `host`: the host name or IP address of the Databend warehouse. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server. -* `port`: the TCP/IP port of the Databend warehouse. -* `user`: the username used to authenticate with the Databend warehouse. -* `password`: the password to authenticate the user with the Databend warehouse. -* `database`: the database name to use when connecting with the Databend warehouse. - -## Usage -In order to make use of this handler and connect to Databend in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE databend_datasource -WITH -engine='databend', -parameters={ - "user": "root", - "port": 443, - "password": "password", - "host": "some-url.aws-us-east-2.default.databend.com", - "database": "test_db" -}; -~~~~ - -Now, you can use this established connection to query your data warehouse as follows, -~~~~sql -SELECT * FROM databend_datasource.example_tbl -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/databend_handler/__about__.py b/mindsdb/integrations/handlers/databend_handler/__about__.py deleted file mode 100644 index 94ad44a3483..00000000000 --- a/mindsdb/integrations/handlers/databend_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Databend handler' -__package_name__ = 'mindsdb_databend_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Databend" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/databend_handler/__init__.py b/mindsdb/integrations/handlers/databend_handler/__init__.py deleted file mode 100644 index 50c6ea0ecf7..00000000000 --- a/mindsdb/integrations/handlers/databend_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .databend_handler import DatabendHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Databend' -name = 'databend' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/databend_handler/connection_args.py b/mindsdb/integrations/handlers/databend_handler/connection_args.py deleted file mode 100644 index a98fdfe0157..00000000000 --- a/mindsdb/integrations/handlers/databend_handler/connection_args.py +++ /dev/null @@ -1,36 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Databend warehouse.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Databend warehouse.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Databend warehouse.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Databend warehouse. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the ClickHouse server.' - } -) - -connection_args_example = OrderedDict( - host='some-url.aws-us-east-2.default.databend.com', - port=443, - user='root', - password='password', - database='test_db' -) diff --git a/mindsdb/integrations/handlers/databend_handler/databend_handler.py b/mindsdb/integrations/handlers/databend_handler/databend_handler.py deleted file mode 100644 index 6e0e4bd5c89..00000000000 --- a/mindsdb/integrations/handlers/databend_handler/databend_handler.py +++ /dev/null @@ -1,199 +0,0 @@ -from typing import Optional - -import pandas as pd -from databend_sqlalchemy import connector - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from databend_sqlalchemy.databend_dialect import DatabendDialect - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class DatabendHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Databend statements. - """ - name = 'databend' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'databend' - - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - if self.connection_data['host'] == 'localhost' or self.connection_data['host'] == '127.0.0.1': - ssl_mode = 'disable' - else: - ssl_mode = 'require' - - self.connection = connector.connect( - f"databend://{self.connection_data['user']}:{self.connection_data['password']}@{self.connection_data['host']}:{self.connection_data['port']}/{self.connection_data['database']}?sslmode={ssl_mode}" - ) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Databend, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cursor.description] - ) - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f'Error running query: {query} on Databend!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - renderer = SqlalchemyRender(DatabendDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = f""" - SHOW TABLES IN {self.connection_data["database"]} - """ - result = self.native_query(query) - df = result.data_frame - - if df is not None: - df = df[[f'Tables_in_{self.connection_data["database"]}']] - result.data_frame = df.rename(columns={f'Tables_in_{self.connection_data["database"]}': 'table_name'}) - - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - query = f""" - DESC {self.connection_data["database"]}.{table_name} - """ - result = self.native_query(query) - df = result.data_frame - - result.data_frame = df.rename(columns={'Field': 'column_name', 'Type': 'data_type', 'Null': 'is_nullable', 'Default': 'default_value', 'Extra': 'extra'}) - - return result diff --git a/mindsdb/integrations/handlers/databend_handler/icon.svg b/mindsdb/integrations/handlers/databend_handler/icon.svg deleted file mode 100644 index 12e8f231eee..00000000000 --- a/mindsdb/integrations/handlers/databend_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/databend_handler/requirements.txt b/mindsdb/integrations/handlers/databend_handler/requirements.txt deleted file mode 100644 index 98d30d4b98c..00000000000 --- a/mindsdb/integrations/handlers/databend_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -databend-sqlalchemy \ No newline at end of file diff --git a/mindsdb/integrations/handlers/databend_handler/tests/__init__.py b/mindsdb/integrations/handlers/databend_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py b/mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py deleted file mode 100644 index 4b843594b4f..00000000000 --- a/mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.databend_handler.databend_handler import DatabendHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class DatabendHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - connection_data = { - "host": "some-url.aws-us-east-2.default.databend.com", - "port": 443, - "user": "root", - "password": "password", - "database": "test_db" - } - cls.handler = DatabendHandler('test_databend_handler', connection_data) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_select_query(self): - query = 'SELECT * FROM covid_19_us_2022_4668 LIMIT 10' - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tbls = self.handler.get_tables() - assert tbls.type is not RESPONSE_TYPE.ERROR - - def test_3_describe_table(self): - described = self.handler.get_columns("covid_19_us_2022_4668") - print('described', described) - assert described.type is RESPONSE_TYPE.TABLE - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/databricks_handler/__init__.py b/mindsdb/integrations/handlers/databricks_handler/__init__.py index 6fc16a9d5da..9af359b9c82 100644 --- a/mindsdb/integrations/handlers/databricks_handler/__init__.py +++ b/mindsdb/integrations/handlers/databricks_handler/__init__.py @@ -15,7 +15,7 @@ name = "databricks" type = HANDLER_TYPE.DATA icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ "Handler", diff --git a/mindsdb/integrations/handlers/databricks_handler/databricks_handler.py b/mindsdb/integrations/handlers/databricks_handler/databricks_handler.py index 2feab0a37d4..755308d419b 100644 --- a/mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +++ b/mindsdb/integrations/handlers/databricks_handler/databricks_handler.py @@ -404,7 +404,7 @@ def native_query(self, query: Text) -> Response: try: cursor.execute(query) result = cursor.fetchall() - if result: + if cursor.description: response = Response( RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]), @@ -465,6 +465,8 @@ def get_tables(self, all: bool = False) -> Response: {all_filter} """ result = self.native_query(query) + if result.resp_type != RESPONSE_TYPE.TABLE or result.data_frame is None: + return result df = result.data_frame result.data_frame = df.rename(columns={col: col.upper() for col in df.columns}) return result diff --git a/mindsdb/integrations/handlers/databricks_handler/requirements.txt b/mindsdb/integrations/handlers/databricks_handler/requirements.txt index 212e52860fc..0137133cc54 100644 --- a/mindsdb/integrations/handlers/databricks_handler/requirements.txt +++ b/mindsdb/integrations/handlers/databricks_handler/requirements.txt @@ -1 +1 @@ -databricks-sql-connector >= 3.7.1, < 4.0.0 +databricks-sql-connector==4.2.3 diff --git a/mindsdb/integrations/handlers/datastax_handler/README.md b/mindsdb/integrations/handlers/datastax_handler/README.md deleted file mode 100644 index 9183360ac1c..00000000000 --- a/mindsdb/integrations/handlers/datastax_handler/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# Datastax Astra DB Handler - -This is the implementation of the Datastax Astra DB handler for MindsDB. - -## Datastax - -DataStax, Inc. is a real-time data company based in Santa Clara, California.[3] Its product Astra DB is a cloud database-as-a-service based on Apache Cassandra. DataStax also offers DataStax Enterprise (DSE), an on-premises database built on Apache Cassandra, and Astra Streaming, a messaging and event streaming cloud service based on Apache Pulsar. - -## Implementation - -Datastax Astra DB is API-compatible with Apache Cassandra and Scylla DB so this handler just extends the ScyllaHandler and is using the python `scylla-driver` library. - -The required arguments to establish a connection are: - -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `secure_connection_bundle`: Path to the secure_connection_bundle zip file - -## Usage - -In order to make use of this handler and connect to the Astra DB in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE astra_connection -WITH ENGINE = "astra", -PARAMETERS = { - "user": "user", - "password": "pass", - "secure_connect_bundle": "/home/Downloads/file.zip" - } -``` - -or, reference the bundle from Datastax s3 as: - -```sql -CREATE DATABASE astra_connection -WITH ENGINE = "astra", -PARAMETERS = { - "user": "user", - "password": "pass", - "secure_connect_bundle": "https://datastax-cluster-config-prod.s3.us-east-2.amazonaws.com/32312-b9eb-4e09-a641-213eaesa12-1/secure-connect-demo.zip?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK..." -} -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM astra_connection.keystore.example_table LIMIT 10; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/datastax_handler/__about__.py b/mindsdb/integrations/handlers/datastax_handler/__about__.py deleted file mode 100644 index d2c44b57913..00000000000 --- a/mindsdb/integrations/handlers/datastax_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Datastax handler" -__package_name__ = "mindsdb_datastax_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for DataStax Astra DB" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/datastax_handler/__init__.py b/mindsdb/integrations/handlers/datastax_handler/__init__.py deleted file mode 100644 index 68c2a38ea9d..00000000000 --- a/mindsdb/integrations/handlers/datastax_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .datastax_handler import DatastaxHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "Datastax Astra DB" -name = "astra" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/datastax_handler/datastax_handler.py b/mindsdb/integrations/handlers/datastax_handler/datastax_handler.py deleted file mode 100644 index 3cecb606014..00000000000 --- a/mindsdb/integrations/handlers/datastax_handler/datastax_handler.py +++ /dev/null @@ -1,12 +0,0 @@ -from mindsdb.integrations.handlers.scylla_handler import Handler as ScyllaHandler - - -class DatastaxHandler(ScyllaHandler): - """ - This handler handles connection and execution of the Datastax Astra DB statements. - """ - - name = "astra" - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/datastax_handler/icon.svg b/mindsdb/integrations/handlers/datastax_handler/icon.svg deleted file mode 100644 index 051d7320425..00000000000 --- a/mindsdb/integrations/handlers/datastax_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/datastax_handler/requirements.txt b/mindsdb/integrations/handlers/datastax_handler/requirements.txt deleted file mode 100644 index 45c4777e7b7..00000000000 --- a/mindsdb/integrations/handlers/datastax_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/scylla_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/datastax_handler/tests/__init__.py b/mindsdb/integrations/handlers/datastax_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/datastax_handler/tests/test_cassandra_handler.py b/mindsdb/integrations/handlers/datastax_handler/tests/test_cassandra_handler.py deleted file mode 100644 index 61dcad98adb..00000000000 --- a/mindsdb/integrations/handlers/datastax_handler/tests/test_cassandra_handler.py +++ /dev/null @@ -1,38 +0,0 @@ -import unittest -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.handlers.datastax_handler.datastax_handler import ( - DatastaxHandler, -) - - -class DatastaxHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "user": "cassandra", - "password": "", - "secure_connect_bundle": "/home/Downloads/file.zip", - } - } - cls.handler = DatastaxHandler("test_datastax_handler", **cls.kwargs) - - def test_0_connect(self): - self.handler.check_connection() - - def test_1_native_query_show_keyspaces(self): - dbs = self.handler.native_query("DESC KEYSPACES;") - assert dbs.type is not RESPONSE_TYPE.ERROR - - def test_2_get_tables(self): - tbls = self.handler.get_tables() - assert tbls.type is not RESPONSE_TYPE.ERROR - - def test_3_describe_table(self): - described = self.handler.get_columns("home_rentals") - assert described.type is RESPONSE_TYPE.TABLE - - def test_4_select_query(self): - query = "SELECT * FROM home_rentals WHERE 'id'='3712'" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE diff --git a/mindsdb/integrations/handlers/db2_handler/README.md b/mindsdb/integrations/handlers/db2_handler/README.md deleted file mode 100644 index 0b29be1b38d..00000000000 --- a/mindsdb/integrations/handlers/db2_handler/README.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: IBM Db2 -sidebarTitle: IBM Db2 ---- - -This documentation describes the integration of MindsDB with [IBM Db2](https://www.ibm.com/db2), the cloud-native database built to power low-latency transactions, real-time analytics and AI applications at scale. -The integration allows MindsDB to access data stored in the IBM Db2 database and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect IBM Db2 to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your IBM Db2 database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE db2_datasource -WITH - engine = 'db2', - parameters = { - "host": "127.0.0.1", - "user": "db2inst1", - "password": "password", - "database": "example_db" - }; -``` - -Required connection parameters include the following: - -* `host`: The hostname, IP address, or URL of the IBM Db2 database. -* `user`: The username for the IBM Db2 database. -* `password`: The password for the IBM Db2 database. -* `database`: The name of the IBM Db2 database to connect to. - -Optional connection parameters include the following: - -* `port`: The port number for connecting to the IBM Db2 database. Default is `50000`. -* `schema`: The database schema to use within the IBM Db2 database. - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM db2_datasource.schema_name.table_name -LIMIT 10; -``` - -Run IBM Db2 native queries directly on the connected database: - -```sql -SELECT * FROM db2_datasource ( - - --Native Query Goes Here - WITH - DINFO (DEPTNO, AVGSALARY, EMPCOUNT) AS - (SELECT OTHERS.WORKDEPT, AVG(OTHERS.SALARY), COUNT(*) - FROM EMPLOYEE OTHERS - GROUP BY OTHERS.WORKDEPT - ), - DINFOMAX AS - (SELECT MAX(AVGSALARY) AS AVGMAX FROM DINFO) - SELECT THIS_EMP.EMPNO, THIS_EMP.SALARY, - DINFO.AVGSALARY, DINFO.EMPCOUNT, DINFOMAX.AVGMAX - FROM EMPLOYEE THIS_EMP, DINFO, DINFOMAX - WHERE THIS_EMP.JOB = 'SALESREP' - AND THIS_EMP.WORKDEPT = DINFO.DEPTNO - -); -``` - - -The above examples utilize `db2_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the IBM Db2 database. -* **Checklist**: - 1. Make sure the IBM Db2 database is active. - 2. Confirm that host, user, password and database are correct. Try a direct connection using a client like DBeaver. - 3. Ensure a stable network between MindsDB and the IBM Db2 database. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - - -This [guide](https://www.ibm.com/docs/en/db2/11.5?topic=connect-common-db2-problems) of common connection Db2 connection issues provided by IBM might also be helpful. diff --git a/mindsdb/integrations/handlers/db2_handler/__about__.py b/mindsdb/integrations/handlers/db2_handler/__about__.py deleted file mode 100644 index 0ce1cbd928d..00000000000 --- a/mindsdb/integrations/handlers/db2_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB IBM DB2 handler" -__package_name__ = "mindsdb_db2_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for IBM DB2" -__author__ = "Parthiv Makwana" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/db2_handler/__init__.py b/mindsdb/integrations/handlers/db2_handler/__init__.py deleted file mode 100644 index b20ced449dc..00000000000 --- a/mindsdb/integrations/handlers/db2_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .db2_handler import DB2Handler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "IBM DB2" -name = "db2" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/db2_handler/connection_args.py b/mindsdb/integrations/handlers/db2_handler/connection_args.py deleted file mode 100644 index d4638b88a06..00000000000 --- a/mindsdb/integrations/handlers/db2_handler/connection_args.py +++ /dev/null @@ -1,53 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - "type": ARG_TYPE.STR, - "description": "The hostname, IP address, or URL of the IBM Db2 database.", - "required": True, - "label": "Host" - }, - database={ - "type": ARG_TYPE.STR, - "description": "The name of the IBM Db2 database to connect to.", - "required": True, - "label": "Database" - }, - user={ - "type": ARG_TYPE.STR, - "description": "The username for the IBM Db2 database.", - "required": True, - "label": "User" - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password for the IBM Db2 database.", - 'secret': True, - "required": True, - "label": "Password" - }, - port={ - "type": ARG_TYPE.INT, - "description": "The port number for connecting to the IBM Db2 database. Default is `50000`", - "required": False, - "label": "Port" - }, - schema={ - "type": ARG_TYPE.STR, - "description": "The database schema to use within the IBM Db2 database.", - "required": False, - "label": "Schema" - }, -) - -connection_args_example = OrderedDict( - host="127.0.0.1", - port="25000", - password="1234", - user="db2admin", - schema="db2admin", - database="BOOKS", -) diff --git a/mindsdb/integrations/handlers/db2_handler/db2_handler.py b/mindsdb/integrations/handlers/db2_handler/db2_handler.py deleted file mode 100644 index 4ec8b51aa76..00000000000 --- a/mindsdb/integrations/handlers/db2_handler/db2_handler.py +++ /dev/null @@ -1,231 +0,0 @@ -from typing import Text, Dict, Optional, Any - -import ibm_db_dbi -from ibm_db_dbi import OperationalError, ProgrammingError -from ibm_db_sa.ibm_db import DB2Dialect_ibm_db as DB2Dialect -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -import pandas as pd - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class DB2Handler(DatabaseHandler): - name = "db2" - - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs: Any) -> None: - """ - Initializes the handler. - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the IBM Db2 database. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self) -> None: - """ - Closes the connection when the handler instance is deleted. - """ - if self.is_connected: - self.disconnect() - - def connect(self) -> ibm_db_dbi.Connection: - """ - Establishes a connection to a IBM Db2 database. - - Raises: - ValueError: If the required connection parameters are not provided. - ibm_db_dbi.OperationalError: If an error occurs while connecting to the IBM Db2 database. - - Returns: - ibm_db_dbi.Connection: A connection object to the IBM Db2 database. - """ - if self.is_connected: - return self.connection - - # Mandatory connection parameters. - if not all(key in self.connection_data for key in ["host", "user", "password", "database"]): - raise ValueError("Required parameters (host, user, password, database) must be provided.") - cloud = "databases.appdomain.cloud" in self.connection_data["host"] - if cloud: - connection_string = f"DATABASE={self.connection_data['database']};HOSTNAME={self.connection_data['host']};PORT={self.connection_data['port']};PROTOCOL=TCPIP;UID={self.connection_data['user']};PWD={self.connection_data['password']};SECURITY=SSL;" - connection_string += "SSLSERVERCERTIFICATE=;" - else: - connection_string = f"DRIVER={'IBM DB2 ODBC DRIVER'};DATABASE={self.connection_data['database']};HOST={self.connection_data['host']};PROTOCOL=TCPIP;UID={self.connection_data['user']};PWD={self.connection_data['password']};" - - # Optional connection parameters. - if "port" in self.connection_data: - connection_string += f"PORT={self.connection_data['port']};" - - if "schema" in self.connection_data: - connection_string += f"CURRENTSCHEMA={self.connection_data['schema']};" - - try: - self.connection = ibm_db_dbi.pconnect(connection_string, "", "") - self.is_connected = True - return self.connection - except OperationalError as operational_error: - logger.error(f"Error while connecting to {self.connection_data.get('database')}, {operational_error}!") - raise - except Exception as unknown_error: - logger.error(f"Unknown error while connecting to {self.connection_data.get('database')}, {unknown_error}!") - raise - - def disconnect(self) -> None: - """ - Closes the connection to the IBM Db2 database if it's currently open. - """ - if not self.is_connected: - return - - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the IBM Db2 database. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except (OperationalError, ValueError) as known_error: - logger.error(f"Connection check to IBM Db2 failed, {known_error}!") - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f"Connection check to IBM Db2 failed due to an unknown error, {unknown_error}!") - response.error_message = str(unknown_error) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: Text) -> Response: - """ - Executes a SQL query on the IBM Db2 database and returns the result (if any). - - Args: - query (str): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cur: - try: - cur.execute(query) - - if cur._result_set_produced: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame(result, columns=[x[0] for x in cur.description]), - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except (OperationalError, ProgrammingError) as known_error: - logger.error(f"Error running query: {query} on {self.connection_data.get('database')}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(known_error)) - connection.rollback() - - except Exception as unknown_error: - logger.error(f"Unknown error running query: {query} on {self.connection_data.get('database')}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(unknown_error)) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode on the IBM Db2 database and retrieves the data (if any). - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - renderer = SqlalchemyRender(DB2Dialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables and views in the current schema of the IBM Db2 database. - - Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. - """ - connection = self.connect() - - result = connection.tables(connection.current_schema) - - tables = [] - for table in result: - tables.append( - { - "TABLE_NAME": table["TABLE_NAME"], - "TABLE_SCHEMA": table["TABLE_SCHEM"], - "TABLE_TYPE": table["TABLE_TYPE"], - } - ) - - response = Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(tables)) - - return response - - def get_columns(self, table_name: Text) -> Response: - """ - Retrieves column details for a specified table in the IBM Db2 database. - - Args: - table_name (Text): The name of the table for which to retrieve column information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - connection = self.connect() - - result = connection.columns(table_name=table_name) - - columns = [column["COLUMN_NAME"] for column in result] - - response = Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(columns, columns=["COLUMN_NAME"])) - - return response diff --git a/mindsdb/integrations/handlers/db2_handler/icon.svg b/mindsdb/integrations/handlers/db2_handler/icon.svg deleted file mode 100644 index 0b3f7b87b29..00000000000 --- a/mindsdb/integrations/handlers/db2_handler/icon.svg +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/db2_handler/requirements.txt b/mindsdb/integrations/handlers/db2_handler/requirements.txt deleted file mode 100644 index 68e6ad5d901..00000000000 --- a/mindsdb/integrations/handlers/db2_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -ibm-db-sa -ibm-db diff --git a/mindsdb/integrations/handlers/db2_handler/tests/__init__.py b/mindsdb/integrations/handlers/db2_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/db2_handler/tests/test_db2_handler.py b/mindsdb/integrations/handlers/db2_handler/tests/test_db2_handler.py deleted file mode 100644 index 69e24ebd85c..00000000000 --- a/mindsdb/integrations/handlers/db2_handler/tests/test_db2_handler.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.db2_handler.db2_handler import DB2Handler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class DB2HandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "127.0.0.1", - "port": "25000", - "user": "db2admin", - "password": "1234", - "database": "Books", - "schema_name": "db2admin", - } - } - cls.handler = DB2Handler("test_db2_handler", **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS LOVE") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_create_table(self): - res = self.handler.query("CREATE TABLE IF NOT EXISTS LOVE (LOVER varchar(20))") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_4_select_query(self): - query = "SELECT * FROM AUTHORS" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_check_connection(self): - self.handler.check_connection() - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/derby_handler/README.md b/mindsdb/integrations/handlers/derby_handler/README.md deleted file mode 100644 index c8e64e05d89..00000000000 --- a/mindsdb/integrations/handlers/derby_handler/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# Apache Derby Handler - -This is the implementation of the Apache Derby handler for MindsDB. - -## Apache Derby -Apache Derby, an Apache DB subproject, is an open source relational database implemented entirely in Java and available under the Apache License, Version 2.0. - -Some key features include: - -* Derby has a small footprint -- about 3.5 megabytes for the base engine and embedded JDBC driver. -* Derby is based on the Java, JDBC, and SQL standards. -* Derby provides an embedded JDBC driver that lets you embed Derby in any Java-based solution. -* Derby also supports the more familiar client/server mode with the Derby Network Client JDBC driver and Derby Network Server. -* Derby is easy to install, deploy, and use. - - -## Implementation -This handler was implemented using the JDBC drivers provided by Apache Derby. To establish connection with the database, `JayDeBeApi` library is used. The `JayDeBeApi` module allows you to connect from Python code to databases using Java JDBC. It provides a Python DB-API v2.0 to that database. - -The required arguments to establish a connection are, -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `database`: Database name to be connected - -## Usage -In order to make use of this handler and connect to Apache Derby in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE derby_datasource -WITH engine='derby', -parameters={ - "host": "localhost", - "port": "1527", - "database": "seconddb" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM derby_datasource.TESTTABLE; -~~~~ diff --git a/mindsdb/integrations/handlers/derby_handler/__about__.py b/mindsdb/integrations/handlers/derby_handler/__about__.py deleted file mode 100644 index 9c9b2098834..00000000000 --- a/mindsdb/integrations/handlers/derby_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Apache Derby handler' -__package_name__ = 'mindsdb_derby_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Apache Derby DB" -__author__ = 'Kavel Baruah' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/derby_handler/__init__.py b/mindsdb/integrations/handlers/derby_handler/__init__.py deleted file mode 100644 index bd8cc2b3631..00000000000 --- a/mindsdb/integrations/handlers/derby_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .derby_handler import DerbyHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'Apache Derby DB' -name = 'derby' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/derby_handler/connection_args.py b/mindsdb/integrations/handlers/derby_handler/connection_args.py deleted file mode 100644 index a48f509a1fc..00000000000 --- a/mindsdb/integrations/handlers/derby_handler/connection_args.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Apache Derby server/database.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Specify port to connect to Apache Derby.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': """ - The database name to use when connecting with the Apache Derby server. - """ - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Apache Derby server. If specified this is also treated as the schema.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Apache Derby server.', - 'secret': True - }, - jdbcClass={ - 'type': ARG_TYPE.STR, - 'description': 'The jdbc class which should be used to establish the connection, the default value is: org.apache.derby.jdbc.ClientDriver.' - }, - jdbcJarLocation={ - 'type': ARG_TYPE.STR, - 'description': 'The location of the jar files which contain the JDBC class. This need not be specified if the required classes are already added to the CLASSPATH variable.' - } - -) - - -connection_args_example = OrderedDict( - host='localhost', - port='1527', - user='test', - password='test', - database="testdb", - jdbcClass='org.apache.derby.jdbc.ClientDriver', - jdbcJarLocation='/opt/homebrew/Cellar/derby/10.16.1.1/libexec/lib/derby.jar,/opt/homebrew/Cellar/derby/10.16.1.1/libexec/lib/derbyclient.jar,/opt/homebrew/Cellar/derby/10.16.1.1/libexec/lib/derbynet.jar,/opt/homebrew/Cellar/derby/10.16.1.1/libexec/lib/derbyoptionaltools.jar,/opt/homebrew/Cellar/derby/10.16.1.1/libexec/lib/derbyrun.jar,/opt/homebrew/Cellar/derby/10.16.1.1/libexec/lib/derbyshared.jar,/opt/homebrew/Cellar/derby/10.16.1.1/libexec/lib/derbytools.jar', -) diff --git a/mindsdb/integrations/handlers/derby_handler/derby_handler.py b/mindsdb/integrations/handlers/derby_handler/derby_handler.py deleted file mode 100644 index 564265d1348..00000000000 --- a/mindsdb/integrations/handlers/derby_handler/derby_handler.py +++ /dev/null @@ -1,193 +0,0 @@ -from typing import Optional -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -import pandas as pd -import jaydebeapi as jdbcconnector - -logger = log.getLogger(__name__) - - -class DerbyHandler(DatabaseHandler): - name = "derby" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """Initialize the handler - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.kwargs = kwargs - self.parser = parse_sql - self.database = connection_data["database"] - self.connection_config = connection_data - self.host = connection_data["host"] - self.port = connection_data["port"] - self.schema = "APP" - self.connection = None - self.is_connected = False - - def connect(self): - """Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - Connection Object - """ - if self.is_connected is True: - return self.connection - - user = self.connection_config.get("user") - password = self.connection_config.get("password") - jdbc_class = self.connection_config.get("jdbcClass") - jar_location = self.connection_config.get("jdbcJarLocation") - - jdbc_url = "jdbc:derby://" + self.host + ":" + self.port + "/" + self.database + ";" - - if not jdbc_class: - jdbc_class = "org.apache.derby.jdbc.ClientDriver" - - if user: - self.schema = user - - try: - if user and password and jar_location: - self.connection = jdbcconnector.connect( - jclassname=jdbc_class, url=jdbc_url, driver_args=[user, password], jars=jar_location.split(",") - ) - elif user and password: - self.connection = jdbcconnector.connect( - jclassname=jdbc_class, url=jdbc_url, driver_args=[user, password] - ) - elif jar_location: - self.connection = jdbcconnector.connect( - jclassname=jdbc_class, url=jdbc_url, jars=jar_location.split(",") - ) - else: - self.connection = jdbcconnector.connect(jdbc_class, jdbc_url) - except Exception as e: - logger.error(f"Error while connecting to {self.database}, {e}") - - return self.connection - - def disconnect(self): - """Close any existing connections - Should switch self.is_connected. - """ - if self.is_connected is False: - return - try: - self.connection.close() - self.is_connected = False - except Exception as e: - logger.error(f"Error while disconnecting to {self.database}, {e}") - - return - - def check_connection(self) -> StatusResponse: - """Check connection to the handler - Returns: - HandlerStatusResponse - """ - responseCode = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - responseCode.success = True - except Exception as e: - logger.error(f"Error connecting to database {self.database}, {e}!") - responseCode.error_message = str(e) - finally: - if responseCode.success is True and need_to_close: - self.disconnect() - if responseCode.success is False and self.is_connected is True: - self.is_connected = False - - return responseCode - - def native_query(self, query: str) -> StatusResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - etc) - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - conn = self.connect() - with conn.cursor() as cur: - try: - cur.execute(query) - if cur.description: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(result, columns=[x[0] for x in cur.description]) - ) - else: - response = Response(RESPONSE_TYPE.OK) - self.connection.commit() - except Exception as e: - logger.error(f"Error running query: {query} on {self.database}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - self.connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """Render and execute a SQL query. - - Args: - query (ASTNode): The SQL query. - - Returns: - Response: The query result. - """ - if isinstance(query, ASTNode): - query_str = query.to_string() - else: - query_str = str(query) - - # Replace backticks with double quotes for Derby compatibility - query_str = query_str.replace("`", '"') - - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """Get a list of all the tables in the database. - - Returns: - Response: Names of the tables in the database. - """ - query = f""" - SELECT st.tablename FROM sys.systables st LEFT OUTER JOIN sys.sysschemas ss ON (st.schemaid = ss.schemaid) WHERE ss.schemaname ='{self.schema}' """ - - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """Get details about a table. - - Args: - table_name (str): Name of the table to retrieve details of. - - Returns: - Response: Details of the table. - """ - - query = f""" SELECT COLUMNNAME FROM SYS.SYSCOLUMNS INNER JOIN SYS.SYSTABLES ON SYS.SYSCOLUMNS.REFERENCEID=SYS.SYSTABLES.TABLEID WHERE TABLENAME='{table_name}' """ - return self.native_query(query) diff --git a/mindsdb/integrations/handlers/derby_handler/icon.svg b/mindsdb/integrations/handlers/derby_handler/icon.svg deleted file mode 100644 index e7db405bedf..00000000000 --- a/mindsdb/integrations/handlers/derby_handler/icon.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/derby_handler/requirements.txt b/mindsdb/integrations/handlers/derby_handler/requirements.txt deleted file mode 100644 index 78d1c7fe94b..00000000000 --- a/mindsdb/integrations/handlers/derby_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -jaydebeapi diff --git a/mindsdb/integrations/handlers/derby_handler/tests/__init__.py b/mindsdb/integrations/handlers/derby_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py b/mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py deleted file mode 100644 index 5c4fe8c8b5e..00000000000 --- a/mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.derby_handler.derby_handler import DerbyHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class DerbyHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": "1527", - "database": "seconddb", - } - } - cls.handler = DerbyHandler('test_derby_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - self.handler.check_connection() - - def test_2_create(self): - res = self.handler.query('CREATE TABLE TESTTABLEX (ID INT PRIMARY KEY, NAME VARCHAR(14))') - assert res.type is RESPONSE_TYPE.OK - - def test_3_insert(self): - res = self.handler.query("INSERT INTO TESTTABLEX VALUES (100,'ONE HUNDRED'),(200,'TWO HUNDRED'),(300,'THREE HUNDRED')") - assert res.type is RESPONSE_TYPE.OK - - def test_4_select(self): - res = self.handler.query('SELECT * FROM TESTTABLEX') - assert res.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - res = self.handler.get_tables() - assert res.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - res = self.handler.get_columns("TESTTABLEX") - assert res.type is RESPONSE_TYPE.TABLE - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/discord_handler/README.md b/mindsdb/integrations/handlers/discord_handler/README.md deleted file mode 100644 index 25f2ffebd3a..00000000000 --- a/mindsdb/integrations/handlers/discord_handler/README.md +++ /dev/null @@ -1,59 +0,0 @@ -## About Discord - -Discord is a communication platform designed around communities. It provides voice, video and text communication channels, along with various features for community management. See [discord.com](https://discord.com/) for more information. - -# Discord Handler Setup - -The Discord handler functions through a Discord bot, which must be registered on the [Discord Developer Portal](https://discord.com/developers). Make sure to give the bot the `Message Content` Privileged Intent the `Send Messages` permission in the channel you want to send messages to. - ---- - -## Parameters - -- `token`: a required token to give the bot access to the Discord API. This can be found on the Discord Developer Portal. - -Step 1 of [this guide](https://discord.com/developers/docs/getting-started) covers the basics of provisioning a bot. - -## Implemented Features - -- [x] Send and receive messages to a Discord channel via the API - -## TODO - -- [ ] Support other options for sending messages such as embeds, images, etc. -- [ ] Support UPDATE and DELETE for messages table - -## Example Usage - -The first step is to create a database with the new `discord` engine by passing in the required `token` parameter: - -~~~~sql -CREATE DATABASE discord_datasource -WITH ENGINE = 'discord', -PARAMETERS = { - "token": "{YOUR_TOKEN_HERE}" -}; -~~~~ - -Use the established connection to send messages to your Discord channel: - -~~~~sql -INSERT INTO discord_datasource.messages (channel_id, text) -VALUES (842979385837092867, 'Hello World!'); -~~~~ - -Query messages with a SELECT statement, but remember to always include a channel id in the WHERE clause: - -~~~~sql -SELECT * FROM discord_datasource.messages -WHERE channel_id = 842979385837092867; -~~~~ - -Select only the rows you want: - -~~~~sql -SELECT author_username, content, timestamp FROM discord_datasource.messages -WHERE channel_id = 842979385837092867 -AND timestamp > '2023-10-28 3:50:01' -LIMIT 25; -~~~~ diff --git a/mindsdb/integrations/handlers/discord_handler/__about__.py b/mindsdb/integrations/handlers/discord_handler/__about__.py deleted file mode 100644 index 3ccad7cdb33..00000000000 --- a/mindsdb/integrations/handlers/discord_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Discord handler" -__package_name__ = "mindsdb_discord_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Discord" -__author__ = 'Jason Wiemels' -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/discord_handler/__init__.py b/mindsdb/integrations/handlers/discord_handler/__init__.py deleted file mode 100644 index c1da0a2b0d4..00000000000 --- a/mindsdb/integrations/handlers/discord_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .discord_handler import DiscordHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Discord" -name = "discord" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/discord_handler/discord_handler.py b/mindsdb/integrations/handlers/discord_handler/discord_handler.py deleted file mode 100644 index 7b58898a641..00000000000 --- a/mindsdb/integrations/handlers/discord_handler/discord_handler.py +++ /dev/null @@ -1,187 +0,0 @@ -import requests -import pandas as pd - -from mindsdb.integrations.handlers.discord_handler.discord_tables import MessagesTable - -from mindsdb.utilities import log - -from mindsdb.integrations.libs.api_handler import APIHandler, FuncParser -from mindsdb.integrations.utilities.date_utils import parse_utc_date - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - -discord_bot = None -logger = log.getLogger(__name__) - - -class DiscordHandler(APIHandler): - """ - The Discord handler implementation. - """ - - name = 'discord' - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - - self.is_connected = False - - messages = MessagesTable(self) - self._register_table('messages', messages) - - def connect(self): - """ - Set up the connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - - if self.is_connected: - return StatusResponse(True) - - url = 'https://discord.com/api/v10/applications/@me' - result = requests.get( - url, - headers={ - 'Authorization': f'Bot {self.connection_data["token"]}', - 'Content-Type': 'application/json', - }, - ) - - if result.status_code != 200: - raise ValueError(result.text) - - self.is_connected = True - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - response.error_message = e - logger.error(f'Error connecting to Discord: {response.error_message}') - - self.is_connected = response.success - - return response - - def native_query(self, query: str = None) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - operation, params = FuncParser().from_string(query) - - df = self.call_discord_api(operation, params) - - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - def utc_to_snowflake(self, utc_date: str) -> int: - """ - Convert a UTC date to a Snowflake date. - Args: - utc_date (str): the UTC date - Returns: - int - """ - # https://discord.com/developers/docs/reference#snowflakes - return str( - int(parse_utc_date(utc_date).timestamp() * 1000 - 1420070400000) << 22 - ) - - def call_discord_api( - self, operation: str, params: dict = None, filters: list = None - ): - """ - Call a Discord API method. - Args: - method_name (str): the method name - params (dict): the method parameters - Returns: - pd.DataFrame - """ - - if operation == 'get_messages': - param_strings = {'limit': params['limit']} - if 'after' in params: - param_strings['after'] = self.utc_to_snowflake(params['after']) - if 'before' in params: - param_strings['before'] = self.utc_to_snowflake(params['before']) - - url = ( - f'https://discord.com/api/v10/channels/{params["channel_id"]}/messages' - ) - result = requests.get( - url, - headers={ - 'Authorization': f'Bot {self.connection_data["token"]}', - 'Content-Type': 'application/json', - }, - params=param_strings, - ) - - if result.status_code != 200: - raise ValueError(f'Error calling Discord API: {result.json()}') - - json = result.json() - for message in json: - author = message.get('author') - if author is not None: - message['author_id'] = author.get('id') - message['author_username'] = author.get('username') - message['author_global_name'] = author.get('global_name') - - df = pd.DataFrame.from_records(json) - return df - elif operation == 'send_message': - url = ( - f'https://discord.com/api/v10/channels/{params["channel_id"]}/messages' - ) - result = requests.post( - url, - headers={ - 'Authorization': f'Bot {self.connection_data["token"]}', - 'Content-Type': 'application/json', - }, - json={ - 'content': params['text'], - }, - ) - - if result.status_code != 200: - raise ValueError(f'Error calling Discord API: {result.json()}') - - df = pd.DataFrame.from_records([result.json()]) - return df - else: - raise ValueError(f"Unsupported method: {operation}") diff --git a/mindsdb/integrations/handlers/discord_handler/discord_tables.py b/mindsdb/integrations/handlers/discord_handler/discord_tables.py deleted file mode 100644 index 6c4c75f12b1..00000000000 --- a/mindsdb/integrations/handlers/discord_handler/discord_tables.py +++ /dev/null @@ -1,189 +0,0 @@ -import pandas as pd -from typing import Text, List, Dict, Any - -from mindsdb_sql_parser import ast - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - -from mindsdb.integrations.libs.response import HandlerResponse as Response - -from mindsdb.integrations.utilities.handlers.query_utilities.insert_query_utilities import ( - INSERTQueryParser -) - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class MessagesTable(APITable): - """The Discord Table implementation""" - - def get_columns(self): - return [ - 'id', - 'type', - 'content', - 'author_id', - 'author_username', - 'author_global_name', - 'author_avatar', - 'author_banner_color', - 'attachments', - 'embeds', - 'mentions', - 'mention_roles', - 'pinned', - 'mention_everyone', - 'tts', - 'timestamp', - 'edited_timestamp', - 'flags', - 'components', - 'nonce', - 'referenced_message', - ] - - def select(self, query: ast.Select) -> Response: - """Selects data from the Discord channel. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query. - - Returns - ------- - Response - Response object representing collected data from Discord. - """ - - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - user_filter_flag = channel_filter_flag = False - for op, arg1, arg2 in conditions: - if op == 'or': - raise NotImplementedError('OR is not supported') - if arg1 == 'timestamp': - if op == '>': - params['after'] = arg2 - elif op == '<': - params['before'] = arg2 - else: - raise NotImplementedError( - f'Unsupported operator {op} for timestamp' - ) - - elif arg1 in ['author_id', 'author_username', 'author_global_name']: - if user_filter_flag: - raise NotImplementedError('Multiple user filters are not supported') - user_filter_flag = True - - if op != '=': - raise NotImplementedError(f'Unsupported operator {op} for {arg1}') - - # if arg1 == 'author_id': - # filters.append(lambda x: x.author.id == int(arg2)) - # elif arg1 == 'author_username': - # filters.append(lambda x: x.author.username == arg2) - # elif arg1 == 'author_global_name': - # filters.append(lambda x: x.author.global_name == arg2) - - elif arg1 == 'channel_id': - if op != '=': - raise NotImplementedError(f'Unsupported operator {op} for {arg1}') - channel_filter_flag = True - params['channel_id'] = int(arg2) - - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params['limit'] = query.limit - else: - params['limit'] = 100 - - if not channel_filter_flag: - raise NotImplementedError('Channel filter is required') - - result = self.handler.call_discord_api( - 'get_messages', params=params, filters=filters - ) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def insert(self, query: ast.Insert) -> None: - """Sends messages to a Discord channel. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query. - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - insert_statement_parser = INSERTQueryParser( - query, - supported_columns=['channel_id', 'text'], - mandatory_columns=['channel_id', 'text'], - all_mandatory=False, - ) - message_data = insert_statement_parser.parse_query() - self.send_message(message_data) - - def send_message(self, message_data: List[Dict[Text, Any]]) -> None: - """Sends messages to a Discord Channel using the parsed message data. - - Parameters - ---------- - message_data : List[Dict[Text, Any]] - List of dictionaries containing the messages to send. - - Returns - ------- - None - """ - for message in message_data: - try: - params = {'channel_id': message['channel_id'], 'text': message['text']} - self.handler.call_discord_api('send_message', params=params) - logger.info("Message sent to Discord channel successfully.") - except Exception as e: - logger.error(f"Error sending message to Discord channel: {e}") - raise e diff --git a/mindsdb/integrations/handlers/discord_handler/icon.svg b/mindsdb/integrations/handlers/discord_handler/icon.svg deleted file mode 100644 index 55a520ef27a..00000000000 --- a/mindsdb/integrations/handlers/discord_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/discord_handler/requirements.txt b/mindsdb/integrations/handlers/discord_handler/requirements.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/discord_handler/tests/__init__.py b/mindsdb/integrations/handlers/discord_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/discord_handler/tests/test_discord.py b/mindsdb/integrations/handlers/discord_handler/tests/test_discord.py deleted file mode 100644 index 7049bc3098a..00000000000 --- a/mindsdb/integrations/handlers/discord_handler/tests/test_discord.py +++ /dev/null @@ -1,61 +0,0 @@ -import unittest -from unittest.mock import patch - -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast import BinaryOperation, Identifier, Constant -from mindsdb_sql_parser.ast.select.star import Star -from mindsdb.integrations.handlers.discord_handler.discord_handler import DiscordHandler - - -class DiscordHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.handler = DiscordHandler( - name='discord_datasource', connection_data={'token': 'test-discord-token'} - ) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - @patch('mindsdb.integrations.handlers.discord_handler.discord_handler.requests.get') - def test_1_read_messages(self, mock_get): - mock_get.return_value.status_code = 200 - mock_get.return_value.json.return_value = [{'content': 'Test message'}] - query = ast.Select( - targets=[Star()], - from_table="messages", - where=BinaryOperation( - op='=', args=[Identifier('channel_id'), Constant('1234567890')] - ), - ) - - self.handler._tables['messages'].select(query) - mock_get.assert_called_with( - 'https://discord.com/api/v10/channels/1234567890/messages', - headers={ - 'Authorization': 'Bot test-discord-token', - 'Content-Type': 'application/json', - }, - params={'limit': 100}, - ) - - @patch( - 'mindsdb.integrations.handlers.discord_handler.discord_handler.requests.post' - ) - def test_2_send_message(self, mock_post): - mock_post.return_value.status_code = 200 - self.handler._tables['messages'].send_message( - [{'channel_id': '1234567890', 'text': 'Test message'}] - ) - mock_post.assert_called_with( - 'https://discord.com/api/v10/channels/1234567890/messages', - headers={ - 'Authorization': 'Bot test-discord-token', - 'Content-Type': 'application/json', - }, - json={'content': 'Test message'}, - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/dockerhub_handler/README.md b/mindsdb/integrations/handlers/dockerhub_handler/README.md deleted file mode 100644 index 968f64ce605..00000000000 --- a/mindsdb/integrations/handlers/dockerhub_handler/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# DockerHub Handler - -DockerHub handler for MindsDB provides interfaces to connect to DockerHub via APIs and pull repository data into MindsDB. - ---- - -## Table of Contents - -- [DockerHub Handler](#dockerhub-handler) - - [Table of Contents](#table-of-contents) - - [About DockerHub](#about-dockerhub) - - [DockerHub Handler Implementation](#dockerhub-handler-implementation) - - [DockerHub Handler Initialization](#dockerhub-handler-initialization) - - [Implemented Features](#implemented-features) - - [Example Usage](#example-usage) - ---- - -## About DockerHub - -Docker Hub is the world's easiest way to create, manage, and deliver your team's container applications. - - -## DockerHub Handler Implementation - -This handler was implemented using the `requests` library that makes http calls to https://docs.docker.com/docker-hub/api/latest/#tag/resources. - -## DockerHub Handler Initialization - -The DockerHub handler is initialized with the following parameters: - -- `username`: Username used to login to DockerHub -- `password`: Password used to login to DockerHub - -Read about creating an account [here](https://hub.docker.com/). - -## Implemented Features - -- [x] DockerHub Repo Images Summary for a given Repository - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - - -## Example Usage - -The first step is to create a database with the new `dockerhub` engine. - -~~~~sql -CREATE DATABASE mindsdb_dockerhub -WITH ENGINE = 'dockerhub', -PARAMETERS = { - "username": "user", - "password": "pass" -}; -~~~~ - -Use the established connection to query your database: - -~~~~sql -SELECT * FROM mindsdb_dockerhub.repo_images_summary WHERE namespace="docker" AND repository="trusted-registry-nginx"; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_dockerhub.repo_images WHERE namespace="docker" AND repository="trusted-registry-nginx"; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_dockerhub.repo_tag_details WHERE namespace="docker" AND repository="trusted-registry-nginx" AND tag="latest"; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_dockerhub.repo_tags WHERE namespace="docker" AND repository="trusted-registry-nginx"; -~~~~ - -======= -SELECT * FROM mindsdb_dockerhub.org_settings where organization="docker"; -~~~~ diff --git a/mindsdb/integrations/handlers/dockerhub_handler/__about__.py b/mindsdb/integrations/handlers/dockerhub_handler/__about__.py deleted file mode 100644 index f5d53ccf9da..00000000000 --- a/mindsdb/integrations/handlers/dockerhub_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB DockerHub handler" -__package_name__ = "mindsdb_dockerhub_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Dockerhub" -__author__ = "Abhilash K R" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/dockerhub_handler/__init__.py b/mindsdb/integrations/handlers/dockerhub_handler/__init__.py deleted file mode 100644 index c550a560b71..00000000000 --- a/mindsdb/integrations/handlers/dockerhub_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .dockerhub_handler import DockerHubHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "DockerHub" -name = "dockerhub" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/dockerhub_handler/connection_args.py b/mindsdb/integrations/handlers/dockerhub_handler/connection_args.py deleted file mode 100644 index f237de4f619..00000000000 --- a/mindsdb/integrations/handlers/dockerhub_handler/connection_args.py +++ /dev/null @@ -1,25 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - username={ - "type": ARG_TYPE.STR, - "description": "DockerHub username", - "required": True, - "label": "username", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "DockerHub password", - "required": True, - "label": "Api key", - 'secret': True - } -) - -connection_args_example = OrderedDict( - username="username", - password="password" -) diff --git a/mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py b/mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py deleted file mode 100644 index 71f92af8576..00000000000 --- a/mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py +++ /dev/null @@ -1,55 +0,0 @@ -import json -import requests - - -class DockerHubClient: - def __init__(self): - self.auth_token = None - self.docker_hub_base_endpoint = "https://hub.docker.com/v2/" - - def make_request(self, url, method='GET', data=None): - if method not in ['GET', 'POST']: - raise ValueError('Invalid HTTP request method') - headers = {'Content-type': 'application/json'} - if self.auth_token: - headers['Authorization'] = 'JWT ' + self.auth_token - request_method = getattr(requests, method.lower()) - if data and len(data) > 0: - data = json.dumps(data) - resp = request_method(url, data, headers=headers) - else: - resp = request_method(url, headers=headers) - content = {} - if resp.status_code == 200: - content = {'content': json.loads(resp.content.decode()), 'code': 200} - else: - content = {'content': {}, 'code': resp.status_code, 'error': resp.text} - return content - - def login(self, username=None, password=None): - data = {'username': username, 'password': password} - self.auth_token = None - resp = self.make_request(self.docker_hub_base_endpoint + 'users/login/', 'POST', data) - if resp['code'] == 200: - self.auth_token = resp['content']['token'] - return resp - - def get_images_summary(self, namespace, repo): - url = f'{self.docker_hub_base_endpoint}namespaces/{namespace}/repositories/{repo}/images-summary' - return self.make_request(url) - - def get_repo_images(self, namespace, repo): - url = f'{self.docker_hub_base_endpoint}namespaces/{namespace}/repositories/{repo}/images' - return self.make_request(url) - - def get_repo_tag(self, namespace, repo, tag): - url = f'{self.docker_hub_base_endpoint}namespaces/{namespace}/repositories/{repo}/tags/{tag}' - return self.make_request(url) - - def get_repo_tags(self, namespace, repo): - url = f'{self.docker_hub_base_endpoint}namespaces/{namespace}/repositories/{repo}/tags' - return self.make_request(url) - - def get_org_settings(self, namespace): - url = f'{self.docker_hub_base_endpoint}orgs/{namespace}/settings' - return self.make_request(url) diff --git a/mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py b/mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py deleted file mode 100644 index ce541160d02..00000000000 --- a/mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py +++ /dev/null @@ -1,112 +0,0 @@ -from mindsdb.integrations.handlers.dockerhub_handler.dockerhub_tables import ( - DockerHubRepoImagesSummaryTable, - DockerHubRepoImagesTable, - DockerHubRepoTagTable, - DockerHubRepoTagsTable, - DockerHubOrgSettingsTable -) -from mindsdb.integrations.handlers.dockerhub_handler.dockerhub import DockerHubClient -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class DockerHubHandler(APIHandler): - """The DockerHub handler implementation""" - - def __init__(self, name: str, **kwargs): - """Initialize the DockerHub handler. - - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - self.docker_client = DockerHubClient() - self.is_connected = False - - repo_images_stats_data = DockerHubRepoImagesSummaryTable(self) - self._register_table("repo_images_summary", repo_images_stats_data) - - repo_images_data = DockerHubRepoImagesTable(self) - self._register_table("repo_images", repo_images_data) - - repo_tag_details_data = DockerHubRepoTagTable(self) - self._register_table("repo_tag_details", repo_tag_details_data) - - repo_tags_data = DockerHubRepoTagsTable(self) - self._register_table("repo_tags", repo_tags_data) - - org_settings = DockerHubOrgSettingsTable(self) - self._register_table("org_settings", org_settings) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - - Returns - ------- - StatusResponse - connection object - """ - resp = StatusResponse(False) - status = self.docker_client.login(self.connection_data.get("username"), self.connection_data.get("password")) - if status["code"] != 200: - resp.success = False - resp.error_message = status["error"] - return resp - self.is_connected = True - return resp - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - - Returns - ------- - StatusResponse - Status confirmation - """ - response = StatusResponse(False) - - try: - status = self.docker_client.login(self.connection_data.get("username"), self.connection_data.get("password")) - if status["code"] == 200: - current_user = self.connection_data.get("username") - logger.info(f"Authenticated as user {current_user}") - response.success = True - else: - response.success = False - logger.info("Error connecting to dockerhub. " + status["error"]) - response.error_message = status["error"] - except Exception as e: - logger.error(f"Error connecting to DockerHub API: {e}!") - response.error_message = e - - self.is_connected = response.success - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py b/mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py deleted file mode 100644 index d6cad5cb9dc..00000000000 --- a/mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py +++ /dev/null @@ -1,512 +0,0 @@ -import pandas as pd -from typing import List -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor -from mindsdb.utilities import log -from mindsdb_sql_parser import ast - -logger = log.getLogger(__name__) - - -class DockerHubRepoImagesSummaryTable(APITable): - """The DockerHub Repo Images Summary Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://docs.docker.com/docker-hub/api/latest/#tag/images" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - repo images summary matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'repo_images_summary', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'namespace': - if op == '=': - search_params["namespace"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for namespace column.") - elif arg1 == 'repository': - if op == '=': - search_params["repository"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for repository column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("namespace" in search_params) and ("repository" in search_params) - - if not filter_flag: - raise NotImplementedError("Both namespace and repository columns have to be present in WHERE clause.") - - repo_images_summary_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.docker_client.get_images_summary(search_params["namespace"], search_params["repository"]) - - self.check_res(res=response) - - content = response["content"] - - repo_images_summary_df = pd.json_normalize({"active_from": content["active_from"], "total": content["statistics"]["total"], "active": content["statistics"]["active"], "inactive": content["statistics"]["inactive"]}) - - select_statement_executor = SELECTQueryExecutor( - repo_images_summary_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - repo_images_summary_df = select_statement_executor.execute_query() - - return repo_images_summary_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "active_from", - "total", - "active", - "inactive" - ] - - -class DockerHubOrgSettingsTable(APITable): - """The DockerHub Repo Org Settings Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://hub.docker.com/v2/orgs/{name}/settings" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - org settings matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'org_settings', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'organization': - if op == '=': - search_params["organization"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for organization column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - if "organization" not in search_params: - raise NotImplementedError("organization column has to be present in where clause.") - - organization_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.docker_client.get_org_settings(search_params["organization"]) - - self.check_res(res=response) - - content = response["content"] - - organization_df = pd.json_normalize({"restricted_images_enabled": content["restricted_images"]["enabled"], "restricted_images_allow_official_images": content["restricted_images"]["allow_official_images"], "restricted_images_allow_verified_publishers": content["restricted_images"]["allow_verified_publishers"]}) - - select_statement_executor = SELECTQueryExecutor( - organization_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - organization_df = select_statement_executor.execute_query() - - return organization_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "restricted_images_enabled", - "restricted_images_allow_official_images", - "restricted_images_allow_verified_publishers" - ] - - -class DockerHubRepoImagesTable(APITable): - """The DockerHub Repo Images Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://hub.docker.com/v2/namespaces/{namespace}/repositories/{repository}/images" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Repo Images matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'repo_images', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'namespace': - if op == '=': - search_params["namespace"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for namespace column.") - elif arg1 == 'repository': - if op == '=': - search_params["repository"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for repository column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("namespace" in search_params) and ("repository" in search_params) - - if not filter_flag: - raise NotImplementedError("namespace and repository column has to be present in where clause.") - - repo_images_summary_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.docker_client.get_repo_images(search_params["namespace"], search_params["repository"]) - - self.check_res(res=response) - - content = response["content"] - - repo_images_summary_df = pd.json_normalize(content["results"]) - - select_statement_executor = SELECTQueryExecutor( - repo_images_summary_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - repo_images_summary_df = select_statement_executor.execute_query() - - return repo_images_summary_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return ["namespace", - "repository", - "digest", - "tags", - "last_pushed", - "last_pulled", - "status" - ] - - -class DockerHubRepoTagTable(APITable): - """The DockerHub Repo Tag Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://docs.docker.com/docker-hub/api/latest/#tag/images" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Repo Tag matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'repo_tag_details', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'namespace': - if op == '=': - search_params["namespace"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for namespace column.") - elif arg1 == 'repository': - if op == '=': - search_params["repository"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for repository column.") - elif arg1 == 'tag': - if op == '=': - search_params["tag"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for tag column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("namespace" in search_params) and ("repository" in search_params) and ("tag" in search_params) - - if not filter_flag: - raise NotImplementedError("namespace, repository and tag column has to be present in where clause.") - - repo_tag_summary_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.docker_client.get_repo_tag(search_params["namespace"], search_params["repository"], search_params["tag"]) - - self.check_res(res=response) - - content = response["content"] - - repo_tag_summary_df = pd.json_normalize({"creator": content["creator"], - "id": content["id"], - "images": content["images"], - "last_updated": content["last_updated"], - "last_updater": content["last_updater"], - "last_updater_username": content["last_updater_username"], - "name": content["name"], - "repository": content["repository"], - "full_size": content["full_size"], - "v2": content["v2"], - "tag_status": content["tag_status"], - "tag_last_pulled": content["tag_last_pulled"], - "tag_last_pushed": content["tag_last_pushed"], - "media_type": content["media_type"], - "content_type": content["media_type"] - }) - - select_statement_executor = SELECTQueryExecutor( - repo_tag_summary_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - repo_tag_summary_df = select_statement_executor.execute_query() - - return repo_tag_summary_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return ["creator", - "id", - "images", - "last_updated", - "last_updater", - "last_updater_username", - "name", - "repository", - "full_size", - "v2", - "tag_status", - "tag_last_pulled", - "tag_last_pushed", - "media_type", - "content_type" - ] - - -class DockerHubRepoTagsTable(APITable): - """The DockerHub Repo Tags Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://hub.docker.com/v2/namespaces/{namespace}/repositories/{repository}/tags" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Repo Tag matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'repo_tags', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'namespace': - if op == '=': - search_params["namespace"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for namespace column.") - elif arg1 == 'repository': - if op == '=': - search_params["repository"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for repository column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("namespace" in search_params) and ("repository" in search_params) - - if not filter_flag: - raise NotImplementedError("namespace and repository column has to be present in where clause.") - - repo_tags_summary_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.docker_client.get_repo_tags(search_params["namespace"], search_params["repository"]) - - self.check_res(res=response) - - content = response["content"] - - repo_tags_summary_df = pd.json_normalize(content["results"]) - - select_statement_executor = SELECTQueryExecutor( - repo_tags_summary_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - repo_tags_summary_df = select_statement_executor.execute_query() - - return repo_tags_summary_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return ["creator", - "id", - "images", - "last_updated", - "last_updater", - "last_updater_username", - "name", - "repository", - "full_size", - "v2", - "tag_status", - "tag_last_pulled", - "tag_last_pushed", - "media_type", - "content_type" - ] diff --git a/mindsdb/integrations/handlers/dockerhub_handler/icon.svg b/mindsdb/integrations/handlers/dockerhub_handler/icon.svg deleted file mode 100644 index 25040e9b51d..00000000000 --- a/mindsdb/integrations/handlers/dockerhub_handler/icon.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/documentdb_handler/README.md b/mindsdb/integrations/handlers/documentdb_handler/README.md deleted file mode 100644 index 04bc0ef78bd..00000000000 --- a/mindsdb/integrations/handlers/documentdb_handler/README.md +++ /dev/null @@ -1,56 +0,0 @@ -## Implementation - -This handler is implemented by extending the MongoDBHandler. - -The required arguments to establish a connection are as follows: - -- `username` is the database user. -- `password` is the database password. -- `host` is the host IP address or URL. -- `port` is the port used to make TCP/IP connection. -- `database` is the database name. - -There are several optional arguments (refer https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html) that can be used as well and can be passed as kwargs: - -- `tls` indicates whether tls is enabled (`True`) or disabled (`False`). -- `serverSelectionTimeoutMS` Controls how long (in milliseconds) the driver will wait to find an available. -- `directConnection` if `True`, forces this client to connect directly to the specified DocumentDB host as a standalone. If `False`, the client connects to the entire replica set of which the given DocumentDB host(s) is a part. -- `tlsAllowInvalidHostnames` If `True`, disables TLS hostname verification. `False` implies tls=True. -- `tlsAllowInvalidCertificates` If `True`, continues the TLS handshake regardless of the outcome of the certificate verification process. -- `retryWrites` Whether supported write operations executed within this MongoClient will be retried once after a network error. -- `tlsCAFile` A file containing a single or a bundle of β€œcertification authority” certificates, which are used to validate certificates passed from the other end of the connection. - -## Usage - -In order to make use of this handler and connect to the DocuementDB database in MindsDB, the following syntax can be used: - -```sql - db.databases.insertOne({ - name: "example_documentdb", - engine: "documentdb", - connection_args: { - "username": "username", - "password": "password", - "host": "127.0.0.1", - "port": "27017", - "database": "sample_database", - "kwargs": { - "directConnection": true, - "serverSelectionTimeoutMS": 2000, - "tls": true, - "tlsAllowInvalidHostnames": true, - "tlsAllowInvalidCertificates": true, - "retryWrites": false, - "tlsCAFile": "/home/global-bundle.pem" - } - } - }); -``` - -You can use this established connection to query your table as follows. - -```sql - use example_documentdb; - show collections; - db.sample_collection.find({}); -``` diff --git a/mindsdb/integrations/handlers/documentdb_handler/__about__.py b/mindsdb/integrations/handlers/documentdb_handler/__about__.py deleted file mode 100644 index 55895a55619..00000000000 --- a/mindsdb/integrations/handlers/documentdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB DocumentDB handler' -__package_name__ = 'mindsdb_documentdb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for DocumentDB" -__author__ = 'Biswadip Paul' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/documentdb_handler/__init__.py b/mindsdb/integrations/handlers/documentdb_handler/__init__.py deleted file mode 100644 index 982917213cf..00000000000 --- a/mindsdb/integrations/handlers/documentdb_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .documentdb_handler import DocumentDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'Amazon DocumentDB' -name = 'documentdb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/documentdb_handler/connection_args.py b/mindsdb/integrations/handlers/documentdb_handler/connection_args.py deleted file mode 100644 index 89b1995abe0..00000000000 --- a/mindsdb/integrations/handlers/documentdb_handler/connection_args.py +++ /dev/null @@ -1,53 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the DocumentDB server.', - 'required': True, - 'label': 'User', - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the DocumentDB server.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the DocumentDB server.', - 'required': True, - 'label': 'Database', - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the DocumentDB server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - 'required': True, - 'label': 'Host', - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the DocumentDB server. Must be an integer.', - 'required': True, - 'label': 'Port', - }, - kwargs={ - 'type': ARG_TYPE.DICT, - 'description': 'Additional parameters of DocumentDB same as MongoDB.', - 'required': False, - 'label': 'Kwargs', - }, -) - - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=27017, - username='documentdb', - password='password', - database='database', -) diff --git a/mindsdb/integrations/handlers/documentdb_handler/documentdb_handler.py b/mindsdb/integrations/handlers/documentdb_handler/documentdb_handler.py deleted file mode 100644 index 1c122b5a628..00000000000 --- a/mindsdb/integrations/handlers/documentdb_handler/documentdb_handler.py +++ /dev/null @@ -1,41 +0,0 @@ -from pymongo import MongoClient - -from mindsdb.integrations.handlers.mongodb_handler import Handler as MongoDBHandler - - -class DocumentDBHandler(MongoDBHandler): - """ - This handler handles connection and execution of the DocumentDB statements. - """ - - name = 'documentdb' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - connection_data = kwargs.get('connection_data', {}) - self.host = connection_data.get("host") - self.port = int(connection_data.get("port") or 27017) - self.user = connection_data.get("username") - self.password = connection_data.get("password") - self.database = connection_data.get('database') - self.flatten_level = connection_data.get('flatten_level', 0) - self.mykwargs = connection_data.get('kwargs', {}) - - self.connection = None - self.is_connected = False - - def connect(self): - kwargs = {} - if isinstance(self.user, str) and len(self.user) > 0: - kwargs['username'] = self.user - - if isinstance(self.password, str) and len(self.password) > 0: - kwargs['password'] = self.password - - connection = MongoClient( - host=self.host, port=self.port, **{**kwargs, **self.mykwargs} - ) - - self.is_connected = True - self.connection = connection - return self.connection diff --git a/mindsdb/integrations/handlers/documentdb_handler/icon.svg b/mindsdb/integrations/handlers/documentdb_handler/icon.svg deleted file mode 100644 index f7971e3c3ce..00000000000 --- a/mindsdb/integrations/handlers/documentdb_handler/icon.svg +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/documentdb_handler/requirements.txt b/mindsdb/integrations/handlers/documentdb_handler/requirements.txt deleted file mode 100644 index e76ccab0ebb..00000000000 --- a/mindsdb/integrations/handlers/documentdb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mongodb_handler/requirements.txt \ No newline at end of file diff --git a/mindsdb/integrations/handlers/dremio_handler/README.md b/mindsdb/integrations/handlers/dremio_handler/README.md deleted file mode 100644 index b346d9b9318..00000000000 --- a/mindsdb/integrations/handlers/dremio_handler/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# Dremio Handler - -This is the implementation of the Dremio handler for MindsDB. - -## Dremio -Dremio is the only data lakehouse that empowers data engineers and analysts with easy-to-use self-service SQL analytics. -
-https://www.dremio.com/why-dremio/ - -## Implementation -This handler was implemented using the `requests` and `pandas` libraries. - -The required arguments to establish a connection are, -* `host`: the host name or IP address of the Dremio server. -* `port`: the port that Dremio is running on. -* `username`: the username used to authenticate with the Dremio server. -* `password`: the password to authenticate the user with the Dremio server. - -## Usage -In order to make use of this handler and connect to Dremio in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE dremio_datasource -WITH -engine='dremio', -parameters={ - "host": "localhost", - "port": 9047, - "username": "username", - "password": "password" -}; -~~~~ - -Now, you can use this established connection to query your data source as follows, -~~~~sql -SELECT * FROM dremio_datasource.example_tbl -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/dremio_handler/__about__.py b/mindsdb/integrations/handlers/dremio_handler/__about__.py deleted file mode 100644 index f155c5eaebe..00000000000 --- a/mindsdb/integrations/handlers/dremio_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Dremio handler' -__package_name__ = 'mindsdb_dremio_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Dremio" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/dremio_handler/__init__.py b/mindsdb/integrations/handlers/dremio_handler/__init__.py deleted file mode 100644 index 5545d8111b6..00000000000 --- a/mindsdb/integrations/handlers/dremio_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args -try: - from .dremio_handler import DremioHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Dremio' -name = 'dremio' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/dremio_handler/connection_args.py b/mindsdb/integrations/handlers/dremio_handler/connection_args.py deleted file mode 100644 index 4e6c0378a6d..00000000000 --- a/mindsdb/integrations/handlers/dremio_handler/connection_args.py +++ /dev/null @@ -1,31 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Dremio server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The port that Dremio is running on.' - }, - username={ - 'type': ARG_TYPE.STR, - 'description': 'The username used to authenticate with the Dremio server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Dremio server.', - 'secret': True - } -) - -connection_args_example = OrderedDict( - host='localhost', - database=9047, - username='admin', - password='password' -) diff --git a/mindsdb/integrations/handlers/dremio_handler/dremio_handler.py b/mindsdb/integrations/handlers/dremio_handler/dremio_handler.py deleted file mode 100644 index b6de8a40e8a..00000000000 --- a/mindsdb/integrations/handlers/dremio_handler/dremio_handler.py +++ /dev/null @@ -1,222 +0,0 @@ -from typing import Optional - -import json -import time -import requests -import pandas as pd - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from sqlalchemy_dremio.base import DremioDialect - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class DremioHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Dremio statements. - """ - - name = 'dremio' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'dremio' - - self.connection_data = connection_data - self.kwargs = kwargs - - self.base_url = f"http://{self.connection_data['host']}:{self.connection_data['port']}" - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> dict: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - headers = { - 'Content-Type': 'application/json', - } - - data = '{' + f'"userName": "{self.connection_data["username"]}","password": "{self.connection_data["password"]}"' + '}' - - response = requests.post(self.base_url + '/apiv2/login', headers=headers, data=data) - - return { - 'Authorization': '_dremio' + response.json()['token'], - 'Content-Type': 'application/json', - } - - def disconnect(self): - """ - Close any existing connections. - """ - - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Dremio, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - query = query.replace('"', '\\"').replace('\n', ' ') - - need_to_close = self.is_connected is False - - auth_headers = self.connect() - data = '{' + f'"sql": "{query}"' + '}' - - try: - sql_result = requests.post(self.base_url + '/api/v3/sql', headers=auth_headers, data=data) - - job_id = sql_result.json()['id'] - - if sql_result.status_code == 200: - logger.info('Job creation successful. Job id is: ' + job_id) - else: - logger.info('Job creation failed.') - - logger.info('Waiting for the job to complete...') - - job_status = requests.request("GET", self.base_url + "/api/v3/job/" + job_id, headers=auth_headers).json()[ - 'jobState'] - - while job_status != 'COMPLETED': - if job_status == 'FAILED': - logger.error('Job failed!') - break - - time.sleep(2) - job_status = requests.request("GET", self.base_url + "/api/v3/job/" + job_id, headers=auth_headers).json()[ - 'jobState'] - - job_result = json.loads(requests.request("GET", self.base_url + "/api/v3/job/" + job_id + "/results", headers=auth_headers).text) - - if 'errorMessage' not in job_result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - job_result['rows'] - ) - ) - else: - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(job_result['errorMessage']) - ) - - except Exception as e: - logger.error(f'Error running query: {query} on Dremio!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - renderer = SqlalchemyRender(DremioDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = """ - SELECT - TABLE_NAME, - TABLE_SCHEMA, - CASE - WHEN TABLE_TYPE = 'TABLE' THEN 'BASE TABLE' - ELSE TABLE_TYPE - END AS TABLE_TYPE - FROM INFORMATION_SCHEMA."TABLES" - WHERE TABLE_TYPE <> 'SYSTEM_TABLE'; - """ - return self.native_query(query) - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - query = f"DESCRIBE {table_name}" - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={'COLUMN_NAME': 'Field', 'DATA_TYPE': 'Type'}) - return result diff --git a/mindsdb/integrations/handlers/dremio_handler/icon.svg b/mindsdb/integrations/handlers/dremio_handler/icon.svg deleted file mode 100644 index ed2156c236c..00000000000 --- a/mindsdb/integrations/handlers/dremio_handler/icon.svg +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/dremio_handler/requirements.txt b/mindsdb/integrations/handlers/dremio_handler/requirements.txt deleted file mode 100644 index e9f2b41ec61..00000000000 --- a/mindsdb/integrations/handlers/dremio_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -sqlalchemy_dremio \ No newline at end of file diff --git a/mindsdb/integrations/handlers/dremio_handler/tests/__init__.py b/mindsdb/integrations/handlers/dremio_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/dremio_handler/tests/test_dremio_handler.py b/mindsdb/integrations/handlers/dremio_handler/tests/test_dremio_handler.py deleted file mode 100644 index 89f615cd7ba..00000000000 --- a/mindsdb/integrations/handlers/dremio_handler/tests/test_dremio_handler.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.dremio_handler.dremio_handler import DremioHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class DremioHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "localhost", - "port": 9047, - "username": "minura_punchihewa", - "password": "password" - } - cls.handler = DremioHandler('test_dremio_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM staging.zips" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('staging.zips') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/dropbox_handler/README.md b/mindsdb/integrations/handlers/dropbox_handler/README.md deleted file mode 100644 index d29d8af12a7..00000000000 --- a/mindsdb/integrations/handlers/dropbox_handler/README.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: Dropbox -sidebarTitle: Dropbox ---- - -# Dropbox Handler - -This documentation describes the integration of MindsDB with [Dropbox](https://www.dropbox.com/official-teams-page?_tk=paid_sem_goog_biz_b&_camp=1033325405&_kw=dropbox|e&_ad=708022104237||c&gad_source=1&gclid=EAIaIQobChMI3qGNp4WPiQMVMpeDBx0X3CdpEAAYASAAEgIb9PD_BwE), a storage service. - -## Connection - -Establish a connection to your Dropbox account from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE dropbox_datasource -WITH - engine = 'dropbox', - parameters = { - "access_token": "ai.L-wqp3eP6r4cSWVklkKAdTNZ3VAuQjWuZMvIs1BzKvZNVW07rKbVNi5HbxvLc9q9D6qSfsf5VTsqYsNPGUkqSJBlpkr88gNboUNuhITmJG9mVw-Olniu4MO3BWVbEIphVxXxxxCd677Y" - }; -``` - -Required connection parameters include the following: - -- `access_token`: The Dropbox access token that enables connection to your Dropbox app. - -To get the `access_token`, go to the Dropbox App: https://www.dropbox.com/en_GB/developers. - -## Usage - -#### Execute the SQL statement - -For fetching the files from Dropbox, you need to provide full paths. If you want to see -all the paths to your files in Dropbox, execute: - -```sql -SELECT * FROM .files; -``` - -In this example, we will fetch the JSON file and display the data in MindsDB Studio. - -```sql -SELECT * from dropbox_datasource.`/json_files/flower/iris.json` -``` - -#### Output - -| sepalLength | sepalWidth | petalLength | petalWidth | species | -| ----------- | ---------- | ----------- | ---------- | ------- | -| 5.1 | 3.5 | 1.4 | 0.2 | setosa | -| 4.9 | 3 | 1.4 | 0.2 | setosa | -| 4.7 | 3.2 | 1.3 | 0.2 | setosa | -| 4.6 | 3.1 | 1.5 | 0.2 | setosa | -| 5 | 3.6 | 1.4 | 0.2 | setosa | - -Wrap the file in backticks (\`) to avoid issues parsing the provided SQL statements. This is especially important when the file contains spaces, special characters or prefixes, such as `my-folder/my-file.csv`. -Currently, the supported file formats are CSV, TSV, JSON, and Parquet. - -The above examples utilize `dropbox_datasource` as the data source name defined in the `CREATE DATABASE` command. - -## Troubleshooting Guide - - -`Database Connection Error` - -- **Symptoms**: Failure to connect MindsDB with the Dropbox. -- **Checklist**: - -1. Confirm that provided Dropbox credentials are correct. Try making a direct connection to the Dropbox using your local script with Dropbox Python SDK. -2. Ensure a stable network between MindsDB and Dropbox. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -- **Symptoms**: SQL queries failing or not recognizing object names containing spaces, special characters or prefixes. -- **Checklist**: 1. Ensure object names with spaces, special characters or prefixes are enclosed in backticks. 2. Examples: - _ Incorrect: SELECT _ FROM integration.travel/travel*data.csv - * Incorrect: SELECT _ FROM integration.'travel/travel_data.csv' - _ Correct: SELECT \_ FROM integration.\`travel/travel_data.csv\` - diff --git a/mindsdb/integrations/handlers/dropbox_handler/__about__.py b/mindsdb/integrations/handlers/dropbox_handler/__about__.py deleted file mode 100644 index e94f3a3f061..00000000000 --- a/mindsdb/integrations/handlers/dropbox_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Dropbox handler" -__package_name__ = "mindsdb_dropbox_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Dropbox" -__author__ = "Ton Hoang Nguyen (Bill)" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2024 - mindsdb" diff --git a/mindsdb/integrations/handlers/dropbox_handler/__init__.py b/mindsdb/integrations/handlers/dropbox_handler/__init__.py deleted file mode 100644 index af04ac29db1..00000000000 --- a/mindsdb/integrations/handlers/dropbox_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .dropbox_handler import DropboxHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Dropbox" -name = "dropbox" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/dropbox_handler/connection_args.py b/mindsdb/integrations/handlers/dropbox_handler/connection_args.py deleted file mode 100644 index af6dffd40f0..00000000000 --- a/mindsdb/integrations/handlers/dropbox_handler/connection_args.py +++ /dev/null @@ -1,17 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - access_token={ - "type": ARG_TYPE.STR, - "description": " Dropbox Access Token", - "required": True, - "label": "Dropbox Access Token", - }, -) - -connection_args_example = OrderedDict( - access_token="ai.L-wqp3eP6r4cSWVklkKAdTNZ3VAuQjWuZMvIs1BzKvZNVW07rKbVNi5HbxvLc9q9D6qSfsf5VTsqYsNPGUkqSJBlpkr88gNboUNuhITmJG9mVw-Olniu4MO3BWVbEIphVxXxxxCd677Y", -) diff --git a/mindsdb/integrations/handlers/dropbox_handler/dropbox_handler.py b/mindsdb/integrations/handlers/dropbox_handler/dropbox_handler.py deleted file mode 100644 index e9f38db66d6..00000000000 --- a/mindsdb/integrations/handlers/dropbox_handler/dropbox_handler.py +++ /dev/null @@ -1,253 +0,0 @@ -import io -import pandas as pd -import dropbox - -from dropbox.exceptions import AuthError, ApiError, BadInputError -from typing import Dict, Optional, Text - -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import Select, Identifier, Insert - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - - -from mindsdb.integrations.libs.api_handler import APIHandler, APIResource - - -class ListFilesTable(APIResource): - - def list(self, conditions=None, limit=None, sort=None, targets=None, **kwargs): - files = self.handler._list_files() - data = [] - for file in files: - item = { - "path": file["path"], - "name": file["name"], - "extension": file["extension"], - } - data.append(item) - df = pd.DataFrame(data) - return df - - def get_columns(self): - return ["path", "name", "extension"] - - -class FileTable(APIResource): - - def _get_file_df(self): - try: - df = self.handler._read_file(self.table_name) - if df is None: - raise Exception(f"No such file found for the path: {self.dropbox_path}") - - return df - except Exception as e: - self.handler.logger.error(e) - - def list(self, conditions=None, limit=None, sort=None, targets=None, **kwargs): - return self._get_file_df() - - def get_columns(self): - df = self.handler._read_file(self.table_name) - return df.columns.tolist() - - def insert(self, query: Insert) -> None: - columns = [col.name for col in query.columns] - data = [dict(zip(columns, row)) for row in query.values] - df_new = pd.DataFrame(data) - df_existing = self._get_file_df() - df_combined = pd.concat([df_existing, df_new], ignore_index=True) - self.handler._write_file(self.table_name, df_combined) - - -class DropboxHandler(APIHandler): - - name = "dropbox" - supported_file_formats = ["csv", "tsv", "json", "parquet"] - - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs): - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - self.logger = log.getLogger(__name__) - self.dbx = None - self.is_connected = False - self._files_table = ListFilesTable(self) - self._register_table("files", self._files_table) - - def connect(self): - try: - if self.is_connected: - return - if "access_token" not in self.connection_data: - raise ValueError("Access token must be provided.") - self.dbx = dropbox.Dropbox(self.connection_data["access_token"]) - self.is_connected = True - self.logger.info( - f"Connected to Dropbox as {self.dbx.users_get_current_account().email}" - ) - except ValueError as e: - self.logger.error(f"Error connecting to Dropbox: {e}") - except AuthError as e: - self.logger.error(f"Authentication error with Dropbox: {e}") - except BadInputError as e: - self.logger.error(f"Bad input error with Dropbox: {e}") - except Exception as e: - self.logger.error(f"Error with Dropbox: {e}") - - def check_connection(self) -> StatusResponse: - response = StatusResponse(False) - try: - self.connect() - response.success = True - except (ApiError, ValueError) as e: - self.logger.error(f"Error connecting to Dropbox with Dropbox: {e}") - response.error_message = str(e) - except AuthError as e: - self.logger.error(f"Authentication error with Dropbox: {e}") - response.error_message = str(e) - except Exception as e: - self.logger.error(f"Error with Dropbox Handler: {e}") - response.error_message = str(e) - return response - - def disconnect(self): - if not self.is_connected: - return - self.dbx = None - self.is_connected = False - self.logger.info("Disconnected from Dropbox") - - def _read_as_content(self, file_path) -> None: - """ - Read files as content - """ - try: - _, res = self.dbx.files_download(file_path) - content = res.content - return content - except ApiError as e: - self.logger.error(f"Error when downloading a file from Dropbox: {e}") - - def query(self, query: ASTNode) -> Response: - - if isinstance(query, Select): - table_name = query.from_table.parts[-1] - if table_name == "files": - table = self._files_table - df = table.select(query) - - # add content - has_content = False - for target in query.targets: - if ( - isinstance(target, Identifier) - and target.parts[-1].lower() == "content" - ): - has_content = True - break - if has_content: - df["content"] = df["path"].apply(self._read_as_content) - else: - table = FileTable(self, table_name=table_name) - df = table.select(query) - - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - elif isinstance(query, Insert): - table_name = query.table.parts[-1] - table = FileTable(self, table_name=table_name) - table.insert(query) - return Response(RESPONSE_TYPE.OK) - else: - raise NotImplementedError( - "Only SELECT and INSERT operations are supported." - ) - - def get_tables(self) -> Response: - table_names = list(self._tables.keys()) - df = pd.DataFrame(table_names, columns=["table_name"]) - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - def get_columns(self, table_name: str) -> Response: - table = self._get_table(Identifier(table_name)) - columns = table.get_columns() - df = pd.DataFrame(columns, columns=["column_name"]) - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - def _list_files(self, path=""): - files = [] - result = self.dbx.files_list_folder(path, recursive=True) - files.extend(self._process_entries(result.entries)) - while result.has_more: - result = self.dbx.files_list_folder_continue(result.cursor) - files.extend(self._process_entries(result.entries)) - return files - - def _process_entries(self, entries): - files = [] - for entry in entries: - if isinstance(entry, dropbox.files.FileMetadata): - extension = entry.name.split(".")[-1].lower() - if extension in self.supported_file_formats: - files.append( - { - "path": entry.path_lower, - "name": entry.name, - "extension": extension, - } - ) - return files - - def _read_file(self, path) -> pd.DataFrame: - try: - _, res = self.dbx.files_download(path) - content = res.content - extension = path.split(".")[-1].lower() - if extension == "csv": - df = pd.read_csv(io.BytesIO(content)) - elif extension == "tsv": - df = pd.read_csv(io.BytesIO(content), sep="\t") - elif extension == "json": - df = pd.read_json(io.BytesIO(content)) - elif extension == "parquet": - df = pd.read_parquet(io.BytesIO(content)) - else: - raise ValueError(f"Unsupported file format: {extension}") - return df - except ValueError as e: - self.logger.error(f"Error with file extension: {e}") - except ApiError as e: - self.logger.error(f"Error when downloading a file from Dropbox: {e}") - except Exception as e: - self.logger.error(f"Error with Dropbox Handler: {e}") - - def _write_file(self, path, df: pd.DataFrame): - try: - extension = path.split(".")[-1].lower() - buffer = io.BytesIO() - if extension == "csv": - df.to_csv(buffer, index=False) - elif extension == "tsv": - df.to_csv(buffer, index=False, sep="\t") - elif extension == "json": - df.to_json(buffer, orient="records") - elif extension == "parquet": - df.to_parquet(buffer, index=False) - else: - raise ValueError(f"Unsupported file format: {extension}") - buffer.seek(0) - self.dbx.files_upload( - buffer.read(), path, mode=dropbox.files.WriteMode.overwrite - ) - except ValueError as e: - self.logger.error(f"Error with file extension: {e}") - except ApiError as e: - self.logger.error(f"Error when writing a file to Dropbox: {e}") - except Exception as e: - self.logger.error(f"Error with Dropbox Handler: {e}") diff --git a/mindsdb/integrations/handlers/dropbox_handler/icon.svg b/mindsdb/integrations/handlers/dropbox_handler/icon.svg deleted file mode 100644 index 84ac860264f..00000000000 --- a/mindsdb/integrations/handlers/dropbox_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/mindsdb/integrations/handlers/dropbox_handler/requirements.txt b/mindsdb/integrations/handlers/dropbox_handler/requirements.txt deleted file mode 100644 index 8b8d713ec6b..00000000000 --- a/mindsdb/integrations/handlers/dropbox_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -dropbox -urllib3>=2.2.2 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/mindsdb/integrations/handlers/dropbox_handler/tests/__init__.py b/mindsdb/integrations/handlers/dropbox_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/dropbox_handler/tests/test_dropbox_handler.py b/mindsdb/integrations/handlers/dropbox_handler/tests/test_dropbox_handler.py deleted file mode 100644 index 145547af284..00000000000 --- a/mindsdb/integrations/handlers/dropbox_handler/tests/test_dropbox_handler.py +++ /dev/null @@ -1,18 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.dropbox_handler.dropbox_handler import DropboxHandler - - -class DropboxHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "access_token": "ai.L-wqp3eP6r4cSWVklkKAdTNZ3VAuQjWuZMvIs1BzKvZNVW07rKbVNi5HbxvLc9q9D6qSfsf5VTsqYsNPGUkqSJBlpkr88gNboUNuhITmJG9mVw-Olniu4MO3BWVbEIphVxXxxxCd677Y", - } - cls.handler = DropboxHandler("test_dropbox_handler", cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/druid_handler/README.md b/mindsdb/integrations/handlers/druid_handler/README.md deleted file mode 100644 index d8f7de767ce..00000000000 --- a/mindsdb/integrations/handlers/druid_handler/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Apache Druid Handler - -This is the implementation of the Apache Druid handler for MindsDB. - -## Apache Druid -Apache Druid is a real-time analytics database designed for fast slice-and-dice analytics ("OLAP" queries) on large data sets. Most often, Druid powers use cases where real-time ingestion, fast query performance, and high uptime are important. -
-https://druid.apache.org/docs/latest/design - -## Implementation -This handler was implemented using the `pydruid` library, the Python API for Apache Druid. - -The required arguments to establish a connection are, -* `host`: the host name or IP address of Apache Druid. -* `port`: the port that Apache Druid is running on. -* `path`: the query path. -* `scheme`: the URI schema. This parameter is optional and the default will be http. -* `user`: the username used to authenticate with Apache Druid. This parameter is optional. -* `password`: the password used to authenticate with Apache Druid. This parameter is optional. - -## Usage -In order to make use of this handler and connect to Apache Druid in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE druid_datasource -WITH -engine='druid', -parameters={ - "host": "localhost", - "port": 8888, - "path": "/druid/v2/sql/", - "scheme": "http" -}; -~~~~ - -Now, you can use this established connection to query your data source as follows, -~~~~sql -SELECT * FROM druid_datasource.example_tbl -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/druid_handler/__about__.py b/mindsdb/integrations/handlers/druid_handler/__about__.py deleted file mode 100644 index 02affacacd9..00000000000 --- a/mindsdb/integrations/handlers/druid_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Apache Druid handler' -__package_name__ = 'mindsdb_druid_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Apache Druid" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/druid_handler/__init__.py b/mindsdb/integrations/handlers/druid_handler/__init__.py deleted file mode 100644 index edb24dd45eb..00000000000 --- a/mindsdb/integrations/handlers/druid_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .druid_handler import DruidHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Apache Druid' -name = 'druid' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/druid_handler/connection_args.py b/mindsdb/integrations/handlers/druid_handler/connection_args.py deleted file mode 100644 index e0dd66ccea6..00000000000 --- a/mindsdb/integrations/handlers/druid_handler/connection_args.py +++ /dev/null @@ -1,51 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of Apache Druid.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The port that Apache Druid is running on.', - 'required': True, - 'label': 'Port' - }, - path={ - 'type': ARG_TYPE.STR, - 'description': 'The query path.', - 'required': True, - 'label': 'path' - }, - scheme={ - 'type': ARG_TYPE.STR, - 'description': 'The URI schema. This parameter is optional and the default will be http.', - 'required': False, - 'label': 'Scheme' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with Apache Druid. This parameter is optional.', - 'required': False, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password used to authenticate with Apache Druid. This parameter is optional.', - 'required': False, - 'label': 'password', - 'secret': True - } -) - -connection_args_example = OrderedDict( - host='localhost', - port=8888, - path='/druid/v2/sql/', - scheme='http' -) diff --git a/mindsdb/integrations/handlers/druid_handler/druid_handler.py b/mindsdb/integrations/handlers/druid_handler/druid_handler.py deleted file mode 100644 index 807cbafb565..00000000000 --- a/mindsdb/integrations/handlers/druid_handler/druid_handler.py +++ /dev/null @@ -1,208 +0,0 @@ -from typing import Optional - -import pandas as pd -from pydruid.db import connect - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from pydruid.db.sqlalchemy import DruidDialect - -from mindsdb_sql_parser import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - -logger = log.getLogger(__name__) - - -class DruidHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Apache Druid statements. - """ - - name = "druid" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = "druid" - - optional_parameters = ["user", "password"] - for parameter in optional_parameters: - if parameter not in connection_data: - connection_data[parameter] = None - - if "path" not in connection_data: - connection_data["path"] = "/druid/v2/sql/" - - if "scheme" not in connection_data: - connection_data["scheme"] = "http" - - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - self.connection = connect( - host=self.connection_data["host"], - port=self.connection_data["port"], - path=self.connection_data["path"], - scheme=self.connection_data["scheme"], - user=self.connection_data["user"], - password=self.connection_data["password"], - ) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - conn = self.connect() - conn.cursor().execute("select 1") # raise exception if provided wrong credentials - - response.success = True - except Exception as e: - logger.error(f"Error connecting to Druid, {e}!") - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]) - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f"Error running query: {query} on Pinot!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - renderer = SqlalchemyRender(DruidDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = """ - SELECT - TABLE_SCHEMA AS table_schema, - TABLE_NAME AS table_name, - TABLE_TYPE AS table_type - FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_SCHEMA not in ('INFORMATION_SCHEMA', 'sys') - """ - result = self.native_query(query) - - return result - - def get_columns(self, table_name: str, schema_name: Optional[str] = None) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - if schema_name is None: - schema_name = "druid" - query = f""" - SELECT - COLUMN_NAME FIELD, - DATA_TYPE TYPE - FROM INFORMATION_SCHEMA.COLUMNS - WHERE "TABLE_SCHEMA" = '{schema_name}' AND "TABLE_NAME" = '{table_name}' - """ - result = self.native_query(query) - - return result diff --git a/mindsdb/integrations/handlers/druid_handler/icon.svg b/mindsdb/integrations/handlers/druid_handler/icon.svg deleted file mode 100644 index 9058f59499f..00000000000 --- a/mindsdb/integrations/handlers/druid_handler/icon.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/druid_handler/requirements.txt b/mindsdb/integrations/handlers/druid_handler/requirements.txt deleted file mode 100644 index 2e5db8e2fb8..00000000000 --- a/mindsdb/integrations/handlers/druid_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pydruid diff --git a/mindsdb/integrations/handlers/druid_handler/tests/__init__.py b/mindsdb/integrations/handlers/druid_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/druid_handler/tests/test_druid_handler.py b/mindsdb/integrations/handlers/druid_handler/tests/test_druid_handler.py deleted file mode 100644 index 7f4cd18db47..00000000000 --- a/mindsdb/integrations/handlers/druid_handler/tests/test_druid_handler.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.druid_handler.druid_handler import DruidHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class DruidHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "localhost", - "port": 8888, - "path": "/druid/v2/sql/", - "scheme": "http" - } - cls.handler = DruidHandler('test_druid_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM wikipedia" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('wikipedia') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/README.md b/mindsdb/integrations/handlers/duckdb_faiss_handler/README.md index 36c7c455a99..3ba7f070627 100644 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/README.md +++ b/mindsdb/integrations/handlers/duckdb_faiss_handler/README.md @@ -1,78 +1,169 @@ # DuckDB + Faiss Handler +## Using duckdb_faiss handler + This handler combines DuckDB for metadata storage and SQL filtering with Faiss for high-performance vector similarity search. -## Features -- **DuckDB**: Store metadata, content, and IDs with full SQL filtering capabilities -- **Faiss**: High-speed vector indexing and similarity search (CPU/GPU support) -- **Hybrid Search**: Combine metadata filtering with vector similarity search -- **Persistence**: Automatic persistence via MindsDB's handler storage system +### 1. Create a FAISS Database and Knowledge Base -## Configuration +`duckdb_faiss` handler is installed by default with mindsdb. When the `storage` parameter is not specified it creates default vector storage. It can be: +- pgvector (if the KB_PGVECTOR_URL env variable is defined) +- otherwise, a duckdb_faiss database will be created by default -### Connection Parameters +Create knowledge base with default vector db: +``` +CREATE KNOWLEDGE BASE kb_animals +USING + embedding_model = {"provider": "openai", "model_name": "text-embedding-3-small"}; +``` -- `metric`: Distance metric - "cosine" or "l2" (default: "cosine") -- `backend`: Faiss backend - "ivf", "flat", "hnsw" (default: "hnsw") -- `use_gpu`: Enable GPU acceleration (default: False) -- `nlist`: IVF parameter for clustering (default: 1024) -- `nprobe`: IVF search parameter (default: 32) -- `hnsw_m`: HNSW connectivity parameter (default: 32) -- `hnsw_ef_search`: HNSW search parameter (default: 64) -- `persist_directory`: Optional custom storage path +You can create your own duckdb_faiss database manually as well: + +```sql +CREATE DATABASE mindsdb_faiss +WITH ENGINE = 'duckdb_faiss', +PARAMETERS = { + "persist_directory": "/data/faiss_db_location", + "metric": "ip", + "use_gpu": false, + "nlist": 10, + "nprobe": 2 +} +``` + +And use in knowledge base: +```sql +CREATE KNOWLEDGE BASE kb_animals +USING + storage = mindsdb_faiss.animals_table, + embedding_model = {"provider": "openai", "model_name": "text-embedding-3-small"}; +``` -## Usage +Parameters for duckdb_faiss database: +- `persist_directory`: Optional, custom storage path. If not set - a handler storage will be used +- `metric`: Optional, distance metric - possible values: cosine/ip/l1/l2. Default is "cosine" +- `use_gpu`: Optional, enable GPU acceleration (default: False) +- `nlist`: Optional, IVF parameter for clustering. Used as default value in create IVF index. Default is 1024 +- `nprobe`: Optional, controls the number of clusters to search during a query. Default is 1 -### Create Database Connection +### 2. Insert data + +The same as for other vector storages, insert from select or from values: ```sql -CREATE DATABASE faiss_db -WITH - ENGINE = 'duckdb_faiss', - PARAMETERS = {}; +INSERT INTO kb_animals (id, content, legs) +VALUES (1, 'duck', 2), (2, 'cat', 4); ``` -### Create knowledge base +### 3. Querying the Knowledge Base + +**Vector similarity search** +```sql +SELECT * FROM kb_animals +WHERE content = 'cat' AND distance < 0.5; +``` +**Mixed search** ```sql -create knowledge base kb_faiss -using storage = faiss_db.kb_faiss, -embedding_model={"provider": "openai", "model_name": "text-embedding-3-small"}, -metadata_columns=["title", "category"]; +SELECT * FROM kb_animals +WHERE content = 'cat' AND legs = 4; ``` +Supported `LIKE`, `NOT LIKE`, `>`, `>=`, `<`, `<=` filters for metadata columns. -### Insert Data +**Hybrid search** ```sql -INSERT INTO kb_faiss (id, content, metadata, title, category, embeddings) -VALUES - ('doc1', 'This is a news article about technology', 'Tech News', 'news'), - ('doc2', 'A scientific paper about AI research', 'AI Research', 'science'), - ('doc3', 'Business update on market trends', 'Market Update', 'business'); +SELECT * FROM kb_animals +WHERE content = 'cat' AND legs = 4 + AND hybrid_search = TRUE; ``` -### Vector Search +Can be used with bool `hybrid_search` or float `hybrid_search_alpha` parameters + + +## 4. Create FAISS Indexes + +When a new duckdb_faiss is created, it starts from using [flat FAISS index](https://faiss.ai/cpp_api/struct/structfaiss_1_1IndexFlat.html). It works by scanning all index file to get similar vectors. Also a flat index is located in RAM, and its size is restricted by available memory. +To speed up vector search you can convert to other type of indexes. Available options: +- ivf - [Inverted File](https://faiss.ai/cpp_api/struct/structfaiss_1_1IndexIVF.html). It is also located in memory, but faster than FLAT +- ivf_file, the same as ivf, but located on disk and doesn't require being loaded into RAM. This type of index isn't supported on Windows. + +Important: It is not possible to create an index for an empty FAISS knowledge base because both types of indexes require data in the knowledge base before creating it. The loaded data is used to train the index. The size of the training data and the number of clusters can affect index quality. +Query: ```sql --- Vector similarity search -SELECT * FROM kb_faiss -WHERE content = 'paper' and distance < 0.5 -LIMIT 10; - --- With metadata search -SELECT * FROM kb_faiss -WHERE content = 'paper' and category = 'news' -LIMIT 10; - --- Hybrid search (keyword + vector) -SELECT * FROM kb_faiss -WHERE content = 'paper' and category = 'news' and hybrid_search=true -LIMIT 10; +CREATE INDEX ON KNOWLEDGE_BASE kb_animals +WITH ( + type = 'ivf_file', + nlist = 100, + train_count = 10000 +); ``` -### Delete document +Parameters: +- `type` - optional, default is ivf_file + - for windows default is the 'ivf' +- `nlist` optional, number of clusters for IVF, default 1024, +- `train_count` optional, number of vectors to use for training, default is calculated from nlist. + + +## Implementation details + +### How it works + +When a duckdb_faiss table is created, the handler creates a folder for it. It contains: +- duckdb.db - a duckdb database to store metadata for knowledge base +- faiss_index - faiss index file +Folder name - is a table name + +The other files in folders in faiss table: +- duckdb.db* - all files related to duckdb (duckdb.db.wal) +- faiss_index* - all files related faiss index (partitions, merged index for ivf_file) +- dump/ - temporal folder for extracted vectors +- recover/ - temporal folder for index backup + +### Locks and concurrency + +Because IVF and FLAT indexes are loaded in RAM and the disk copy is used only to store changes in the index (insert/delete records), small indexes are unloaded from RAM after each request and loaded again before the next request. + +When the index becomes large the read time increases, so the index is cached in RAM and locked to prevent using it in different processes or threads. If mindsdb is used from different threads or processes, an `index file locked` exception might appear. The lock is released when the handler cache is cleared (default timeout is 1 min). + +Because insert-from-select into the knowledge base is performed in the background, the background process can't use the FAISS index if it is locked by a GUI. The implemented workaround is: +- before the query is sent into background + - search all locks for vector bases of KBs in the query and unload the FAISS database from cache +- after executing query in background + - do the same (unload the FAISS database from cache) + +Locks also prevent inserting into the knowledge base using threads. This query won't work: ```sql -DELETE FROM kb_faiss -WHERE id = 'doc2'; +INSERT INTO my_kb SELECT * FROM db1.table1 +USING threads=10 ``` + + +Important: The FAISS index isn't locked on Windows; the FAISS library can write to a locked file there. + +### Checking resources + +**RAM** +For indexes located in RAM, when data is inserted into the FAISS index it forecasts the required memory and does not allow the insert if it exceeds available memory. +This check is run after every 10k records inserted. + +**disk** +When an index is created, it requires two to three times more disk space (depending on the index type). The free disk space is also checked before starting to create the index. +What occupies disk: +- an old faiss_index file (its backup) +- fetched vectors from old index +- a new index + +### Keyword search + +Implemented by using duckdb [fts extension](https://duckdb.org/docs/stable/core_extensions/full_text_search#match_bm25-function) +When keyword search is used and FTS index doesn't existβ€”it is created. This index is removed when any record is inserted into KB (because FTS index isn't updated after inserts in DuckDB). + +### Mixed search optimizations +For queries that mix vectors and rich metadata: +- The handler estimates metadata selectivity (`COUNT(*) WHERE `) to choose the best execution plan. +- **Vector-first strategy** fetches an expanding set of candidates from FAISS until enough records satisfy the metadata filters. +- **Metadata-first strategy** constrains candidate IDs via DuckDB before scoring them in FAISS batches (`META_BATCH = 10,000`). diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/__init__.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/__init__.py index bf1cd12b782..fd2bd37f864 100644 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/__init__.py +++ b/mindsdb/integrations/handlers/duckdb_faiss_handler/__init__.py @@ -1,4 +1,4 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE +from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE from .__about__ import __description__ as description from .__about__ import __version__ as version @@ -16,6 +16,7 @@ name = "duckdb_faiss" type = HANDLER_TYPE.DATA icon_path = "icon.svg" +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ "Handler", @@ -24,6 +25,7 @@ "type", "title", "description", + "support_level", "connection_args", "connection_args_example", "import_error", diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_handler.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_handler.py index dc536a6430f..22153163ae8 100644 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_handler.py +++ b/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_handler.py @@ -1,21 +1,15 @@ import os -from typing import List +import re +import shutil +import threading +import time +from contextlib import contextmanager +from dataclasses import dataclass +from pathlib import Path +from typing import List, Iterator import pandas as pd -import orjson -import duckdb -from mindsdb_sql_parser.ast import ( - Select, - Delete, - Identifier, - BinaryOperation, - Constant, - NullConstant, - Star, - Tuple as AstTuple, - Function, - TypeCast, -) + from mindsdb.integrations.libs.response import ( RESPONSE_TYPE, @@ -25,7 +19,6 @@ from mindsdb.integrations.libs.vectordatabase_handler import ( FilterCondition, VectorStoreHandler, - FilterOperator, ) from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs @@ -33,11 +26,21 @@ from mindsdb.utilities import log from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from .faiss_index import FaissIVFIndex +from .duckdb_faiss_table import DuckDBFaissTable logger = log.getLogger(__name__) +TABLE_CACHE_TTL_SECONDS = 60 + + +@dataclass +class TableCacheEntry: + table: DuckDBFaissTable + last_used_ts: float + in_use_count: int = 0 + + class DuckDBFaissHandler(VectorStoreHandler, KeywordSearchBase): """This handler handles connection and execution of DuckDB with Faiss vector indexing.""" @@ -61,173 +64,206 @@ def __init__(self, name: str, **kwargs): raise ValueError(f"Persist directory {self.persist_directory} does not exist") else: # Use default handler storage - self.persist_directory = self.handler_storage.folder_get("data") + self.persist_directory = self.handler_storage.folder_get("") self._use_handler_storage = True - # DuckDB connection - self.connection = None - self.is_connected = False - - # Initialize storage paths - self.duckdb_path = os.path.join(self.persist_directory, "duckdb.db") - self.faiss_index_path = os.path.join(self.persist_directory, "faiss_index") - self.connect() - - # check keyword index - self.is_kw_index_enabled = False - with self.connection.cursor() as cur: - # check index exists - df = cur.execute( - "SELECT * FROM information_schema.schemata WHERE schema_name = 'fts_main_meta_data'" - ).fetchdf() - if len(df) > 0: - self.is_kw_index_enabled = True - - def connect(self) -> duckdb.DuckDBPyConnection: - """Connect to DuckDB database.""" - if self.is_connected: - return self.connection + Path(self.persist_directory).mkdir(parents=True, exist_ok=True) - try: - self.connection = duckdb.connect(self.duckdb_path) - self.faiss_index = FaissIVFIndex(self.faiss_index_path, self.connection_data) - self.is_connected = True + self.tables_cache = {} + self.tables_cache_lock = threading.Lock() - logger.info("Connected to DuckDB database") - return self.connection + def connect(self): + """ + Handler readiness check. + Must not open long-lived DuckDB/FAISS resources; tables are opened per operation. + """ - except Exception as e: - logger.error(f"Error connecting to DuckDB: {e}") - raise + self.is_connected = True + return True def disconnect(self): - """Close DuckDB connection.""" - if self.is_connected and self.connection: - self.connection.close() - self.faiss_index.close() - self.is_connected = False - - def create_table(self, table_name: str, if_not_exists=True): - with self.connection.cursor() as cur: - cur.execute("CREATE SEQUENCE IF NOT EXISTS faiss_id_sequence START 1") - - cur.execute(""" - CREATE TABLE IF NOT EXISTS meta_data ( - faiss_id INTEGER PRIMARY KEY DEFAULT nextval('faiss_id_sequence'), -- id in FAISS index - id TEXT NOT NULL, -- chunk id - content TEXT, - metadata JSON - ) - """) - - def drop_table(self, table_name: str, if_exists=True): - """Drop table from both DuckDB and Faiss.""" - with self.connection.cursor() as cur: - drop_sql = f"DROP TABLE {'IF EXISTS' if if_exists else ''} meta_data" - cur.execute(drop_sql) + with self.tables_cache_lock: + for item in self.tables_cache.values(): + item.table.close() - if self.faiss_index: - self.faiss_index.drop() + self.tables_cache = {} - def create_index(self, table_name: str, type: str = "ivf", nlist: int = 1024, train_count: int = 10000): - if type != "ivf": - raise NotImplementedError("Only ivf index is supported") - - self.faiss_index.create_index(nlist=nlist, train_count=train_count) + def check_connection(self) -> Response: + """Check the connection to the database.""" + try: + if not self.is_connected: + self.connect() + return StatusResponse(RESPONSE_TYPE.OK) + except Exception as e: + logger.error(f"Connection check failed: {e}") + return StatusResponse(RESPONSE_TYPE.ERROR, error_message=str(e)) - def insert(self, table_name: str, data: pd.DataFrame): - """Insert data into both DuckDB and Faiss.""" + def __del__(self): + """Cleanup on deletion.""" + self.disconnect() + + # -- manage tables -- + + @staticmethod + def _validate_table_name(table_name: str) -> None: + if table_name in (".", ".."): + raise ValueError("Invalid table_name") + if "/" in table_name or "\\" in table_name: + raise ValueError("table_name must not contain path separators") + if not re.fullmatch(r"[A-Za-z0-9_-]+", table_name): + raise ValueError( + "Invalid table_name: only letters, digits, '_' and '-' are allowed (no spaces, dots, or other symbols)" + ) - if self.is_kw_index_enabled: - # drop index, it will be created before a first keyword search - self.drop_kw_index() + def get_table_dir(self, table_name: str) -> Path: + """ + Get folder for a table name + Prevent path traversal by requiring the resolved path to stay within persist_directory. + """ + root = Path(self.persist_directory).resolve() + table_dir = (Path(self.persist_directory) / table_name).resolve() + if table_dir == root or root not in table_dir.parents: + raise ValueError("Invalid table_name path") + return table_dir + + def _close_cached_table(self, table_name: str) -> None: + entry = self.tables_cache.pop(table_name, None) + if entry is None: + return + try: + entry.table.close() + except Exception: + logger.exception("Failed to close cached table '%s'", table_name) - with self.connection.cursor() as cur: - df_ids = cur.execute(""" - insert into meta_data (id, content, metadata) ( - select id, content, metadata from data - ) - RETURNING faiss_id, id - """).fetchdf() + def _close_old_tables_cache(self): + """ + Close stale cached tables that have not been used for more than TTL. + Tables that are currently in use are never closed by pruning. + """ + if not self.tables_cache: + return + + with self.tables_cache_lock: + now_ts = time.time() + to_close: List[str] = [] + for table_name, entry in self.tables_cache.items(): + if entry.in_use_count > 0: + continue + if now_ts - entry.last_used_ts > TABLE_CACHE_TTL_SECONDS: + to_close.append(table_name) + + for table_name in to_close: + self._close_cached_table(table_name) + + @contextmanager + def open_table(self, table_name: str) -> Iterator[DuckDBFaissTable]: + """ + Open DuckDB and Faiss resources scoped to one vector table. + Must always be closed after use to avoid long-lived locks / RAM usage. - data = data.merge(df_ids, on="id") + If `use_cache=True` and `table.cache_required` is True, the opened table is cached + in `self.tables_cache` and re-used across calls. Cached tables are pruned if they + haven't been used for more than TABLE_CACHE_TTL_SECONDS. + """ + table_dir = self.get_table_dir(table_name) + if not table_dir.exists(): + raise ValueError(f"Table '{table_name}' does not exist") - vectors = data["embeddings"] - ids = data["faiss_id"] + with self.tables_cache_lock: + entry = self.tables_cache.get(table_name) - self.faiss_index.insert(list(vectors), list(ids)) - self._sync() + if entry is not None: + table = entry.table + else: + table = DuckDBFaissTable(table_name=table_name, table_dir=table_dir, handler=self).open() - # def upsert(self, table_name: str, data: pd.DataFrame): - # # delete by ids and insert - # ids = list(data['id']) - # self.delete(table_name, [FilterCondition(column='id', op=FilterOperator.IN, value=ids)]) - # self.insert(table_name, data) + if table.cache_required: + entry = TableCacheEntry(table=table, last_used_ts=time.time()) + self.tables_cache[table_name] = entry - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> pd.DataFrame: - """Select data with hybrid search logic.""" - - vector_filter = None - meta_filters = [] - if conditions is None: - conditions = [] - for condition in conditions: - if condition.column == "embeddings": - vector_filter = condition + try: + if entry: + with self.tables_cache_lock: + entry.in_use_count += 1 + + yield table + finally: + if entry: + entry.in_use_count -= 1 + entry.last_used_ts = time.time() else: - meta_filters.append(condition) + table.close() - if vector_filter is None: - # If only metadata in filter: - # query duckdb only - return self._select_from_metadata(meta_filters=meta_filters, limit=limit).drop("faiss_id", axis=1) + self._close_old_tables_cache() - # vector_filter is not None - if not meta_filters: - # If only content in filter: query faiss and attach to metadata - return self._select_with_vector(vector_filter=vector_filter, limit=limit) + def create_table(self, table_name: str, if_not_exists=True): + self._validate_table_name(table_name) + table_dir = self.get_table_dir(table_name) + if table_dir.exists() and not if_not_exists: + raise ValueError(f"Vector table '{table_name}' already exists") + table_dir.mkdir(parents=True, exist_ok=True) + + with self.open_table(table_name) as table: + with table.connection.cursor() as cur: + cur.execute("CREATE SEQUENCE IF NOT EXISTS faiss_id_sequence START 1") + cur.execute(""" + CREATE TABLE IF NOT EXISTS meta_data ( + faiss_id INTEGER PRIMARY KEY DEFAULT nextval('faiss_id_sequence'), -- id in FAISS index + id TEXT NOT NULL, -- chunk id + content TEXT, + metadata JSON + ) + """) - """ - If metadata + content: - Query faiss, use limit = 1000 - Query duckdb with `id in (...)` - If count of results is less than input LIMIT value - Repeat the search with increased limit value - Limit value for step = 1000 * 5^i (1000, 2000, 25000, 125000 …) - """ + def drop_table(self, table_name: str, if_exists=True): + """Drop table from both DuckDB and Faiss.""" + table_dir = self.get_table_dir(table_name) - df = pd.DataFrame() + if not table_dir.exists(): + if if_exists: + return + raise ValueError(f"Vector table '{table_name}' does not exist") - total_size = self.get_total_size() + with self.tables_cache_lock: + self._close_cached_table(table_name) - for i in range(10): - batch_size = 1000 * 5**i + shutil.rmtree(table_dir, ignore_errors=False) - # TODO implement reverse search: - # if batch_size > 25% of db: search metadata first and then in faiss by list of ids + if self._use_handler_storage: + self.handler_storage.folder_sync(table_name) - df = self._select_with_vector(vector_filter=vector_filter, meta_filters=meta_filters, limit=batch_size) - if batch_size >= total_size or len(df) >= limit: - break + def get_tables(self) -> Response: + """Get list of tables.""" + rows = [] + root = Path(self.persist_directory) + if root.exists(): + for item in root.iterdir(): + if not item.is_dir(): + continue + rows.append({"table_name": item.name}) + df = pd.DataFrame(rows, columns=["table_name"]) + return Response(RESPONSE_TYPE.TABLE, data_frame=df) - return df[:limit] + # -- table methods -- - def create_kw_index(self): - with self.connection.cursor() as cur: - cur.execute("PRAGMA create_fts_index('meta_data', 'id', 'content')") - self.is_kw_index_enabled = True + def create_index(self, table_name: str, type: str = None, nlist: int = None, train_count: int = None): + with self.open_table(table_name) as table: + table.create_index(type=type, nlist=nlist, train_count=train_count) - def drop_kw_index(self): - with self.connection.cursor() as cur: - cur.execute("pragma drop_fts_index('meta_data')") - self.is_kw_index_enabled = False + def insert(self, table_name: str, data: pd.DataFrame): + with self.open_table(table_name) as table: + table.insert(data) + + def select( + self, + table_name: str, + columns: List[str] = None, + conditions: List[FilterCondition] = None, + offset: int = None, + limit: int = None, + ) -> pd.DataFrame: + with self.open_table(table_name) as table: + return table.select(conditions=conditions, offset=offset, limit=limit) def keyword_select( self, @@ -238,229 +274,20 @@ def keyword_select( limit: int = None, keyword_search_args: KeywordSearchArgs = None, ) -> pd.DataFrame: - if not self.is_kw_index_enabled: - # keyword search is used for first time: create index - self.create_kw_index() - - with self.connection.cursor() as cur: - where_clause = self._translate_filters(conditions) - - score = Function( - namespace="fts_main_meta_data", - op="match_bm25", - args=[ - Identifier("id"), - Constant(keyword_search_args.query), - BinaryOperation(op=":=", args=[Identifier("fields"), Constant(keyword_search_args.column)]), - ], + with self.open_table(table_name) as table: + return table.keyword_select( + conditions=conditions, + offset=offset, + limit=limit, + keyword_search_args=keyword_search_args, ) - no_emtpy_score = BinaryOperation(op="is not", args=[score, NullConstant()]) - if where_clause: - where_clause = BinaryOperation(op="and", args=[where_clause, no_emtpy_score]) - else: - where_clause = no_emtpy_score - - query = Select( - targets=[Star(), BinaryOperation(op="-", args=[Constant(1), score], alias=Identifier("distance"))], - from_table=Identifier("meta_data"), - where=where_clause, - ) - - sql = self.renderer.get_string(query, with_failback=True) - cur.execute(sql) - df = cur.fetchdf() - df["metadata"] = df["metadata"].apply(orjson.loads) - return df - - def get_total_size(self): - with self.connection.cursor() as cur: - cur.execute("select count(1) size from meta_data") - df = cur.fetchdf() - return df["size"].iloc[0] - - def _select_with_vector(self, vector_filter: FilterCondition, meta_filters=None, limit=None) -> pd.DataFrame: - embedding = vector_filter.value - if isinstance(embedding, str): - embedding = orjson.loads(embedding) - - distances, faiss_ids = self.faiss_index.search(embedding, limit or 100) - - # Fetch full data from DuckDB - if len(faiss_ids) > 0: - # ids = [str(idx) for idx in faiss_ids] - meta_df = self._select_from_metadata(faiss_ids=faiss_ids, meta_filters=meta_filters) - vector_df = pd.DataFrame({"faiss_id": faiss_ids, "distance": distances}) - return vector_df.merge(meta_df, on="faiss_id").drop("faiss_id", axis=1).sort_values(by="distance") - - return pd.DataFrame([], columns=["id", "content", "metadata", "distance"]) - - def _select_from_metadata(self, faiss_ids=None, meta_filters=None, limit=None): - query = Select( - targets=[Star()], - from_table=Identifier("meta_data"), - ) - - where_clause = self._translate_filters(meta_filters) - - if faiss_ids: - # TODO what if ids list is too long - split search into batches - in_filter = BinaryOperation( - op="IN", args=[Identifier("faiss_id"), AstTuple([Constant(i) for i in faiss_ids])] - ) - # split into chunks - chunk_size = 10000 - if len(faiss_ids) > chunk_size: - dfs = [] - chunk = 0 - total = 0 - while chunk * chunk_size < len(faiss_ids): - # create results with partition - ids = faiss_ids[chunk * chunk_size : (chunk + 1) * chunk_size] - chunk += 1 - df = self._select_from_metadata(faiss_ids=ids, meta_filters=meta_filters, limit=limit) - total += len(df) - if limit is not None and limit <= total: - # cut the extra from the end - df = df[: -(total - limit)] - dfs.append(df) - break - if len(df) > 0: - dfs.append(df) - if len(dfs) == 0: - return pd.DataFrame([], columns=["faiss_id", "id", "content", "metadata"]) - return pd.concat(dfs) - - if where_clause is None: - where_clause = in_filter - else: - where_clause = BinaryOperation(op="AND", args=[where_clause, in_filter]) - - if limit is not None: - query.limit = Constant(limit) - - query.where = where_clause - - with self.connection.cursor() as cur: - sql = self.renderer.get_string(query, with_failback=True) - cur.execute(sql) - df = cur.fetchdf() - df["metadata"] = df["metadata"].apply(orjson.loads) - return df - - def _translate_filters(self, meta_filters): - if not meta_filters: - return None - - where_clause = None - for item in meta_filters: - parts = item.column.split(".") - key = Identifier(parts[0]) - - # converts 'col.el1.el2' to col->'el1'->>'el2' - if len(parts) > 1: - # intermediate elements - for el in parts[1:-1]: - key = BinaryOperation(op="->", args=[key, Constant(el)]) - - # last element - key = BinaryOperation(op="->>", args=[key, Constant(parts[-1])]) - - is_orig_id = item.column == "metadata._original_doc_id" - - type_cast = None - value = item.value - - if isinstance(value, list) and len(value) > 0 and item.op in (FilterOperator.IN, FilterOperator.NOT_IN): - if is_orig_id: - # convert to str - item.value = [str(i) for i in value] - value = item.value[0] - elif is_orig_id: - if not isinstance(value, str): - value = item.value = str(item.value) - - if isinstance(value, int): - type_cast = "int" - elif isinstance(value, float): - type_cast = "float" - - if type_cast is not None: - key = TypeCast(type_cast, key) - - if item.op in (FilterOperator.NOT_IN, FilterOperator.IN): - values = [Constant(i) for i in item.value] - value = AstTuple(values) - else: - value = Constant(item.value) - - condition = BinaryOperation(op=item.op.value, args=[key, value]) - - if where_clause is None: - where_clause = condition - else: - where_clause = BinaryOperation(op="AND", args=[where_clause, condition]) - return where_clause - - def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> Response: + def delete(self, table_name: str, conditions: List[FilterCondition] = None): """Delete data from both DuckDB and Faiss.""" - with self.connection.cursor() as cur: - where_clause = self._translate_filters(conditions) - - query = Select(targets=[Identifier("faiss_id")], from_table=Identifier("meta_data"), where=where_clause) - cur.execute(self.renderer.get_string(query, with_failback=True)) - df = cur.fetchdf() - ids = list(df["faiss_id"]) - - self.faiss_index.delete_ids(ids) - - query = Delete(table=Identifier("meta_data"), where=where_clause) - cur.execute(self.renderer.get_string(query, with_failback=True)) - - self._sync() + with self.open_table(table_name) as table: + table.delete(conditions) def get_dimension(self, table_name: str) -> int: - if self.faiss_index: - return self.faiss_index.dim - - def _sync(self): - """Sync the database to disk if using persistent storage""" - self.faiss_index.dump() - if self._use_handler_storage: - self.handler_storage.folder_sync(self.persist_directory) - - def get_tables(self) -> Response: - """Get list of tables.""" - with self.connection.cursor() as cur: - df = cur.execute("show tables").fetchdf() - df = df.rename(columns={"name": "table_name"}) - - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - def check_connection(self) -> Response: - """Check the connection to the database.""" - try: - if not self.is_connected: - self.connect() - return StatusResponse(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f"Connection check failed: {e}") - return StatusResponse(RESPONSE_TYPE.ERROR, error_message=str(e)) - - def native_query(self, query: str) -> Response: - """Execute a native SQL query.""" - try: - with self.connection.cursor() as cur: - cur.execute(query) - result = cur.fetchdf() - return Response(RESPONSE_TYPE.TABLE, data_frame=result) - except Exception as e: - logger.error(f"Error executing native query: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - def __del__(self): - """Cleanup on deletion.""" - if self.is_connected: - self._sync() - self.disconnect() + with self.open_table(table_name) as table: + return table.get_dimension() diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_table.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_table.py new file mode 100644 index 00000000000..3ee59e93c01 --- /dev/null +++ b/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_table.py @@ -0,0 +1,496 @@ +from pathlib import Path +from typing import List +import math + +import pandas as pd +import orjson +import duckdb +from mindsdb_sql_parser.ast import ( + Select, + Delete, + Identifier, + BinaryOperation, + Constant, + NullConstant, + Star, + Tuple as AstTuple, + Function, + TypeCast, +) + + +from mindsdb.integrations.libs.vectordatabase_handler import ( + FilterCondition, + FilterOperator, +) +from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs + +from mindsdb.utilities import log + +from .faiss_index import FaissIVFIndex + +logger = log.getLogger(__name__) + + +class DuckDBFaissTable: + META_BATCH_SIZE = 10_000 + VECTOR_MARGIN_K = 5 + VECTOR_GROWTH_MULTIPLIER = 5 + VECTOR_MAX_RATE = 0.25 + VECTOR_MAX_LIMIT = 1_000_000 + VECTOR_MAX_ITERATIONS = 3 + DEFAULT_LIMIT = 100 + + def __init__(self, table_name: str, table_dir: Path, handler): + self.table_name = table_name + self.handler = handler + self.connection: duckdb.DuckDBPyConnection | None = None + self.faiss_index: FaissIVFIndex | None = None + self.table_dir = table_dir + self.is_kw_index_enabled = False + self.cache_required = False + + def open(self) -> "DuckDBFaissTable": + duckdb_path = self.table_dir / "duckdb.db" + self.connection = duckdb.connect(str(duckdb_path)) + self.faiss_index = FaissIVFIndex(str(self.table_dir), self.handler.connection_data) + + self.cache_required = self.faiss_index.lock_required and self.faiss_index.get_size() > 100_000 + + # check keyword index + with self.connection.cursor() as cur: + # check index exists + df = cur.execute( + "SELECT * FROM information_schema.schemata WHERE schema_name = 'fts_main_meta_data'" + ).fetchdf() + if len(df) > 0: + self.is_kw_index_enabled = True + + return self + + def close(self) -> None: + self.faiss_index.close() + self.connection.close() + + @staticmethod + def _empty_result() -> pd.DataFrame: + return pd.DataFrame([], columns=["id", "content", "metadata", "distance"]) + + def _create_kw_index(self): + with self.connection.cursor() as cur: + cur.execute("PRAGMA create_fts_index('meta_data', 'id', 'content')") + self.is_kw_index_enabled = True + + def _drop_kw_index(self): + with self.connection.cursor() as cur: + cur.execute("pragma drop_fts_index('meta_data')") + self.is_kw_index_enabled = False + + def _sync(self, dump_faiss=True): + if dump_faiss: + self.faiss_index.dump() + + if self.handler._use_handler_storage: + self.handler.handler_storage.folder_sync(self.table_name) + + def create_index(self, type: str = None, nlist: int = None, train_count: int = None): + self.faiss_index.create_index(type, nlist=nlist, train_count=train_count) + # index was already saved. don't dump it twice + self._sync(dump_faiss=False) + + def insert(self, data: pd.DataFrame): + """Insert data into both DuckDB and Faiss.""" + + if self.is_kw_index_enabled: + # drop index, it will be created before a first keyword search + self._drop_kw_index() + + with self.connection.cursor() as cur: + df_ids = cur.execute(""" + insert into meta_data (id, content, metadata) ( + select id, content, metadata from data + ) + RETURNING faiss_id, id + """).fetchdf() + + data = data.merge(df_ids, on="id") + + vectors = data["embeddings"] + ids = data["faiss_id"] + + self.faiss_index.insert(list(vectors), list(ids)) + self._sync() + + def select( + self, + conditions: List[FilterCondition] = None, + offset: int = None, + limit: int = None, + ) -> pd.DataFrame: + """Select data with hybrid search logic.""" + + vector_filter = None + meta_filters = [] + if conditions is None: + conditions = [] + for condition in conditions: + if condition.column == "embeddings": + vector_filter = condition + else: + meta_filters.append(condition) + + if vector_filter is None: + # If only metadata in filter: + # query duckdb only + return self._select_from_metadata(meta_filters=meta_filters, limit=limit).drop("faiss_id", axis=1) + + # vector_filter is not None + if not meta_filters: + # If only content in filter: query faiss and attach to metadata + return self._select_with_vector(vector_filter=vector_filter, limit=limit) + + return self.mixed_search(vector_filter=vector_filter, meta_filters=meta_filters, limit=limit) + + def mixed_search(self, vector_filter, meta_filters, limit): + """ + 1. Measure selectivity of META_FILTERS: + Get predicted count of record after applying META_FILTERS using some of methods + Selectivity = count / total records + + 2. selectivity * total_recors > LIMIT / selectivity: + Use Vector-first search + Else: + Use Metadata-first search + """ + + if limit is None: + limit = self.DEFAULT_LIMIT + + total = self.faiss_index.get_size() + if total == 0 or limit == 0: + # no reason to do vector search + return self._empty_result() + + matched_count = self.get_metadata_search_count(meta_filters) + selectivity = matched_count / total + + # compare forecast count of affected records for vector and metadata search and choose what will take less + # do search even if selectivity is 0 because it might be approximate value in the future + if selectivity > 0 and selectivity * total > limit / selectivity: + df = self.vector_first_search(vector_filter, meta_filters, limit, selectivity) + else: + df = self.metadata_first_search(vector_filter, meta_filters, limit) + + return df[:limit] + + def get_metadata_search_count(self, meta_filters): + """ + Get count of records from duckdb with meta_filters + """ + + where_clause = self._translate_filters(meta_filters) + count_query = Select( + targets=[Function("count", args=[Star()], alias=Identifier("cnt"))], + from_table=Identifier("meta_data"), + where=where_clause, + ) + + with self.connection.cursor() as cur: + sql = self.handler.renderer.get_string(count_query, with_failback=True) + cur.execute(sql) + df = cur.fetchdf() + + return int(df["cnt"].iloc[0]) + + def vector_first_search(self, vector_filter, meta_filters, limit, selectivity): + """ + + Calculate required top results from faiss: it is predicted count of records, that required to be scanned + + Top_results = LIMIT / selectivity * VECTOR_MARGIN_K + + Circle: + Search Top_results vectors in faiss + Get ids + query duckdb with META_FILTERS and list of ids + If count of found records < LIMIT: + Increase Top_results = Top_results * VECTOR_GROWTH_MULTIPLIER to make next search iteration + If Top_results > total * VECTOR_MAX_RATE + or Top_results > VECTOR_MAX_LIMIT + or number of iteration >VECTOR_MAX_ITERATIONS: + Something went wrong, maybe META_FILTERS records has greater distance than average record + Break vector-first search and switch to metadata-first + If count of found records >= LIMIT: + Break and return results + """ + + total = self.faiss_index.get_size() + + top_results = math.ceil(limit / selectivity * self.VECTOR_MARGIN_K) + + for i in range(self.VECTOR_MAX_ITERATIONS): + df = self._select_with_vector(vector_filter=vector_filter, meta_filters=meta_filters, limit=top_results) + if len(df) >= limit: + # found required size of data + return df + + top_results = top_results * self.VECTOR_GROWTH_MULTIPLIER + + if top_results > total * self.VECTOR_MAX_RATE or top_results > self.VECTOR_MAX_LIMIT: + # give up with vector_first search + break + + # failback to metadata-first search + return self.metadata_first_search(vector_filter, meta_filters, limit) + + def metadata_first_search(self, vector_filter, meta_filters, limit): + """ + Metadata-first search + + Query list of all ids from duckdb table using META_FILTERS + + Split into batches by META_BATCH. + Per batch: + Get batch of ids + Use ID selector to search in FAISS only by batch of ids + use LIMIT + Combine results in single list alongside with distances + After all batches + get top LIMIT vectors with min distances + Get their ids and find records in duckdb table for them + """ + + embedding = vector_filter.value + if isinstance(embedding, str): + embedding = orjson.loads(embedding) + + where_clause = self._translate_filters(meta_filters) + ids_query = Select( + targets=[Identifier("faiss_id")], + from_table=Identifier("meta_data"), + where=where_clause, + ) + + with self.connection.cursor() as cur: + sql = self.handler.renderer.get_string(ids_query, with_failback=True) + meta_df = cur.execute(sql).fetchdf() + + if meta_df.empty: + return self._empty_result() + + faiss_ids = meta_df["faiss_id"].tolist() + results = [] + for start in range(0, len(faiss_ids), self.META_BATCH_SIZE): + batch_ids = faiss_ids[start : start + self.META_BATCH_SIZE] + + distances, faiss_ids_found = self.faiss_index.search(embedding, limit, allowed_ids=batch_ids) + results.extend(zip(distances, faiss_ids_found)) + + results.sort(key=lambda x: x[0]) + + results = results[:limit] + if len(results) == 0: + raise RuntimeError("Something went wrong, faiss database didn't return results") + distances, faiss_ids = zip(*results) + + meta_df = self._select_from_metadata(faiss_ids=faiss_ids, meta_filters=meta_filters) + vector_df = pd.DataFrame({"faiss_id": faiss_ids, "distance": distances}) + return vector_df.merge(meta_df, on="faiss_id").drop("faiss_id", axis=1).sort_values(by="distance") + + def keyword_select( + self, + conditions: List[FilterCondition] = None, + offset: int = None, + limit: int = None, + keyword_search_args: KeywordSearchArgs = None, + ) -> pd.DataFrame: + if not self.is_kw_index_enabled: + # keyword search is used for first time: create index + self._create_kw_index() + + with self.connection.cursor() as cur: + where_clause = self._translate_filters(conditions) + + score = Function( + namespace="fts_main_meta_data", + op="match_bm25", + args=[ + Identifier("id"), + Constant(keyword_search_args.query), + BinaryOperation(op=":=", args=[Identifier("fields"), Constant(keyword_search_args.column)]), + ], + ) + + no_emtpy_score = BinaryOperation(op="is not", args=[score, NullConstant()]) + if where_clause: + where_clause = BinaryOperation(op="and", args=[where_clause, no_emtpy_score]) + else: + where_clause = no_emtpy_score + + query = Select( + targets=[Star(), BinaryOperation(op="-", args=[Constant(1), score], alias=Identifier("distance"))], + from_table=Identifier("meta_data"), + where=where_clause, + ) + + if limit is not None: + query.limit = Constant(limit) + + if offset is not None: + query.offset = Constant(offset) + + sql = self.handler.renderer.get_string(query, with_failback=True) + cur.execute(sql) + df = cur.fetchdf() + df["metadata"] = df["metadata"].apply(orjson.loads) + return df + + def delete(self, conditions: List[FilterCondition] = None): + """Delete data from both DuckDB and Faiss.""" + with self.connection.cursor() as cur: + where_clause = self._translate_filters(conditions) + + query = Select(targets=[Identifier("faiss_id")], from_table=Identifier("meta_data"), where=where_clause) + cur.execute(self.handler.renderer.get_string(query, with_failback=True)) + df = cur.fetchdf() + ids = list(df["faiss_id"]) + + self.faiss_index.delete_ids(ids) + + query = Delete(table=Identifier("meta_data"), where=where_clause) + cur.execute(self.handler.renderer.get_string(query, with_failback=True)) + + self._sync() + + def get_dimension(self) -> int: + if self.faiss_index and self.faiss_index.index is not None: + return self.faiss_index.dim + + def get_total_size(self): + with self.connection.cursor() as cur: + cur.execute("select count(1) size from meta_data") + df = cur.fetchdf() + return df["size"].iloc[0] + + def _select_with_vector(self, vector_filter: FilterCondition, meta_filters=None, limit=None) -> pd.DataFrame: + embedding = vector_filter.value + if isinstance(embedding, str): + embedding = orjson.loads(embedding) + + distances, faiss_ids = self.faiss_index.search(embedding, limit or self.DEFAULT_LIMIT) + + # Fetch full data from DuckDB + if len(faiss_ids) > 0: + # ids = [str(idx) for idx in faiss_ids] + meta_df = self._select_from_metadata(faiss_ids=faiss_ids, meta_filters=meta_filters) + vector_df = pd.DataFrame({"faiss_id": faiss_ids, "distance": distances}) + return vector_df.merge(meta_df, on="faiss_id").drop("faiss_id", axis=1).sort_values(by="distance") + + return self._empty_result() + + def _select_from_metadata(self, faiss_ids=None, meta_filters=None, limit=None): + query = Select( + targets=[Star()], + from_table=Identifier("meta_data"), + ) + + where_clause = self._translate_filters(meta_filters) + + if faiss_ids: + # TODO what if ids list is too long - split search into batches + in_filter = BinaryOperation( + op="IN", args=[Identifier("faiss_id"), AstTuple([Constant(i) for i in faiss_ids])] + ) + # split into chunks + chunk_size = 10000 + if len(faiss_ids) > chunk_size: + dfs = [] + chunk = 0 + total = 0 + while chunk * chunk_size < len(faiss_ids): + # create results with partition + ids = faiss_ids[chunk * chunk_size : (chunk + 1) * chunk_size] + chunk += 1 + df = self._select_from_metadata(faiss_ids=ids, meta_filters=meta_filters, limit=limit) + total += len(df) + if limit is not None and limit <= total: + # cut the extra from the end + df = df[: -(total - limit)] + dfs.append(df) + break + if len(df) > 0: + dfs.append(df) + if len(dfs) == 0: + return pd.DataFrame([], columns=["faiss_id", "id", "content", "metadata"]) + return pd.concat(dfs) + + if where_clause is None: + where_clause = in_filter + else: + where_clause = BinaryOperation(op="AND", args=[where_clause, in_filter]) + + if limit is not None: + query.limit = Constant(limit) + + query.where = where_clause + + with self.connection.cursor() as cur: + sql = self.handler.renderer.get_string(query, with_failback=True) + cur.execute(sql) + df = cur.fetchdf() + df["metadata"] = df["metadata"].apply(orjson.loads) + return df + + def _translate_filters(self, meta_filters): + if not meta_filters: + return None + + where_clause = None + for item in meta_filters: + parts = item.column.split(".") + key = Identifier(parts[0]) + + # converts 'col.el1.el2' to col->'el1'->>'el2' + if len(parts) > 1: + # intermediate elements + for el in parts[1:-1]: + key = BinaryOperation(op="->", args=[key, Constant(el)]) + + # last element + key = BinaryOperation(op="->>", args=[key, Constant(parts[-1])]) + + is_orig_id = item.column == "metadata._original_doc_id" + + type_cast = None + value = item.value + + if isinstance(value, list) and len(value) > 0 and item.op in (FilterOperator.IN, FilterOperator.NOT_IN): + if is_orig_id: + # convert to str + item.value = [str(i) for i in value] + value = item.value[0] + elif is_orig_id: + if not isinstance(value, str): + value = item.value = str(item.value) + + if isinstance(value, int): + type_cast = "int" + elif isinstance(value, float): + type_cast = "float" + + if type_cast is not None: + key = TypeCast(type_cast, key) + + if item.op in (FilterOperator.NOT_IN, FilterOperator.IN): + values = [Constant(i) for i in item.value] + value = AstTuple(values) + else: + value = Constant(item.value) + + condition = BinaryOperation(op=item.op.value, args=[key, value]) + + if where_clause is None: + where_clause = condition + else: + where_clause = BinaryOperation(op="AND", args=[where_clause, condition]) + return where_clause diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/faiss_index.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/faiss_index.py index 8aef1808004..b276ebd00ed 100644 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/faiss_index.py +++ b/mindsdb/integrations/handlers/duckdb_faiss_handler/faiss_index.py @@ -1,15 +1,24 @@ import os -from typing import Iterable, List +from typing import Iterable, List, Callable, Optional import numpy as np import psutil from pathlib import Path -import portalocker +try: + import fcntl +except ImportError: + fcntl = None import faiss # faiss or faiss-gpu + +from mindsdb.utilities import log + from pydantic import BaseModel +logger = log.getLogger(__name__) + + def _normalize_rows(x: np.ndarray) -> np.ndarray: norms = np.linalg.norm(x, axis=1, keepdims=True) + 1e-12 return x / norms @@ -19,9 +28,55 @@ class FaissParams(BaseModel): metric: str | None = "cosine" use_gpu: bool | None = False nlist: int | None = 1024 - nprobe: int | None = 32 - hnsw_m: int | None = 32 - hnsw_ef_search: int | None = 64 + nprobe: int | None = None + + +def merge_ondisk(trained_index: faiss.Index, shard_fnames: List[str], ivfdata_fname: str, shift_ids=False) -> None: + """ + Modified version of faiss.contrib.ondisk.merge_ondisk. Prevents leaving orphan memory mapped shard files + + Add the contents of the indexes stored in shard_fnames into the index trained_index. + The on-disk data is stored in ivfdata_fname + """ + assert not isinstance(trained_index, faiss.IndexIVFPQR), "IndexIVFPQR is not supported as an on disk index." + # merge the images into an on-disk index + # first load the inverted lists + ivfs = [] + indexes = [] + + for fname in shard_fnames: + # the IO_FLAG_MMAP is to avoid actually loading the data + # thus the total size of the inverted lists can exceed the available RAM + logger.info("read " + fname) + index = faiss.read_index(fname, faiss.IO_FLAG_MMAP) + index_ivf = faiss.extract_index_ivf(index) + ivfs.append(index_ivf.invlists) + + indexes.append(index) + + # construct the output index + index = trained_index + index_ivf = faiss.extract_index_ivf(index) + + assert index.ntotal == 0, "works only on empty index" + + # prepare the output inverted lists. They will be written to merged_index.ivfdata + invlists = faiss.OnDiskInvertedLists(index_ivf.nlist, index_ivf.code_size, ivfdata_fname) + + # merge all the inverted lists + ivf_vector = faiss.InvertedListsPtrVector() + for ivf in ivfs: + ivf_vector.push_back(ivf) + + logger.info("merge %d inverted lists " % ivf_vector.size()) + ntotal = invlists.merge_from_multiple(ivf_vector.data(), ivf_vector.size(), shift_ids) + + # now replace the inverted lists in the output index + index.ntotal = index_ivf.ntotal = ntotal + index_ivf.replace_invlists(invlists, True) + invlists.this.disown() + + del indexes class FaissIndex: @@ -43,7 +98,7 @@ def __init__(self, path: str, config: dict): else: raise ValueError(f"Unknown metric: {metric}") - self.path = path + self.path = os.path.join(path, "faiss_index") self._since_ram_checked = 0 @@ -51,15 +106,27 @@ def __init__(self, path: str, config: dict): self.index_type = "flat" self.dim = None self.index_fd = None + self.lock_required = True + + recover_path = Path(self.path).parent / "recover" + if recover_path.exists(): + # move all files from recover dir that might be left after index failing + for item in recover_path.iterdir(): + if item.is_dir(): + continue + item.rename(Path(self.path).parent / item.name) + if os.path.exists(self.path): self._load_index() def _lock_index(self): - if os.name != "nt": + if not self.lock_required: + return + if os.name != "nt" and fcntl: self.index_fd = open(self.path, "rb") try: - portalocker.lock(self.index_fd, portalocker.LOCK_EX | portalocker.LOCK_NB) - except portalocker.exceptions.AlreadyLocked: + fcntl.flock(self.index_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except OSError: raise ValueError(f"Index is already used: {self.path}") def _load_index(self): @@ -72,22 +139,37 @@ def _load_index(self): available_ram = psutil.virtual_memory().available if required_ram > _1gb and available_ram < required_ram: to_free_gb = round((required_ram - available_ram) / _1gb, 2) - raise ValueError(f"Unable load FAISS index into RAM, free up al least : {to_free_gb} Gb") + raise ValueError(f"Unable load FAISS index into RAM, free up at least : {to_free_gb} Gb") + + # check ivf_file before loading index and locking it + index_merged = Path(self.path).parent / "faiss_index_merged" + if index_merged.exists(): + self.lock_required = False self._lock_index() self.index = faiss.read_index(self.path) self.dim = self.index.d - sub_index = faiss.downcast_index(self.index.index) - if isinstance(sub_index, faiss.IndexIVFFlat): - self.index_type = "ivf" + index = self.index + if hasattr(index, "index"): + index = faiss.downcast_index(index.index) + if isinstance(index, faiss.IndexIVFFlat): + if index_merged.exists(): + self.index_type = "ivf_file" + else: + self.index_type = "ivf" + if self.config.nprobe is not None: + self.index.nprobe = self.config.nprobe def close(self): if self.index_fd is not None: self.index_fd.close() self.index = None + def __del__(self): + self.close() + def _build_flat_index(self): # TODO option to create hnsw @@ -116,6 +198,9 @@ def _check_ram_usage(self, count_vectors, index_type: str = "flat", m=32, nlist= required = (self.dim * 4 + m * 2 * 4) * count_vectors case "ivf": required = (self.dim * 4 + 8) * count_vectors + self.dim * 4 * nlist + case "ivf_file": + # don't restrict for IVF file + required = 0 case _: raise ValueError(f"Unknown index type: {index_type}") @@ -131,7 +216,7 @@ def _check_ram_usage(self, count_vectors, index_type: str = "flat", m=32, nlist= def insert( self, vectors: Iterable[Iterable[float]], - ids: Iterable[float], + ids: Iterable[int], ) -> None: if len(vectors) == 0: return @@ -170,14 +255,18 @@ def dump(self): def drop(self): self.close() - if os.path.exists(self.path): - os.remove(self.path) + + # remove index files (everything except duckdb) + for item in Path(self.path).parent.iterdir(): + if item.is_dir() or item.name.startswith("duckdb."): + continue + item.unlink() def search( self, - query: Iterable[Iterable[float]], + query: Iterable[float], limit: int = 10, - # allowed_ids: Optional[Sequence[int]] = None, + allowed_ids: Optional[Iterable[int]] = None, ): if self.index is None: return [], [] @@ -187,7 +276,16 @@ def search( if self._normalize_vectors: queries = _normalize_rows(queries) - ds, ids = self.index.search(queries, limit) + params = None + if allowed_ids is not None: + allowed_ids_array = np.asarray(list(allowed_ids), dtype=np.int64) + ids_selector = faiss.IDSelectorArray( + len(allowed_ids_array), + faiss.swig_ptr(allowed_ids_array), + ) + params = faiss.IVFSearchParameters(sel=ids_selector) + + ds, ids = self.index.search(queries, limit, params=params) list_id = [i for i in ids[0] if i != -1] list_distances = [1 - d for d in ds[0][: len(list_id)]] @@ -196,31 +294,70 @@ def search( class FaissIVFIndex(FaissIndex): - def _dump_vectors(self, index, path, batch_size: int = 10000): + def _dump_vectors(self, index, path: Path, batch_size: int = 30000): """ - Save vectors from a Faiss IndexIDMap to disk in batches using numpy memmap. + Extract and dump vectors and ids from index. Method is dependent on index type + """ + + if hasattr(index, "id_map"): + ids = faiss.vector_to_array(index.id_map).astype(np.int64, copy=False) + inner = index.index + + def get_batch_vectors(start, size): + return inner.reconstruct_n(start, size).astype(np.float32, copy=False) + + return self._dump_vectors_to_file(ids, path, index.ntotal, batch_size, get_batch_vectors) + else: + invlists = index.invlists + + index.set_direct_map_type(faiss.DirectMap.Hashtable) - - Writes the one memmap for ids and batches for vectors + ids_list = [] + for list_no in range(index.nlist): + list_size = invlists.list_size(list_no) + if list_size == 0: + continue - :param index: Faiss IndexIDMap - :param path: Output directory where batch files will be written - :param batch_size: Number of vectors per batch file + # Get IDs stored in this inverted list + id_array = faiss.rev_swig_ptr(invlists.get_ids(list_no), list_size) + ids_list.append(id_array) + + ids = np.hstack(ids_list).astype(np.int64) + + # to train index first batches will be used. shuffle ids to prevent using the same lists + # TODO shuffle only part of data? + np.random.shuffle(ids) + + def get_batch_vectors(start, size): + ids_batch = ids[start : start + size] + return index.reconstruct_batch(ids_batch).astype(np.float32, copy=False) + + return self._dump_vectors_to_file(ids, path, index.ntotal, batch_size, get_batch_vectors) + + def _dump_vectors_to_file( + self, + ids: np.ndarray, + path: Path, + ntotal: int, + batch_size: int, + get_batch_content: Callable[[int, int], np.ndarray], + ) -> int: """ - if not hasattr(index, "id_map") or not hasattr(index, "index"): - raise ValueError("Expected a Faiss IndexIDMap-like object with 'id_map' and 'index' attributes") + Write ids and vectors to memmap files in batches. - ntotal = index.ntotal + :param ids: vector IDs in the same order as vectors will be dumped. + :param path: directory to store dumps. + :param ntotal: total number of vectors. + :param batch_size: number of vectors per batch file. + :param get_batch_content: function to get a batch content - ids = faiss.vector_to_array(index.id_map).astype(np.int64, copy=False) + """ # Write all ids once to a single memmap file ids_path = path / "ids.mmap" mmap_ids = np.memmap(ids_path, dtype=np.int64, mode="w+", shape=(ntotal,)) mmap_ids[:] = ids - del mmap_ids # flush - - inner = index.index batch_num = 0 while True: @@ -233,8 +370,7 @@ def _dump_vectors(self, index, path, batch_size: int = 10000): ntotal -= size batch_num += 1 - # Reconstruct a contiguous block when possible - vecs = inner.reconstruct_n(start, size).astype(np.float32, copy=False) + vecs = get_batch_content(start, size) vecs_path = path / f"batch_{batch_num:05d}_vecs.mmap" @@ -244,40 +380,18 @@ def _dump_vectors(self, index, path, batch_size: int = 10000): mmap_vecs.flush() del mmap_vecs + del mmap_ids return batch_num - def _create_ifv_index_from_dump(self, path, train_count=10000, nlist=1024): - """ - Build an IVF index (wrapped in IndexIDMap) from memmap batches - - Reads a single `ids.mmap` and multiple `batch_{i}_vecs.mmap` files from `path`. - - Accumulates up to `train_count` vectors to train the IVF quantizer. - - Creates IndexIVFFlat and adds all vectors with their ids to it. - - :param path: Directory containing memmap files - :param train_count: Number of vectors to use for training - :param nlist: number of clusters for IVF - """ - - # Load ids - ids_path = path / "ids.mmap" - if not os.path.exists(ids_path): - raise FileNotFoundError(f"Missing ids memmap: {ids_path}") - - ids = np.fromfile(ids_path, dtype="int64") - - # Collect vector batch files and sort by batch index - vec_files = [f for f in os.listdir(path) if f.startswith("batch_")] - if not vec_files: - raise FileNotFoundError(f"No vector batch memmaps found in {path}") - - vec_files.sort() - + def _train_ivf(self, dump_path, train_count, nlist): # Accumulate training data up to train_count train_left = train_count train_chunks = [] + vec_files = self._get_dump_vector_files(dump_path) + for fname in vec_files: - fpath = path / fname + fpath = dump_path / fname batch_data = np.fromfile(fpath, dtype="float32") rows = int(batch_data.shape[0] / self.dim) @@ -288,20 +402,46 @@ def _create_ifv_index_from_dump(self, path, train_count=10000, nlist=1024): break train_data = np.vstack(train_chunks) - - # nlist can't be less than train data - nlist = min(nlist, len(train_data)) + train_data = train_data[:train_count, :] quantizer = faiss.IndexFlat(self.dim, self.metric) ivf = faiss.IndexIVFFlat(quantizer, self.dim, nlist, self.metric) ivf.train(train_data) - ivf_id_map = faiss.IndexIDMap(ivf) + return ivf + + def _get_dump_vector_files(self, dump_path): + # Collect vector batch files and sort by batch index + vec_files = [f for f in os.listdir(dump_path) if f.startswith("batch_")] + if not vec_files: + raise FileNotFoundError(f"No vector batch memmaps found in {dump_path}") + + vec_files.sort() + return vec_files + + def _create_ivf_index(self, dump_path, train_count, nlist): + """ + Build an in-memory IVF index + + :param dump_path: Directory containing memmap files + :param train_count: Number of vectors to use for training + :param nlist: number of clusters for IVF + """ + + # Load ids + ids_path = dump_path / "ids.mmap" + if not os.path.exists(ids_path): + raise FileNotFoundError(f"Missing ids memmap: {ids_path}") + ids = np.fromfile(ids_path, dtype="int64") + + ivf = self._train_ivf(dump_path, nlist=nlist, train_count=train_count) + + vec_files = self._get_dump_vector_files(dump_path) # load data start = 0 for fname in vec_files: - fpath = path / fname + fpath = dump_path / fname batch_data = np.fromfile(fpath, dtype="float32") rows = int(batch_data.shape[0] / self.dim) @@ -309,24 +449,140 @@ def _create_ifv_index_from_dump(self, path, train_count=10000, nlist=1024): batch_vectors = batch_data.reshape([rows, self.dim]) ids_batch = np.asarray(ids[start : start + rows]) - ivf_id_map.add_with_ids(batch_vectors, ids_batch) + ivf.add_with_ids(batch_vectors, ids_batch) start += rows - return ivf_id_map + # remove dumps + for item in dump_path.iterdir(): + item.unlink() + + return ivf + + def _create_ivf_file_index(self, dump_path, train_count, nlist): + """Build an IVF on disk index""" + + index_path = dump_path.parent + trained_index = self._train_ivf(dump_path, train_count=train_count, nlist=nlist) + # store trained index + trained_path = str(index_path / "faiss_index.trained") + faiss.write_index(trained_index, trained_path) + + ids_path = dump_path / "ids.mmap" + if not os.path.exists(ids_path): + raise FileNotFoundError(f"Missing ids memmap: {ids_path}") + ids = np.fromfile(ids_path, dtype="int64") + + vec_files = self._get_dump_vector_files(dump_path) + + start = 0 + block_fnames = [] + for num, fname in enumerate(vec_files): + index = faiss.read_index(trained_path) + fpath = dump_path / fname + + batch_data = np.fromfile(fpath, dtype="float32") + rows = int(batch_data.shape[0] / self.dim) + + batch_vectors = batch_data.reshape([rows, self.dim]) + + ids_batch = np.asarray(ids[start : start + rows]) + index.add_with_ids(batch_vectors, ids_batch) + block_fname = str(index_path / f"faiss_index_block.{num}") + block_fnames.append(block_fname) + faiss.write_index(index, block_fname) + start += rows + + # remove dumps + for item in dump_path.iterdir(): + item.unlink() + + index = faiss.read_index(trained_path) + + merge_ondisk(index, block_fnames, str(index_path / "faiss_index_merged")) + os.unlink(trained_path) + for block_fname in block_fnames: + os.unlink(block_fname) + + return index + + def get_size(self): + if self.index is None: + return 0 + else: + return self.index.ntotal + + def check_required_disk_space(self, index_type): + base_path = Path(self.path).parent + available = psutil.disk_usage(str(base_path)).free + + # current size of index + index_size = 0 + for item in base_path.iterdir(): + if item.is_dir() or not item.name.startswith("faiss_index"): + continue + index_size += item.stat().st_size + + # k - how more space required than current index size + if index_type == "ivf_file": + # recovery + dump + shard files + k = 3.01 + else: + # recovery + dump + k = 2.01 + + # k-1 because the current index space will be reused + if available < index_size * (k - 1): + to_free_gb = round((index_size * (k - 1)) / 1024**3, 2) + raise ValueError(f"Unable run indexing FAISS not enough disk space, get free at least : {to_free_gb} Gb") + + def create_index(self, index_type=None, nlist=None, train_count=None): + """ + Create or recreate IVF index + + :param index_type: options are: 'ivf' (in RAM) or 'ivf_file' (on disk) + :param nlist: number of inverted lists + :param train_count: count of vectors to use for training. + + """ + + if index_type is None: + if os.name == "nt": + index_type = "ivf" + else: + index_type = "ivf_file" + + elif index_type not in ("ivf", "ivf_file"): + raise NotImplementedError("Only ivf or ivf_file indexes are supported") + + if index_type == "ivf_file" and os.name == "nt": + raise ValueError("'ivf_file' index is not supported on Windows. Try to use 'ivf' instead") - def create_index(self, nlist=1024, train_count=10000): # index might not fit into RAM, extract data to files - dump_path = Path(self.path).parent / "dump" + base_path = Path(self.path).parent + dump_path = base_path / "dump" # if self.index_type != 'flat': # raise ValueError('Index was already created') - if self.index is None: - ntotal = 0 + # check params, apply defaults + if nlist is None: + nlist = self.config.nlist + + ntotal = self.get_size() + + # faiss shows warning if train count is less than 39 * nlist and recommend to use at least this size for train data + nlist_k = 39 + if train_count is not None: + if train_count < nlist * nlist_k: + raise ValueError(f"Train_count can't be less than nlist * {nlist_k} (is {nlist * nlist_k})") else: - ntotal = self.index.ntotal - if nlist > ntotal: - raise ValueError(f"Not enough data to create: {ntotal}, required at lease {nlist} records") + # get 10k if possible but not less than nlist * k + train_count = max(nlist * nlist_k, min(ntotal, 10000)) + + if train_count > ntotal: + raise ValueError(f"Not enough data to create index: {ntotal}, at least {train_count} records are required") + + self.check_required_disk_space(index_type) dump_path.mkdir(exist_ok=True) @@ -339,14 +595,33 @@ def create_index(self, nlist=1024, train_count=10000): # unload flat index from RAM self.close() + # buckup index files + recover_path = base_path / "recover" + recover_path.mkdir(exist_ok=True) + for item in base_path.iterdir(): + if item.is_dir() or item.name.startswith("duckdb."): + continue + item.rename(recover_path / item.name) + # create ivf index - ivf_index = self._create_ifv_index_from_dump(dump_path, train_count=train_count, nlist=nlist) + if index_type == "ivf": + ivf_index = self._create_ivf_index(dump_path, train_count=train_count, nlist=nlist) + self.lock_required = True + + elif index_type == "ivf_file": + ivf_index = self._create_ivf_file_index(dump_path, train_count=train_count, nlist=nlist) + self.lock_required = False + else: + raise ValueError(f"Unknown index type: {index_type}") self.index = ivf_index - self.index_type = "ivf" + self.index_type = index_type self.dump() self._lock_index() - # remove unused items - for item in dump_path.iterdir(): + # remove unused files + dump_path.rmdir() + + for item in recover_path.iterdir(): item.unlink() + recover_path.rmdir() diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/requirements.txt b/mindsdb/integrations/handlers/duckdb_faiss_handler/requirements.txt index 8a1860f26b2..3dd4dc56e15 100644 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/requirements.txt +++ b/mindsdb/integrations/handlers/duckdb_faiss_handler/requirements.txt @@ -1,2 +1 @@ -faiss-cpu>=1.7.4 -portalocker +faiss-cpu==1.13.2 diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/test_faiss_handler.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/test_faiss_handler.py index 915d89f64ab..6a2711cfbcb 100644 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/test_faiss_handler.py +++ b/mindsdb/integrations/handlers/duckdb_faiss_handler/test_faiss_handler.py @@ -1,11 +1,12 @@ +import pytest from unittest.mock import patch import pandas as pd -from tests.unit.executor.test_knowledge_base import TestKB as BaseTestKB, set_litellm_embedding +from tests.unit.executor.test_knowledge_base import TestKB, set_embedding -class TestFAISS(BaseTestKB): +class TestFAISS(TestKB): "Run unit tests using FAISS handler as storage" def _get_storage_table(self, kb_name): @@ -30,9 +31,16 @@ def _get_storage_table(self, kb_name): return f"faiss_{kb_name}.kb_faiss" - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_ivf_index(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @pytest.mark.parametrize("index_type", ["ivf", "ivf_file"]) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_ivf_index(self, mock_embedding, index_type): + """ + Run test two times: + - make ivf index and then reindex to ivf_file + - make ivf_file index and then reindex to ivf + """ + + set_embedding(mock_embedding) df = self._get_ral_table() @@ -51,20 +59,24 @@ def test_ivf_index(self, mock_litellm_embedding): """ ) - self.run_sql("CREATE INDEX ON KNOWLEDGE_BASE kb_ral WITH (nlist=10)") + for i in range(2): + self.run_sql(f"CREATE INDEX ON KNOWLEDGE_BASE kb_ral WITH (nlist=10, type='{index_type}')") + + # search works + ret = self.run_sql("select * from kb_ral where k.content = 'white' limit 1") + assert "white" in ret["chunk_content"][0] - # search works - ret = self.run_sql("select * from kb_ral where k.content = 'white' limit 1") - assert "white" in ret["chunk_content"][0] + # -- test insert -- + self.run_sql("insert into kb_ral (id, english) values (10000, 'magpie')") + # search + ret = self.run_sql("select * from kb_ral where k.content = 'magpie' limit 1") + assert "magpie" in ret["chunk_content"][0] - # -- test insert -- - self.run_sql("insert into kb_ral (id, english) values (10000, 'magpie')") - # search - ret = self.run_sql("select * from kb_ral where k.content = 'magpie' limit 1") - assert "magpie" in ret["chunk_content"][0] + # -- test delete -- + self.run_sql("delete from kb_ral where id=10000") + # search + ret = self.run_sql("select * from kb_ral where k.content = 'magpie' limit 1") + assert len(ret) == 0 or "magpie" not in ret["chunk_content"][0] - # -- test delete -- - self.run_sql("delete from kb_ral where id=10000") - # search - ret = self.run_sql("select * from kb_ral where k.content = 'magpie' limit 1") - assert len(ret) == 0 or "magpie" not in ret["chunk_content"][0] + # toggle index type + index_type = "ivf_file" if index_type == "ivf" else "ivf" diff --git a/mindsdb/integrations/handlers/duckdb_handler/README.md b/mindsdb/integrations/handlers/duckdb_handler/README.md index 54c1040a42c..5fa9125b940 100644 --- a/mindsdb/integrations/handlers/duckdb_handler/README.md +++ b/mindsdb/integrations/handlers/duckdb_handler/README.md @@ -1,41 +1,62 @@ -# DuckDB Handler +# DuckDB Handler This is the implementation of the DuckDB handler for MindsDB. ## DuckDB DuckDB is an open-source analytical database system. DuckDB is designed for fast execution of analytical queries. -There are no external dependencies and the DBMS runs completly embedded within a host process, similar to SQLite. +There are no external dependencies, and the DBMS runs completely embedded within a host process, similar to SQLite. DuckDB provides a rich SQL dialect with support for complex queries with transactional guarantees (ACID). -## Implementation -This handler was implemented using the `duckdb` python client library. +## Implementation +This handler was implemented using the `duckdb` Python client library. ### DuckDB version -The DuckDB handler is currently using the `0.7.1.dev187` pre-relase version of the python client library. In case of issues, make sure your DuckDB database is compatible with this version. See the DuckDB handler [requirements.txt](requirements.txt) for details. - +The DuckDB handler is currently using the `1.1.3` release version of the Python client library. In case of issues, make sure your DuckDB or MotherDuck database is compatible with this version. See the DuckDB handler [requirements.txt](requirements.txt) for details. The required arguments to establish a connection are: -* `database`: the name of the DuckDB database file. May also be set to `:memory:`, which will create an in-memory database. +* `database`: the name of the DuckDB or MotherDuck database file. + - Set to `:memory:` to create an in-memory database. + - For MotherDuck, specify the database and motherduck_token. -The optional arguments are: +Additional optional arguments include: +* `motherduck_token`: a token to authenticate with MotherDuck. * `read_only`: a flag that specifies if the connection should be made in read-only mode. -This is required if multiple processes want to access the same database file at the same time. - + - This is required if multiple processes want to access the same database file simultaneously. ## Usage -In order to make use of this handler and connect to a DuckDB database in MindsDB, the following syntax can be used: +To connect to a DuckDB or MotherDuck database in MindsDB, the following syntax can be used: +### DuckDB Example ```sql CREATE DATABASE duckdb_datasource WITH engine='duckdb', parameters={ - "database":"db.duckdb" + "database": "db.duckdb" }; ``` -Now, you can use this established connection to query your database as follows: +### MotherDuck Example +```sql +CREATE DATABASE md_datasource +WITH +engine='duckdb', +parameters={ + "database": "sample_data", + "motherduck_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9." +}; +``` + +Once the connection is established, you can query the database: + ```sql SELECT * FROM duckdb_datasource.my_table; -``` \ No newline at end of file +``` + +For MotherDuck: +```sql +SELECT * FROM md_datasource.movies; +``` + +By leveraging these features, MindsDB provides powerful integrations with DuckDB and MotherDuck for scalable analytics. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/duckdb_handler/__init__.py b/mindsdb/integrations/handlers/duckdb_handler/__init__.py index b7bb0e1a03d..ca5fdcd9218 100644 --- a/mindsdb/integrations/handlers/duckdb_handler/__init__.py +++ b/mindsdb/integrations/handlers/duckdb_handler/__init__.py @@ -1,28 +1,32 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE +from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE from .__about__ import __version__ as version, __description__ as description from .connection_args import connection_args, connection_args_example + try: from .duckdb_handler import DuckDBHandler as Handler + import_error = None except Exception as e: Handler = None import_error = e -title = 'DuckDB' -name = 'duckdb' +title = "DuckDB" +name = "duckdb" type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' +icon_path = "icon.svg" +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ - 'Handler', - 'version', - 'name', - 'type', - 'title', - 'description', - 'connection_args', - 'connection_args_example', - 'import_error', - 'icon_path', + "Handler", + "version", + "name", + "type", + "title", + "description", + "connection_args", + "connection_args_example", + "import_error", + "support_level", + "icon_path", ] diff --git a/mindsdb/integrations/handlers/duckdb_handler/connection_args.py b/mindsdb/integrations/handlers/duckdb_handler/connection_args.py index e5a372f9e88..4d9591e5eb6 100644 --- a/mindsdb/integrations/handlers/duckdb_handler/connection_args.py +++ b/mindsdb/integrations/handlers/duckdb_handler/connection_args.py @@ -2,16 +2,26 @@ from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - connection_args = OrderedDict( database={ - 'type': ARG_TYPE.STR, - 'description': 'The database file to read and write from. The special value :memory: (default) can be used to create an in-memory database.', + "type": ARG_TYPE.STR, + "description": ( + "The database file to read and write from. The special value :memory: (default) " + "can be used to create an in-memory database." + ), + }, + motherduck_token={ + "type": ARG_TYPE.STR, + "description": "Motherduck access token if want to connect motherduck database.", }, read_only={ - 'type': ARG_TYPE.BOOL, - 'description': 'A flag that specifies if the connection should be made in read-only mode.', + "type": ARG_TYPE.BOOL, + "description": ("A flag that specifies if the connection should be made in read-only mode."), }, ) -connection_args_example = OrderedDict(database='db.duckdb', read_only=True) +connection_args_example = OrderedDict( + database="sample_data", + read_only=True, + motherduck_token="ey...enKoT.SsEcCa......", +) diff --git a/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py b/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py index 7ae5423859c..bc407ef0575 100644 --- a/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py +++ b/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py @@ -19,14 +19,14 @@ class DuckDBHandler(DatabaseHandler): """This handler handles connection and execution of the DuckDB statements.""" - name = 'duckdb' + name = "duckdb" def __init__(self, name: str, **kwargs): super().__init__(name) self.parser = parse_sql - self.dialect = 'postgresql' - self.connection_data = kwargs.get('connection_data') - self.renderer = SqlalchemyRender('postgres') + self.dialect = "postgresql" + self.connection_data = kwargs.get("connection_data") + self.renderer = SqlalchemyRender("postgres") self.connection = None self.is_connected = False @@ -44,10 +44,17 @@ def connect(self) -> DuckDBPyConnection: if self.is_connected is True: return self.connection + motherduck_token = self.connection_data.get("motherduck_token") + if motherduck_token: + database = ( + f"md:{self.connection_data.get('database')}?motherduck_token={motherduck_token}&attach_mode=single" + ) + else: + database = self.connection_data.get("database") args = { - 'database': self.connection_data.get('database'), - 'read_only': self.connection_data.get('read_only'), + "database": database, + "read_only": self.connection_data.get("read_only"), } self.connection = duckdb.connect(**args) @@ -78,9 +85,7 @@ def check_connection(self) -> StatusResponse: self.connect() response.success = True except Exception as e: - logger.error( - f'Error connecting to DuckDB {self.connection_data["database"]}, {e}!' - ) + logger.error(f"Error connecting to DuckDB {self.connection_data['database']}, {e}!") response.error_message = str(e) finally: if response.success is True and need_to_close: @@ -111,17 +116,13 @@ def native_query(self, query: str) -> Response: if result: response = Response( RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, columns=[x[0] for x in cursor.description] - ), + data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]), ) else: connection.commit() response = Response(RESPONSE_TYPE.OK) except Exception as e: - logger.error( - f'Error running query: {query} on {self.connection_data["database"]}!' - ) + logger.error(f"Error running query: {query} on {self.connection_data['database']}!") response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) cursor.close() @@ -150,10 +151,10 @@ def get_tables(self) -> Response: Response: Names of the tables in the database. """ - q = 'SHOW TABLES;' + q = "SHOW TABLES;" result = self.native_query(q) df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) + result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) return result def get_columns(self, table_name: str) -> Response: @@ -166,5 +167,5 @@ def get_columns(self, table_name: str) -> Response: Response: Details of the table. """ - query = f'DESCRIBE {table_name};' + query = f"DESCRIBE {table_name};" return self.native_query(query) diff --git a/mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py b/mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py index ec205cb9362..6bac43a3e0f 100644 --- a/mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py +++ b/mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py @@ -84,7 +84,7 @@ def get_tables(self) -> HandlerResponse: q = "SHOW TABLES;" result = self.native_query(q) df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) + result._data = df.rename(columns={df.columns[0]: "table_name"}) return result def get_columns(self, table_name: str) -> HandlerResponse: diff --git a/mindsdb/integrations/handlers/dynamodb_handler/README.md b/mindsdb/integrations/handlers/dynamodb_handler/README.md deleted file mode 100644 index 2e70085e41c..00000000000 --- a/mindsdb/integrations/handlers/dynamodb_handler/README.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: Amazon DynamoDB -sidebarTitle: Amazon DynamoDB ---- - -This documentation describes the integration of MindsDB with [Amazon DynamoDB](https://aws.amazon.com/dynamodb/), a serverless, NoSQL database service that enables you to develop modern applications at any scale. - -## Prerequisites - -Before proceeding, ensure that MindsDB is installed locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to your Amazon DynamoDB from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE dynamodb_datasource -WITH - engine = 'dynamodb', - parameters = { - "aws_access_key_id": "PCAQ2LJDOSWLNSQKOCPW", - "aws_secret_access_key": "U/VjewPlNopsDmmwItl34r2neyC6WhZpUiip57i", - "region_name": "us-east-1" - }; -``` - -Required connection parameters include the following: - -* `aws_access_key_id`: The AWS access key that identifies the user or IAM role. -* `aws_secret_access_key`: The AWS secret access key that identifies the user or IAM role. -* `region_name`: The AWS region to connect to. - -Optional connection parameters include the following: - -* `aws_session_token`: The AWS session token that identifies the user or IAM role. This becomes necessary when using temporary security credentials. - -## Usage - -Retrieve data from a specified table by providing the integration name and the table name: - -```sql -SELECT * -FROM dynamodb_datasource.table_name -LIMIT 10; -``` - -Indexes can also be queried by adding a third-level namespace: - -```sql -SELECT * -FROM dynamodb_datasource.table_name.index_name -LIMIT 10; -``` - - -The queries issued to Amazon DynamoDB are in PartiQL, a SQL-compatible query language for Amazon DynamoDB. For more information, refer to the [PartiQL documentation](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ql-reference.html). - -There are a few limitations to keep in mind when querying data from Amazon DynamoDB (some of which are specific to PartiQL): -- The `LIMIT`, `GROUP BY` and `HAVING` clauses are not supported in PartiQL `SELECT` statements. Furthermore, subqueries and joins are not supported either. Refer to the [PartiQL documentation for SELECT statements](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ql-reference.select.html) for more information. -- `INSERT` statements are not supported by this integration. However, this can be overcome by issuing a 'native query' via an established connection. An example of this is provided below. - - -Run PartiQL queries directly on Amazon DynamoDB: - -```sql -SELECT * FROM dynamodb_datasource ( - - --Native Query Goes Here - INSERT INTO "Music" value {'Artist' : 'Acme Band1','SongTitle' : 'PartiQL Rocks'} - -); -``` - - -The above examples utilize `dynamodb_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Amazon S3 DynamoDB. -* **Checklist**: - 1. Confirm that provided AWS credentials are correct. Try making a direct connection to the Amazon DynamoDB using the AWS CLI. - 2. Ensure a stable network between MindsDB and AWS. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing special characters. -* **Checklist**: - 1. Ensure table names with special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/dynamodb_handler/__about__.py b/mindsdb/integrations/handlers/dynamodb_handler/__about__.py deleted file mode 100644 index 81151539d9c..00000000000 --- a/mindsdb/integrations/handlers/dynamodb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB DynamoDB handler' -__package_name__ = 'mindsdb_dynamodb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for DynamoDB" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/dynamodb_handler/__init__.py b/mindsdb/integrations/handlers/dynamodb_handler/__init__.py deleted file mode 100644 index 0ed61c9dbc0..00000000000 --- a/mindsdb/integrations/handlers/dynamodb_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .dynamodb_handler import DynamoDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Amazon DynamoDB' -name = 'dynamodb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/dynamodb_handler/connection_args.py b/mindsdb/integrations/handlers/dynamodb_handler/connection_args.py deleted file mode 100644 index 20282391995..00000000000 --- a/mindsdb/integrations/handlers/dynamodb_handler/connection_args.py +++ /dev/null @@ -1,39 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - aws_access_key_id={ - 'type': ARG_TYPE.STR, - 'description': 'The AWS access key that identifies the user or IAM role.', - 'required': True, - 'label': 'AWS Access Key' - }, - aws_secret_access_key={ - 'type': ARG_TYPE.STR, - 'description': 'The AWS secret access key that identifies the user or IAM role.', - 'secret': True, - 'required': True, - 'label': 'AWS Secret Access Key' - }, - region_name={ - 'type': ARG_TYPE.STR, - 'description': 'The AWS region to connect to.', - 'required': True, - 'label': 'AWS Region' - }, - aws_session_token={ - 'type': ARG_TYPE.STR, - 'description': 'The AWS session token that identifies the user or IAM role. This becomes necessary when using temporary security credentials.', - 'secret': True, - 'required': False, - 'label': 'AWS Session Token' - } -) - -connection_args_example = OrderedDict( - aws_access_key_id='PCAQ2LJDOSWLNSQKOCPW', - aws_secret_access_key='U/VjewPlNopsDmmwItl34r2neyC6WhZpUiip57i', - region_name='us-east-1' -) diff --git a/mindsdb/integrations/handlers/dynamodb_handler/dynamodb_handler.py b/mindsdb/integrations/handlers/dynamodb_handler/dynamodb_handler.py deleted file mode 100644 index 8a32caa3cc4..00000000000 --- a/mindsdb/integrations/handlers/dynamodb_handler/dynamodb_handler.py +++ /dev/null @@ -1,266 +0,0 @@ -from typing import Text, List, Dict, Optional - -import boto3 -from boto3.dynamodb.types import TypeDeserializer -from botocore.exceptions import ClientError -from mindsdb_sql_parser.ast import Select, Insert, Join -from mindsdb_sql_parser.ast.base import ASTNode -import pandas as pd - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class DynamoDBHandler(DatabaseHandler): - """ - This handler handles connection and execution of the SQL statements on Amazon DynamoDB. - """ - - name = "dynamodb" - - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs): - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to Amazon DynamoDB. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - self.cache_thread_safe = True - - def __del__(self) -> None: - """ - Closes the connection when the handler instance is deleted. - """ - if self.is_connected: - self.disconnect() - - def connect(self) -> boto3.client: - """ - Establishes a connection to Amazon DynamoDB. - - Raises: - ValueError: If the expected connection parameters are not provided. - - Returns: - boto3.client: A client object to Amazon DynamoDB. - """ - if self.is_connected is True: - return self.connection - - # Mandatory connection parameters. - if not all( - key in self.connection_data for key in ["aws_access_key_id", "aws_secret_access_key", "region_name"] - ): - logger.error( - "Connection failed as required parameters (aws_access_key_id, aws_secret_access_key, region_name) have not been provided." - ) - raise ValueError( - "Required parameters (aws_access_key_id, aws_secret_access_key, region_name) must be provided." - ) - - config = { - "aws_access_key_id": self.connection_data.get("aws_access_key_id"), - "aws_secret_access_key": self.connection_data.get("aws_secret_access_key"), - "region_name": self.connection_data.get("region_name"), - } - - # Optional connection parameters. - optional_parameters = ["aws_session_token"] - for param in optional_parameters: - if param in self.connection_data: - config[param] = self.connection_data[param] - - # An exception is not raised even if the credentials are invalid, therefore, no error handling is required. - self.connection = boto3.client("dynamodb", **config) - - self.is_connected = True - - return self.connection - - def disconnect(self) -> None: - """ - Closes the connection to the Amazon DynamoDB if it's currently open. - """ - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to Amazon DynamoDB. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - connection.list_tables() - - response.success = True - except (ValueError, ClientError) as known_error: - logger.error(f"Connection check to Amazon DynamoDB failed, {known_error}!") - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f"Connection check to Amazon DynamoDB failed due to an unknown error, {unknown_error}!") - response.error_message = str(unknown_error) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: Text) -> Response: - """ - Executes a native SQL query (PartiQL) on Amazon DynamoDB and returns the result. - - Args: - query (Text): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - need_to_close = self.is_connected is False - - connection = self.connect() - - try: - result = connection.execute_statement(Statement=query) - - if result["Items"]: - # TODO: Can parsing be optimized? - records = [] - records.extend(self._parse_records(result["Items"])) - - while "LastEvaluatedKey" in result: - result = connection.execute_statement(Statement=query, NextToken=result["NextToken"]) - records.extend(self._parse_records(result["Items"])) - - response = Response(RESPONSE_TYPE.TABLE, data_frame=pd.json_normalize(records)) - else: - response = Response(RESPONSE_TYPE.OK) - except ClientError as client_error: - logger.error(f"Error running query: {query} on DynamoDB!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(client_error)) - except Exception as unknown_error: - logger.error(f"Unknown error running query: {query} on DynamoDB!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(unknown_error)) - - connection.close() - if need_to_close is True: - self.disconnect() - - return response - - def _parse_records(self, records: List[Dict]) -> Dict: - """ - Parses the records returned by the PartiQL query execution. - - Args: - records (List[Dict]): A list of records returned by the PartiQL query execution. - - Returns: - Dict: A dictionary containing the parsed record. - """ - deserializer = TypeDeserializer() - - parsed_records = [] - for record in records: - parsed_records.append({k: deserializer.deserialize(v) for k, v in record.items()}) - - return parsed_records - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode on Amazon DynamoDB and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - if isinstance(query, Select): - error_message = None - if query.limit or query.group_by or query.having or query.offset: - error_message = "The provided SELECT query contains unsupported clauses. " - - if isinstance(query.from_table, Select): - error_message = "The provided SELECT query contains subqueies, which are not supported. " - - if isinstance(query.from_table, Join): - error_message = "The provided SELECT query contains JOIN clauses, which are not supported. " - - if error_message: - error_message += "Please refer to the following documentation for running PartiQL SELECT queries against Amazon DynamoDB: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ql-reference.select.html" - raise ValueError(error_message) - - # TODO: Add support for INSERT queries. - elif isinstance(query, Insert): - raise ValueError("Insert queries are not supported by this integration at the moment.") - - return self.native_query(query.to_string()) - - def get_tables(self) -> Response: - """ - Retrieves a list of all tables in Amazon DynamoDB. - - Returns: - Response: A response object containing a list of tables in Amazon DynamoDB. - """ - result = self.connection.list_tables() - - df = pd.DataFrame(data=result["TableNames"], columns=["table_name"]) - - response = Response(RESPONSE_TYPE.TABLE, df) - - return response - - def get_columns(self, table_name: Text) -> Response: - """ - Retrieves column (attribute) details for a specified table in Amazon DynamoDB. - - Args: - table_name (Text): The name of the table for which to retrieve column information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - result = self.connection.describe_table(TableName=table_name) - - df = pd.DataFrame(result["Table"]["AttributeDefinitions"]) - - df = df.rename(columns={"AttributeName": "column_name", "AttributeType": "data_type"}) - - response = Response(RESPONSE_TYPE.TABLE, df) - - return response diff --git a/mindsdb/integrations/handlers/dynamodb_handler/icon.svg b/mindsdb/integrations/handlers/dynamodb_handler/icon.svg deleted file mode 100644 index 2f569d6349a..00000000000 --- a/mindsdb/integrations/handlers/dynamodb_handler/icon.svg +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/dynamodb_handler/tests/__init__.py b/mindsdb/integrations/handlers/dynamodb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/dynamodb_handler/tests/test_dynamodb_handler.py b/mindsdb/integrations/handlers/dynamodb_handler/tests/test_dynamodb_handler.py deleted file mode 100644 index 791a94a5320..00000000000 --- a/mindsdb/integrations/handlers/dynamodb_handler/tests/test_dynamodb_handler.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.dynamodb_handler.dynamodb_handler import DyanmoDBHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class DynamoDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - 'aws_access_key_id': 'PCAQ2LJDOSWLNSQKOCPW', - 'aws_secret_access_key': 'U/VjewPlNopsDmmwItl34r2neyC6WhZpUiip57i', - 'region_name': 'us-east-1' - } - cls.handler = DyanmoDBHandler('test_dynamodb_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM TryDaxTable" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_4_get_columns(self): - columns = self.handler.get_columns('TryDaxTable') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/README.md b/mindsdb/integrations/handlers/edgelessdb_handler/README.md deleted file mode 100644 index 187a7c75aaf..00000000000 --- a/mindsdb/integrations/handlers/edgelessdb_handler/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# EdgelessDB Handler - -This is the implementation of the EdgelessDB Handler for MindsDB. - -## EdgelessDB -EdgelessDB is an open-source MySQL-compatible database for confidential computing. EdgelessDB runs entirely inside runtime-encrypted Intel SGX enclaves. In contrast to other databases, EdgelessDB ensures that all data is always encryptedβ€”in memory as well as on disk. EdgelessDB has no storage constraints and delivers close to native performance. - -Central to EdgelessDB is the concept of a manifest. The manifest is defined in JSON and is similar to a smart contract. It defines the initial state of the database, including access control, in an attestable way. - -## Implementation - -This handler was implemented by extending mysql connector. - -The required arguments to establish a connection are: - -* `host`: the host name of the EdgelessDB connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name - -## Usage - -In order to make use of this handler and connect to a EdgelessDB server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE edgelessdb_datasource -WITH ENGINE = "EdgelessDB", -PARAMETERS = { - "user": "root", - "password": "password", - "host": "localhost", - "port": 8080, - "database": "test" -} -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM edgelessdb_datasource.test LIMIT 10; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/__about__.py b/mindsdb/integrations/handlers/edgelessdb_handler/__about__.py deleted file mode 100644 index 1fa04ecd0e4..00000000000 --- a/mindsdb/integrations/handlers/edgelessdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB EdgelessDB handler' -__package_name__ = 'mindsdb_edgelessdb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for EdgelessDB" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/__init__.py b/mindsdb/integrations/handlers/edgelessdb_handler/__init__.py deleted file mode 100644 index 6ff347bdec6..00000000000 --- a/mindsdb/integrations/handlers/edgelessdb_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .edgelessdb_handler import EdgelessDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'EdgelessDB' -name = 'edgelessdb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'icon_path', 'title', - 'description', 'connection_args', 'connection_args_example', 'import_error' -] diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/connection_args.py b/mindsdb/integrations/handlers/edgelessdb_handler/connection_args.py deleted file mode 100644 index 4625ac63f99..00000000000 --- a/mindsdb/integrations/handlers/edgelessdb_handler/connection_args.py +++ /dev/null @@ -1,45 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the EdgelessDB server.', - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the EdgelessDB server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the EdgelessDB server.', - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the EdgelessDB server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the EdgelessDB server. Must be an integer.', - }, - ssl={'type': ARG_TYPE.BOOL, 'description': 'Set it to False to disable ssl.'}, - ssl_ca={ - 'type': ARG_TYPE.PATH, - 'description': 'Path or URL of the Certificate Authority (CA) certificate file', - }, - ssl_cert={ - 'type': ARG_TYPE.PATH, - 'description': 'Path name or URL of the server public key certificate file', - }, - ssl_key={ - 'type': ARG_TYPE.PATH, - 'description': 'The path name or URL of the server private key file', - }, -) - -connection_args_example = OrderedDict( - host='127.0.0.1', port=3306, user='root', password='password', database='database' -) diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/edgelessdb_handler.py b/mindsdb/integrations/handlers/edgelessdb_handler/edgelessdb_handler.py deleted file mode 100644 index 5dedef33e01..00000000000 --- a/mindsdb/integrations/handlers/edgelessdb_handler/edgelessdb_handler.py +++ /dev/null @@ -1,12 +0,0 @@ -from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler - - -class EdgelessDBHandler(MySQLHandler): - """ - This handler handles connection and execution of the EdgelessDB statements. - """ - - name = 'edgelessdb' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/icon.svg b/mindsdb/integrations/handlers/edgelessdb_handler/icon.svg deleted file mode 100644 index cf126f79bce..00000000000 --- a/mindsdb/integrations/handlers/edgelessdb_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/requirements.txt b/mindsdb/integrations/handlers/edgelessdb_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/edgelessdb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/edgelessdb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py b/mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py deleted file mode 100644 index 7d3d8ad839e..00000000000 --- a/mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py +++ /dev/null @@ -1,66 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.edgelessdb_handler.edgelessdb_handler import EdgelessDBHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class EdgelessDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 8080, - "user": "root", - "password": "password", - "database": "test", - } - } - cls.handler = EdgelessDBHandler('test_edgelessdb_handler', **cls.kwargs) - - def test_0_connect(self): - assert self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS TEST_TABLE") - assert res.type is RESPONSE_TYPE.OK - - def test_2_create_table(self): - res = self.handler.query( - '''CREATE TABLE TEST_TABLE ( - ID INT PRIMARY KEY, - NAME VARCHAR(14) - )''' - ) - assert res.type is RESPONSE_TYPE.OK - - def test_3_insert(self): - res = self.handler.query( - """INSERT INTO TEST_TABLE - VALUES - (100,'ONE HUNDRED'), - (200,'TWO HUNDRED'), - (300,'THREE HUNDRED')""" - ) - assert res.type is RESPONSE_TYPE.OK - - def test_4_select(self): - res = self.handler.query('SELECT * FROM TEST_TABLE') - assert res.type is RESPONSE_TYPE.TABLE - - def test_5_check_connection(self): - assert self.handler.check_connection() - - def test_6_get_tables(self): - res = self.handler.get_tables() - assert res.type is RESPONSE_TYPE.TABLE - - def test_7_get_columns(self): - res = self.handler.get_columns("TEST_TABLE") - assert res.type is RESPONSE_TYPE.TABLE - - def test_8_disconnect(self): - assert self.handler.disconnect() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/README.md b/mindsdb/integrations/handlers/elasticsearch_handler/README.md deleted file mode 100644 index b672ad22b99..00000000000 --- a/mindsdb/integrations/handlers/elasticsearch_handler/README.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: ElasticSearch -sidebarTitle: ElasticSearch ---- - -This documentation describes the integration of MindsDB with [ElasticSearch](https://www.elastic.co/), a distributed, multitenant-capable full-text search engine with an HTTP web interface and schema-free JSON documents.. -The integration allows MindsDB to access data from ElasticSearch and enhance ElasticSearch with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect ElasticSearch to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to ElasticSearch. - -## Connection - -Establish a connection to ElasticSearch from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/elasticsearch_handler) as an engine. - -```sql -CREATE DATABASE elasticsearch_datasource -WITH ENGINE = 'elasticsearch', -PARAMETERS={ - 'cloud_id': 'xyz', -- optional, if hosts are provided - 'hosts': 'https://xyz.xyz.gcp.cloud.es.io:123', -- optional, if cloud_id is provided - 'api_key': 'xyz', -- optional, if user and password are provided - 'user': 'elastic', -- optional, if api_key is provided - 'password': 'xyz' -- optional, if api_key is provided -}; -``` - -The connection parameters include the following: - -* `cloud_id`: The Cloud ID provided with the ElasticSearch deployment. Required only when `hosts` is not provided. -* `hosts`: The ElasticSearch endpoint provided with the ElasticSearch deployment. Required only when `cloud_id` is not provided. -* `api_key`: The API key that you generated for the ElasticSearch deployment. Required only when `user` and `password` are not provided. -* `user` and `password`: The user and password used to authenticate. Required only when `api_key` is not provided. - - -If you want to connect to the local instance of ElasticSearch, use the below statement: - -```sql -CREATE DATABASE elasticsearch_datasource -WITH ENGINE = 'elasticsearch', -PARAMETERS = { - "hosts": "127.0.0.1:9200", - "user": "user", - "password": "password" -}; -``` - -Required connection parameters include the following (at least one of these parameters should be provided): - -* `hosts`: The IP address and port where ElasticSearch is deployed. -* `user`: The user used to autheticate access. -* `password`: The password used to autheticate access. - - -## Usage - -Retrieve data from a specified index by providing the integration name and index name: - -```sql -SELECT * -FROM elasticsearch_datasource.my_index -LIMIT 10; -``` - - -The above examples utilize `elasticsearch_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - - -At the moment, the Elasticsearch SQL API has certain limitations that have an impact on the queries that can be issued via MindsDB. The most notable of these limitations are listed below: -1. Only `SELECT` queries are supported at the moment. -2. Array fields are not supported. -3. Nested fields cannot be queried directly. However, they can be accessed using the `.` operator. - -For a detailed guide on the limitations of the Elasticsearch SQL API, refer to the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/sql-limitations.html). - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Elasticsearch server. -* **Checklist**: - 1. Make sure the Elasticsearch server is active. - 2. Confirm that server, cloud ID and credentials are correct. - 3. Ensure a stable network between MindsDB and Elasticsearch. - - - -`Transport Error` or `Request Error` - -* **Symptoms**: Errors related to the issuing of unsupported queries to Elasticsearch. -* **Checklist**: - 1. Ensure the query is a `SELECT` query. - 2. Avoid querying array fields. - 3. Access nested fields using the `.` operator. - 4. Refer to the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/sql-limitations.html) for more information if needed. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing index names containing special characters. -* **Checklist**: - 1. Ensure table names with special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - - -This [troubleshooting guide](https://www.elastic.co/guide/en/elasticsearch/reference/current/troubleshooting.html) provided by Elasticsearch might also be helpful. diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/__about__.py b/mindsdb/integrations/handlers/elasticsearch_handler/__about__.py deleted file mode 100644 index 38a6c79dce6..00000000000 --- a/mindsdb/integrations/handlers/elasticsearch_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Elasticsearch handler" -__package_name__ = "mindsdb_elasticsearch_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Elasticsearch" -__author__ = "Minura Punchihewa" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/__init__.py b/mindsdb/integrations/handlers/elasticsearch_handler/__init__.py deleted file mode 100644 index c2b70ee71e7..00000000000 --- a/mindsdb/integrations/handlers/elasticsearch_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .elasticsearch_handler import ElasticsearchHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Elasticsearch" -name = "elasticsearch" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/connection_args.py b/mindsdb/integrations/handlers/elasticsearch_handler/connection_args.py deleted file mode 100644 index 9857096337b..00000000000 --- a/mindsdb/integrations/handlers/elasticsearch_handler/connection_args.py +++ /dev/null @@ -1,38 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - hosts={ - "type": ARG_TYPE.STR, - "description": "The host name(s) or IP address(es) of the Elasticsearch server(s). If multiple host name(s) or " - "IP address(es) exist, they should be separated by commas, e.g., `host1:port1, host2:port2`. " - "If this parameter is not provided, `cloud_id` should be.", - }, - cloud_id={ - "type": ARG_TYPE.STR, - "description": "The unique ID to your hosted Elasticsearch cluster on Elasticsearch Service. If this parameter is " - "not provided, `hosts` should be.", - }, - user={ - "type": ARG_TYPE.STR, - "description": "The username to connect to the Elasticsearch server with.", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the Elasticsearch server.", - "secret": True, - }, - api_key={ - "type": ARG_TYPE.STR, - "description": "The API key for authentication with the Elasticsearch server.", - "secret": True, - }, -) - -connection_args_example = OrderedDict( - hosts="localhost:9200", - user="admin", - password="password", -) diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/elasticsearch_handler.py b/mindsdb/integrations/handlers/elasticsearch_handler/elasticsearch_handler.py deleted file mode 100644 index 3c7f2be6eb4..00000000000 --- a/mindsdb/integrations/handlers/elasticsearch_handler/elasticsearch_handler.py +++ /dev/null @@ -1,264 +0,0 @@ -from typing import Text, Dict, Optional - -from elasticsearch import Elasticsearch -from elasticsearch.exceptions import ( - ConnectionError, - AuthenticationException, - TransportError, - RequestError, -) -from pandas import DataFrame -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class ElasticsearchHandler(DatabaseHandler): - """ - This handler handles the connection and execution of SQL statements on Elasticsearch. - """ - - name = "elasticsearch" - - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the AWS (S3) account. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self) -> None: - """ - Closes the connection when the handler instance is deleted. - """ - if self.is_connected: - self.disconnect() - - def connect(self) -> Elasticsearch: - """ - Establishes a connection to the Elasticsearch host. - - Raises: - ValueError: If the expected connection parameters are not provided. - - Returns: - elasticsearch.Elasticsearch: A connection object to the Elasticsearch host. - """ - if self.is_connected is True: - return self.connection - - config = {} - - # Mandatory connection parameters. - if ("hosts" not in self.connection_data) and ("cloud_id" not in self.connection_data): - raise ValueError("Either the hosts or cloud_id parameter should be provided!") - - # Optional/Additional connection parameters. - optional_parameters = ["hosts", "cloud_id", "api_key"] - for parameter in optional_parameters: - if parameter in self.connection_data: - if parameter == "hosts": - config["hosts"] = self.connection_data[parameter].split(",") - else: - config[parameter] = self.connection_data[parameter] - - # Ensure that if either user or password is provided, both are provided. - if ("user" in self.connection_data) != ("password" in self.connection_data): - raise ValueError("Both user and password should be provided if one of them is provided!") - - if "user" in self.connection_data: - config["basic_auth"] = ( - self.connection_data["user"], - self.connection_data["password"], - ) - - try: - self.connection = Elasticsearch( - **config, - ) - self.is_connected = True - return self.connection - except ConnectionError as conn_error: - logger.error(f"Connection error when connecting to Elasticsearch: {conn_error}") - raise - except AuthenticationException as auth_error: - logger.error(f"Authentication error when connecting to Elasticsearch: {auth_error}") - raise - except Exception as unknown_error: - logger.error(f"Unknown error when connecting to Elasticsearch: {unknown_error}") - raise - - def disconnect(self) -> None: - """ - Closes the connection to the Elasticsearch host if it's currently open. - """ - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Elasticsearch host. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - - # Execute a simple query to test the connection. - connection.sql.query(body={"query": "SELECT 1"}) - response.success = True - # All exceptions are caught here to ensure that the connection is closed if an error occurs. - except Exception as error: - logger.error(f"Error connecting to Elasticsearch, {error}!") - response.error_message = str(error) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: Text) -> Response: - """ - Executes a native SQL query on the Elasticsearch host and returns the result. - - Args: - query (str): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - need_to_close = self.is_connected is False - - connection = self.connect() - try: - response = connection.sql.query(body={"query": query}) - records = response["rows"] - columns = response["columns"] - - new_records = True - while new_records: - try: - if response["cursor"]: - response = connection.sql.query(body={"query": query, "cursor": response["cursor"]}) - - new_records = response["rows"] - records = records + new_records - except KeyError: - new_records = False - - column_names = [column["name"] for column in columns] - if not records: - null_record = [None] * len(column_names) - records = [null_record] - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=DataFrame(records, columns=column_names), - ) - - except (TransportError, RequestError) as transport_or_request_error: - logger.error(f"Error running query: {query} on Elasticsearch, {transport_or_request_error}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(transport_or_request_error)) - except Exception as unknown_error: - logger.error(f"Unknown error running query: {query} on Elasticsearch, {unknown_error}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(unknown_error)) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode on the Elasticsearch host and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - # TODO: Add support for other query types. - # Use postgresql dialect for SQL rendering - Elasticsearch SQL is ANSI-compatible - renderer = SqlalchemyRender("postgresql") - query_str = renderer.get_string(query, with_failback=True) - logger.debug(f"Executing SQL query: {query_str}") - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables (indexes) in the Elasticsearch host. - - Returns: - Response: A response object containing a list of tables (indexes) in the Elasticsearch host. - """ - query = """ - SHOW TABLES - """ - result = self.native_query(query) - - df = result.data_frame - - # Remove indices that are system indices: These are indices that start with a period. - df = df[~df["name"].str.startswith(".")] - - df = df.drop(["catalog", "kind"], axis=1) - result.data_frame = df.rename(columns={"name": "table_name", "type": "table_type"}) - - return result - - def get_columns(self, table_name: Text) -> Response: - """ - Retrieves column (field) details for a specified table (index) in the Elasticsearch host. - - Args: - table_name (str): The name of the table for which to retrieve column information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - query = f""" - DESCRIBE {table_name} - """ - result = self.native_query(query) - - df = result.data_frame - df = df.drop("mapping", axis=1) - result.data_frame = df.rename(columns={"column": "column_name", "type": "data_type"}) - - return result diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/icon.svg b/mindsdb/integrations/handlers/elasticsearch_handler/icon.svg deleted file mode 100644 index 477ee98fb65..00000000000 --- a/mindsdb/integrations/handlers/elasticsearch_handler/icon.svg +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/requirements.txt b/mindsdb/integrations/handlers/elasticsearch_handler/requirements.txt deleted file mode 100644 index 5b0adfd5730..00000000000 --- a/mindsdb/integrations/handlers/elasticsearch_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -elasticsearch>=8.0.0,<9.0.0 -urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/tests/__init__.py b/mindsdb/integrations/handlers/elasticsearch_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/elasticsearch_handler/tests/test_elasticsearch_handler.py b/mindsdb/integrations/handlers/elasticsearch_handler/tests/test_elasticsearch_handler.py deleted file mode 100644 index 3a8575d4f01..00000000000 --- a/mindsdb/integrations/handlers/elasticsearch_handler/tests/test_elasticsearch_handler.py +++ /dev/null @@ -1,32 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.elasticsearch_handler.elasticsearch_handler import ( - ElasticsearchHandler, -) -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class ElasticsearchHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = {"hosts": "localhost:9200"} - cls.handler = ElasticsearchHandler("test_elasticsearch_handler", cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT customer_first_name, customer_full_name FROM kibana_sample_data_ecommerce" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns("kibana_sample_data_ecommerce") - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unused/unit/handler_tests/test_email_handler.py b/mindsdb/integrations/handlers/email_handler/tests/test_email_handler.py similarity index 100% rename from tests/unused/unit/handler_tests/test_email_handler.py rename to mindsdb/integrations/handlers/email_handler/tests/test_email_handler.py diff --git a/mindsdb/integrations/handlers/empress_handler/README.md b/mindsdb/integrations/handlers/empress_handler/README.md deleted file mode 100644 index 522bcccbe6f..00000000000 --- a/mindsdb/integrations/handlers/empress_handler/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Empress Embedded Handler - -This is the implementation of the Empress Embedded handler for MindsDB. - -## Empress Embedded -Empress Embedded is a relational database management system that is designed to run in embedded environments such as mobile devices, IoT devices, and other resource-constrained systems. It is a lightweight and fast database that provides a high-performance storage engine, efficient indexing, and support for transactions and recovery. - -## Implementation -This handler was implemented using [pyodbc](https://pypi.org/project/pyodbc/), interacting with the [Empress Embedded ODBC Interface](http://www.empress.com/products/api-hliodbc.html). - -The required arguments to establish a connection are: -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `user`: username associated with database -* `password`: password to authenticate your access -* `server`: Server name to be connected -* `database`: Database name to be connected - - -## Usage - -You should be able to access any ODBC Data Source providing that a corresponding driver exists to support that Data Source. The Data Source may reside on a remote Server platform connected by a network or locally on the same computer. - -Documentation for installation and set up can be found [here](https://www.tmphero.org/test/empress8_manual/english/prodoc/d2/d2_2.htm). - -In order to make use of this handler and connect to Empress Embedded in MindsDB, the following syntax can be used: -~~~~sql -CREATE -DATABASE empress_db -WITH engine='empress', -parameters={ - "host": "127.0.0.1", - "port": "6322" , - "user": "admin", - "password": "password", - "server": "test_server", - "database": "test_db" -}; -~~~~ - -Now, you can use this established connection to query your database as follows: -~~~~sql -SELECT * FROM test_db.test; -~~~~ - diff --git a/mindsdb/integrations/handlers/empress_handler/__about__.py b/mindsdb/integrations/handlers/empress_handler/__about__.py deleted file mode 100644 index 2fd5fdc3766..00000000000 --- a/mindsdb/integrations/handlers/empress_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Empress Embedded handler' -__package_name__ = 'mindsdb_empress_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Empress Embedded" -__author__ = 'Panagiotis-Alexios Spanakis and Theodoros Malikourtis' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/empress_handler/__init__.py b/mindsdb/integrations/handlers/empress_handler/__init__.py deleted file mode 100644 index 081973d2d33..00000000000 --- a/mindsdb/integrations/handlers/empress_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .empress_handler import EmpressHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Empress Embedded' -name = 'empress' -type = HANDLER_TYPE.DATA -icon_path = 'icon.png' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'connection_args', 'connection_args_example', - 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/empress_handler/connection_args.py b/mindsdb/integrations/handlers/empress_handler/connection_args.py deleted file mode 100644 index eb68706c546..00000000000 --- a/mindsdb/integrations/handlers/empress_handler/connection_args.py +++ /dev/null @@ -1,42 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Empress Embedded server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Specify port to connect to Empress Embedded server' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Empress Embedded server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Empress Embedded server.', - 'secret': True - }, - server={ - 'type': ARG_TYPE.STR, - 'description': 'The server name used to authenticate with the Empress Embedded server.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'Specify database name to connect Empress Embedded server' - }, - -) - -connection_args_example = OrderedDict( - host='localhost', - port=6322, - user='admin', - password='password', - server='test', - database='test_db' -) diff --git a/mindsdb/integrations/handlers/empress_handler/empress_handler.py b/mindsdb/integrations/handlers/empress_handler/empress_handler.py deleted file mode 100644 index 0e819ffedc0..00000000000 --- a/mindsdb/integrations/handlers/empress_handler/empress_handler.py +++ /dev/null @@ -1,211 +0,0 @@ -import pyodbc - -import pandas as pd -from mindsdb_sql_parser import parse_sql - -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class EmpressHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Empress Embedded statements. - """ - - name = 'empress' - - def __init__(self, name: str, **kwargs): - """ - Initializes a new instance of the Empress Embedded handler. - - Args: - name (str): The name of the database. - connection_data (dict): parameters for connecting to the database - **kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'empress' - self.connection_args = kwargs.get('connection_data') - self.database = self.connection_args.get('database') - self.server = self.connection_args.get('server') - self.user = self.connection_args.get('user') - self.password = self.connection_args.get('password') - self.host = self.connection_args.get('host') - self.port = self.connection_args.get('port', 6322) - self.connection = None - self.is_connected = False - - def __del__(self): - """ - Destructor for the Empress Embedded class. - """ - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Establishes a connection to the Empress Embedded server. - Returns: - HandlerStatusResponse - """ - if self.is_connected: - return self.connection - - conn_str = f"DRIVER={{Empress ODBC Interface [Default]}};Server={self.server};Port={self.port};UID={self.user};PWD={self.password};Database={self.database};" - self.connection = pyodbc.connect(conn_str) - self.is_connected = True - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Empress Embedded, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def disconnect(self): - """ - Closes the connection to the Empress Embedded server. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def native_query(self, query: str) -> Response: - """ - Receive raw query and act upon it somehow. - Args: - query (str): SQL query to execute. - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cursor: - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame.from_records( - result, - columns=[x[0] for x in cursor.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_args["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INSERT, DELETE, etc - Returns: - HandlerResponse - """ - - renderer = SqlalchemyRender('sqlite') - - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Gets a list of table names in the database. - - Returns: - list: A list of table names in the database. - """ - connection = self.connect() - cursor = connection.cursor() - # Execute query to get all table names - cursor.execute( - "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE'") - - table_names = [x[0] for x in cursor.fetchall()] - - # Create dataframe with table names - df = pd.DataFrame(table_names, columns=['table_name', 'data_type']) - - # Create response object - response = Response( - RESPONSE_TYPE.TABLE, - df - ) - - return response - - def get_columns(self, table_name: str) -> Response: - """ - Gets a list of column names in the specified table. - - Args: - table_name (str): The name of the table to get column names from. - - Returns: - list: A list of column names in the specified table. - """ - conn = self.connect() - cursor = conn.cursor() - cursor.execute("SELECT column_name FROM information_schema.columns WHERE table_name='{}'".format(table_name)) - results = cursor.fetchall() - - # construct a pandas dataframe from the query results - df = pd.DataFrame( - results, - columns=['column_name', 'data_type'] - ) - - response = Response( - RESPONSE_TYPE.TABLE, - df - ) - - return response diff --git a/mindsdb/integrations/handlers/empress_handler/icon.png b/mindsdb/integrations/handlers/empress_handler/icon.png deleted file mode 100644 index 8c9da0386d8..00000000000 Binary files a/mindsdb/integrations/handlers/empress_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/empress_handler/requirements.txt b/mindsdb/integrations/handlers/empress_handler/requirements.txt deleted file mode 100644 index e9f735586c9..00000000000 --- a/mindsdb/integrations/handlers/empress_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pyodbc \ No newline at end of file diff --git a/mindsdb/integrations/handlers/empress_handler/tests/__init__.py b/mindsdb/integrations/handlers/empress_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/empress_handler/tests/test_empress_handler.py b/mindsdb/integrations/handlers/empress_handler/tests/test_empress_handler.py deleted file mode 100644 index 7da09720f2a..00000000000 --- a/mindsdb/integrations/handlers/empress_handler/tests/test_empress_handler.py +++ /dev/null @@ -1,55 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.empress_handler.empress_handler import EmpressHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class EmpressHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "127.0.0.1", - "port": "6322", - "user": "admin", - "password": "password", - "server": "test_server", - "database": "test_db" - } - } - cls.handler = EmpressHandler('test_empress_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_2_select_query(self): - query = "SELECT * FROM test_db.home_rentals" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_3_get_columns(self): - columns = self.handler.get_columns('test') - assert columns.type is not RESPONSE_TYPE.ERROR - - def test_4_drop_table(self): - res = self.handler.native_query("DROP TABLE IF EXISTS test_db.test") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_5_create_table(self): - res = self.handler.native_query("CREATE TABLE IF NOT EXISTS test_db.test (id INT, name VARCHAR(255))") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_6_insert(self): - res = self.handler.native_query("INSERT INTO test VALUES (100,'ONE HUNDRED')") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_7_disconnect(self): - assert self.handler.disconnect() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/eventbrite_handler/README.md b/mindsdb/integrations/handlers/eventbrite_handler/README.md deleted file mode 100644 index 9fb3d0b6c5e..00000000000 --- a/mindsdb/integrations/handlers/eventbrite_handler/README.md +++ /dev/null @@ -1,74 +0,0 @@ -# Eventbrite Handler - -Eventbrite handler for MindsDB provides interfaces to connect to Eventbrite via APIs into MindsDB. Here is the API documentation: https://www.eventbrite.com/platform/api - -## About Eventbrite - -Eventbrite is a global self-service ticketing platform for live experiences that allows anyone to create, share, find and attend events that fuel their passions and enrich their lives. From music festivals, marathons, conferences, community rallies and fundraisers, to gaming competitions and air guitar contests. Our mission is to bring the world together through live experiences. - -## Eventbrite Handler Implementation - -This handler was implemented using the [eventbrite-python](https://github.com/GearPlug/eventbrite-python/tree/main) library. -eventbrite-python is a Python library that wraps Eventbrite API v3. - -## Eventbrite Handler Initialization - -The Eventbrite handler is initialized with the following parameters: - -- `access_token`: API key to use for authentication and have an access to data - -Read about creating a Eventbrite API Authentication [here](https://www.eventbrite.com/platform/api?internal_ref=social#/introduction/authentication/1.-get-a-private-token). - -## Example Usage - -```sql -CREATE DATABASE my_eventbrite_handler -WITH ENGINE = "eventbrite", -PARAMETERS = { - "access_token": "your access token" -}; -``` - -Use the established connection to query your database: - -**For ListEventsTable, you need organization permission to list all of their events. Otherwise, 403 error** - -```sql -SELECT * FROM my_eventbrite_handler.user -``` - -```sql -SELECT * FROM my_eventbrite_handler.organization -``` - -```sql -SELECT * FROM my_eventbrite_handler.category -``` - -```sql -SELECT * FROM my_eventbrite_handler.subcategory -``` - -```sql -SELECT * FROM my_eventbrite_handler.formats -``` - -Run more advanced queries: - -```sql -SELECT id, name - FROM my_eventbrite_handler.category - ORDER BY name ASC - LIMIT 3 -``` - -```sql -SELECT * FROM my_eventbrite_handler.user -WHERE event_id= "717926867587"; -``` - -```sql -SELECT * FROM my_eventbrite_handler.events -WHERE organization_id = '1871338711793'; - -``` diff --git a/mindsdb/integrations/handlers/eventbrite_handler/__about__.py b/mindsdb/integrations/handlers/eventbrite_handler/__about__.py deleted file mode 100644 index 41a80afb5cd..00000000000 --- a/mindsdb/integrations/handlers/eventbrite_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Eventbrite handler' -__package_name__ = 'mindsdb_eventbrite_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the Eventbrite API" -__author__ = 'Ton Hoang Nguyen (Bill)' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/eventbrite_handler/__init__.py b/mindsdb/integrations/handlers/eventbrite_handler/__init__.py deleted file mode 100644 index 1b719395a9d..00000000000 --- a/mindsdb/integrations/handlers/eventbrite_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .eventbrite_handler import EventbriteHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Eventbrite" -name = "eventbrite" -type = HANDLER_TYPE.DATA -icon_path = "icon.png" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/eventbrite_handler/eventbrite_handler.py b/mindsdb/integrations/handlers/eventbrite_handler/eventbrite_handler.py deleted file mode 100644 index a1219963dfa..00000000000 --- a/mindsdb/integrations/handlers/eventbrite_handler/eventbrite_handler.py +++ /dev/null @@ -1,94 +0,0 @@ -import os - -from eventbrite.client import Client -from mindsdb.utilities import log -from mindsdb.utilities.config import Config -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse - -from .eventbrite_tables import EventbriteUserTable -from .eventbrite_tables import EventbriteOrganizationTable -from .eventbrite_tables import EventbriteCategoryTable -from .eventbrite_tables import EventbriteSubcategoryTable -from .eventbrite_tables import EventbriteFormatTable -from .eventbrite_tables import EventbriteEventDetailsTable -from .eventbrite_tables import EventbriteEventsTable - -logger = log.getLogger(__name__) - - -class EventbriteHandler(APIHandler): - """A class for handling connections and interactions with the Eventbrite API. - - Attributes: - api (Client): The `Client` object for accessing Eventbrite API. - """ - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - args = kwargs.get("connection_data", {}) - self.connection_args = {} - handler_config = Config().get("eventbrite_handler", {}) - - # Set up connection arguments - for k in ["access_token"]: - if k in args: - self.connection_args[k] = args[k] - elif f"EVENTBRITE_{k.upper()}" in os.environ: - self.connection_args[k] = os.environ[f"EVENTBRITE_{k.upper()}"] - elif k in handler_config: - self.connection_args[k] = handler_config[k] - - self.api = None - self.is_connected = False - - userTable = EventbriteUserTable(self) - self._register_table("user", userTable) - - organizationTable = EventbriteOrganizationTable(self) - self._register_table("organization", organizationTable) - - categoryTable = EventbriteCategoryTable(self) - self._register_table("category", categoryTable) - - subcategoryTable = EventbriteSubcategoryTable(self) - self._register_table("subcategory", subcategoryTable) - - formatTable = EventbriteFormatTable(self) - self._register_table("formats", formatTable) - - eventDetailsTable = EventbriteEventDetailsTable(self) - self._register_table("event_details", eventDetailsTable) - - eventsTable = EventbriteEventsTable(self) - self._register_table("events", eventsTable) - - def connect(self): - """Initialize the Eventbrite API Client.""" - if self.is_connected: - return self.api - - self.api = Client(access_token=self.connection_args["access_token"]) - - self.is_connected = True - return self.api - - def check_connection(self) -> StatusResponse: - """Check the connection to the Eventbrite API.""" - response = StatusResponse(False) - - try: - api = self.connect() - me = api.get_current_user() - logger.info(f"Connected to Eventbrite as {me['name']}") - response.success = True - - except Exception as e: - response.error_message = f"Error connecting to Eventbrite API: {e}" - logger.error(response.error_message) - - if not response.success and self.is_connected: - self.is_connected = False - - return response diff --git a/mindsdb/integrations/handlers/eventbrite_handler/eventbrite_tables.py b/mindsdb/integrations/handlers/eventbrite_handler/eventbrite_tables.py deleted file mode 100644 index d8f3bc9bcfd..00000000000 --- a/mindsdb/integrations/handlers/eventbrite_handler/eventbrite_tables.py +++ /dev/null @@ -1,612 +0,0 @@ -import pandas as pd -import collections -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.integrations.utilities.handlers.query_utilities import ( - SELECTQueryParser, - SELECTQueryExecutor, -) - - -def flatten(d, parent_key="", sep="_"): - items = [] - for k, v in d.items(): - new_key = parent_key + sep + k if parent_key else k - if isinstance(v, collections.MutableMapping): - items.extend(flatten(v, new_key, sep=sep).items()) - else: - items.append((new_key, v)) - return dict(items) - - -class EventbriteUserTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - self.handler.connect() - - if query.limit is not None: - raise NotImplementedError("Limit is not supported for user info table") - - user_info = self.handler.api.get_current_user() - - # Normalize email field - if "emails" in user_info and isinstance(user_info["emails"], list): - user_info["email"] = ( - user_info["emails"][0]["email"] if user_info["emails"] else None - ) - user_info["email_verified"] = ( - user_info["emails"][0]["verified"] if user_info["emails"] else None - ) - user_info["email_primary"] = ( - user_info["emails"][0]["primary"] if user_info["emails"] else None - ) - del user_info["emails"] - else: - user_info["email"] = None - user_info["email_verified"] = None - user_info["email_primary"] = None - - data = pd.DataFrame([user_info]) - - # Select columns based on query - columns = self.get_columns() - selected_columns = [ - target.parts[-1] - for target in query.targets - if isinstance(target, ast.Identifier) - ] - if selected_columns: - columns = [col for col in columns if col in selected_columns] - data = data[columns] - - return data - - def get_columns(self): - return [ - "email", - "email_verified", - "email_primary", - "id", - "name", - "first_name", - "last_name", - "is_public", - "image_id", - ] - - -class EventbriteOrganizationTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - self.handler.connect() - - organization_info = self.handler.api.get_user_organizations() - - # Normalize organization data - organizations = organization_info.get("organizations", []) - result = pd.DataFrame(organizations) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - - return result - - def get_columns(self): - return [ - "_type", - "name", - "vertical", - "parent_id", - "locale", - "created", - "image_id", - "id", - ] - - -class EventbriteCategoryTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - self.handler.connect() - - category_info = self.handler.api.list_categories() - categories = category_info.get("categories", []) - result = pd.DataFrame(categories) - - select_statement_parser = SELECTQueryParser( - query, "categoryInfoTable", self.get_columns() - ) - - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - total_results = result_limit if result_limit else 100 - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - - select_statement_executor = SELECTQueryExecutor( - result, selected_columns, where_conditions, order_by_conditions - ) - - result = select_statement_executor.execute_query() - - return result.head(total_results) - - def get_columns(self): - return [ - "resource_uri", - "id", - "name", - "name_localized", - "short_name", - "short_name_localized", - ] - - -class EventbriteSubcategoryTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - self.handler.connect() - - # Dummy function to simulate API call and response - category_info = self.handler.api.list_subcategories() - - # Normalizing the category data - categories = category_info.get("subcategories", []) - result = pd.DataFrame(categories) - - select_statement_parser = SELECTQueryParser( - query, "subcategoryInfoTable", self.get_columns() - ) - - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - total_results = result_limit if result_limit else 100 - - # Normalize nested fields - parent_category = result["parent_category"].apply(pd.Series) - parent_category.columns = [f"parent_{col}" for col in parent_category.columns] - result = pd.concat([result, parent_category], axis=1).drop( - "parent_category", axis=1 - ) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - - select_statement_executor = SELECTQueryExecutor( - result, selected_columns, where_conditions, order_by_conditions - ) - - result = select_statement_executor.execute_query() - - return result.head(total_results) - - def get_columns(self): - return [ - "resource_uri", - "id", - "name", - "name_localized", - "parent_category", - "parent_resource_uri", - "parent_id", - "parent_name", - "parent_name_localized", - "parent_short_name", - "parent_short_name_localized", - ] - - -class EventbriteFormatTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - self.handler.connect() - - # Simulate API call and get the response - format_info = self.handler.api.list_formats() - - # Normalize format data - formats = format_info.get("formats", []) - result = pd.DataFrame(formats) - - select_statement_parser = SELECTQueryParser( - query, "formatInfoTable", self.get_columns() - ) - - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - total_results = result_limit if result_limit else 100 - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - - select_statement_executor = SELECTQueryExecutor( - result, selected_columns, where_conditions, order_by_conditions - ) - - result = select_statement_executor.execute_query() - - return result.head(total_results) - - def get_columns(self): - return [ - "resource_uri", - "id", - "name", - "name_localized", - "short_name", - "short_name_localized", - ] - - -class EventbriteEventDetailsTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - self.handler.connect() - - conditions = extract_comparison_conditions(query.where) - allowed_keys = set(["event_id"]) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - elif op == "=" and arg1 in allowed_keys: - params[arg1] = arg2 - elif op != "=": - raise NotImplementedError(f"Unknown op: {op}") - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params["max_results"] = query.limit.value - - if "event_id" not in params: - # search not works without searchQuery, use 'London' - params["event_id"] = "717926867587" - - event_details = self.handler.api.get_event(params["event_id"]) - - # Normalize event data - flat_event_details = flatten(event_details) - result = pd.DataFrame([flat_event_details]) - - for col in ["name", "description", "start", "end", "logo"]: - if col in result.columns: - result = pd.concat( - [result, result[col].apply(pd.Series).add_prefix(f"{col}_")], - axis=1, - ) - result = result.drop([col], axis=1) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - return [ - "name_text", - "name_html", - "description_text", - "description_html", - "url", - "start_timezone", - "start_local", - "start_utc", - "end_timezone", - "end_local", - "end_utc", - "organization_id", - "created", - "changed", - "published", - "capacity", - "capacity_is_custom", - "status", - "currency", - "listed", - "shareable", - "online_event", - "tx_time_limit", - "hide_start_date", - "hide_end_date", - "locale", - "is_locked", - "privacy_setting", - "is_series", - "is_series_parent", - "inventory_type", - "is_reserved_seating", - "show_pick_a_seat", - "show_seatmap_thumbnail", - "show_colors_in_seatmap_thumbnail", - "source", - "is_free", - "version", - "summary", - "facebook_event_id", - "logo_id", - "organizer_id", - "venue_id", - "category_id", - "subcategory_id", - "format_id", - "id", - "resource_uri", - "is_externally_ticketed", - "logo_crop_mask", - "logo_original", - "logo_id", - "logo_url", - "logo_aspect_ratio", - "logo_edge_color", - "logo_edge_color_set", - ] - - -class EventbriteEventsTable(APITable): - def __init__(self, handler): - self.handler = handler - - def select(self, query: ast.Select) -> pd.DataFrame: - self.handler.connect() - - conditions = extract_comparison_conditions(query.where) - allowed_keys = set(["organization_id"]) - - params = {} - for op, arg1, arg2 in conditions: - if op == "=" and arg1 in allowed_keys: - params[arg1] = arg2 - else: - raise NotImplementedError( - f"Unsupported operation or field: {op} {arg1}" - ) - - if "organization_id" not in params: - raise ValueError("Organization ID must be provided") - - event_list = self.handler.api.list_events(params["organization_id"]) - result = pd.DataFrame(event_list["events"]) - - # Normalize event data and split nested dictionaries into separate columns - result = pd.concat( - [ - result.drop(["name", "description", "start", "end", "logo"], axis=1), - result["name"].apply(pd.Series).add_prefix("name_"), - result["description"].apply(pd.Series).add_prefix("description_"), - result["start"].apply(pd.Series).add_prefix("start_"), - result["end"].apply(pd.Series).add_prefix("end_"), - result["logo"].apply(pd.Series).add_prefix("logo_"), - ], - axis=1, - ) - - select_statement_parser = SELECTQueryParser( - query, "subcategoryInfoTable", self.get_columns() - ) - - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - total_results = result_limit if result_limit else 100 - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - - select_statement_executor = SELECTQueryExecutor( - result, selected_columns, where_conditions, order_by_conditions - ) - - result = select_statement_executor.execute_query() - - return result.head(total_results) - - def get_columns(self): - return [ - "name_text", - "name_html", - "description_text", - "description_html", - "url", - "start_timezone", - "start_local", - "start_utc", - "end_timezone", - "end_local", - "end_utc", - "organization_id", - "created", - "changed", - "published", - "capacity", - "capacity_is_custom", - "status", - "currency", - "listed", - "shareable", - "online_event", - "tx_time_limit", - "hide_start_date", - "hide_end_date", - "locale", - "is_locked", - "privacy_setting", - "is_series", - "is_series_parent", - "inventory_type", - "is_reserved_seating", - "show_pick_a_seat", - "show_seatmap_thumbnail", - "show_colors_in_seatmap_thumbnail", - "source", - "is_free", - "summary", - "organizer_id", - "venue_id", - "category_id", - "subcategory_id", - "format_id", - "id", - "resource_uri", - "is_externally_ticketed", - ] diff --git a/mindsdb/integrations/handlers/eventbrite_handler/icon.png b/mindsdb/integrations/handlers/eventbrite_handler/icon.png deleted file mode 100644 index f6daa52aedd..00000000000 Binary files a/mindsdb/integrations/handlers/eventbrite_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/eventbrite_handler/requirements.txt b/mindsdb/integrations/handlers/eventbrite_handler/requirements.txt deleted file mode 100644 index 90ca589ab43..00000000000 --- a/mindsdb/integrations/handlers/eventbrite_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -eventbrite-python \ No newline at end of file diff --git a/mindsdb/integrations/handlers/eventstoredb_handler/README.md b/mindsdb/integrations/handlers/eventstoredb_handler/README.md deleted file mode 100644 index 83bf5cc70cf..00000000000 --- a/mindsdb/integrations/handlers/eventstoredb_handler/README.md +++ /dev/null @@ -1,33 +0,0 @@ - -# EventStoreDBHandler - -### How it works? -This handler treats EventStoreDB streams as tables and every JSON Event's data key as column. -Events with nested JSON are flattened with underscore as the separator. - -### Prerequisites - -**EventStoreDB Configuration** - -- RunProjections=All and to enable the $streams projection. This is required to allow MindsDB to get all the available tables i.e. streams -- EnableAtomPubOverHTTP=True. The handler connects to EventStoreDB over the atom pub API. - -### Limitations -- Stream names can only contain characters supported in SQL i.e. dots and dashes commonly used for stream names will not work at the moment. -- JSON data is flattened with underscore as separator. This is not configurable at the moment. - -### Example Usage - -```buildoutcfg -CREATE DATABASE securedb -WITH ENGINE = "eventstoredb", -PARAMETERS = { - "user": "admin", - "password": "changeit", - "tls": True, - "host":"localhost", - "port":2113 - }; -``` -This will create a connection to a local secure node. Once this is successful, you can read about how to run machine learning algorithms on top of your EventStoreDB data at: [https://docs.mindsdb.com/ml-types](https://docs.mindsdb.com/ml-types) - diff --git a/mindsdb/integrations/handlers/eventstoredb_handler/__about__.py b/mindsdb/integrations/handlers/eventstoredb_handler/__about__.py deleted file mode 100644 index cecad8c3021..00000000000 --- a/mindsdb/integrations/handlers/eventstoredb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB EventStoreDB handler' -__package_name__ = 'mindsdb_eventstoredb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for EventStoreDB" -__author__ = 'Lokhesh Ujhoodha' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/eventstoredb_handler/__init__.py b/mindsdb/integrations/handlers/eventstoredb_handler/__init__.py deleted file mode 100644 index 74ed512ff2e..00000000000 --- a/mindsdb/integrations/handlers/eventstoredb_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .eventstoredb_handler import EventStoreDB as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'EventStoreDB' -name = 'eventstoredb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', - 'version', - 'name', - 'type', - 'title', - 'description', - 'import_error', - 'icon_path' -] diff --git a/mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py b/mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py deleted file mode 100644 index ea22bb8e284..00000000000 --- a/mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py +++ /dev/null @@ -1,217 +0,0 @@ -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import Select -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -from .utils.helpers import get_auth_string, build_basic_url, build_health_url, build_stream_url, build_streams_url, build_next_url, entry_to_df, build_stream_url_last_event -import requests -import pandas as pd -import re - -logger = log.getLogger(__name__) - - -class EventStoreDB(DatabaseHandler): - """ - Handler for EventStoreDB - The handler uses the Atom Pub Over HTTP of EventStoreDB. - This means that you need to enable AtomPuvOverHTTP on your EventStoreDB instance if you are using v20+ - - Why not gRPC? At the moment we cannot use https://pypi.org/project/esdbclient/ which uses the gRPC endpoint - because mysql-connector-python 8.0.32 requires protobuf<=3.20.3,>=3.11.0, but esdbclient - requires protobuf 4.22.1. - - Why not TCP? At the moment https://github.com/epoplavskis/photon-pump only works in insecure mode and - configuration is limited. - - Third reason, there is no official Python client at the moment of writing of this handler. - But once there is better ESDB Python support for gRPC, we should move this integration from AtomPub to gRPC. - """ - - name = 'eventstoredb' - # defaults to an insecure localhost single node - scheme = 'http' - host = 'localhost' - port = '2113' - is_connected = None - basic_url = "" - read_batch_size = 500 # should be adjusted based on use case - headers = { - 'Accept': 'application/json', - 'ES-ResolveLinkTo': "True" - } - tlsverify = False - - def __init__(self, name, **kwargs): - super().__init__(name) - self.parser = parse_sql - connection_data = kwargs['connection_data'] - username = connection_data.get('username') - password = connection_data.get('password') - self.host = connection_data.get('host') - if connection_data.get('tls') is not None and isinstance(connection_data.get('tls'), bool) \ - and connection_data.get('tls'): - self.scheme = 'https' - if connection_data.get('port') is not None and isinstance(connection_data.get('port'), int): - self.port = connection_data.get('port') - if connection_data.get('page_size') is not None: - if isinstance(connection_data.get('page_size'), int) and connection_data.get('page_size') > 0: - self.read_batch_size = connection_data.get('page_size') - if username is not None and password is not None: - if isinstance(username, str) and isinstance(password, str): - self.headers['authorization'] = get_auth_string(username, password) - if connection_data.get('tlsverify') is not None and isinstance(connection_data.get('tlsverify'), bool): - self.tlsverify = connection_data.get('tlsverify') - - self.basic_url = build_basic_url(self.scheme, self.host, self.port) - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self, **kwargs): - if self.check_connection() == StatusResponse(True): - self.is_connected = True - logger.info(f'{self.name} connection successful!') - return StatusResponse(True) - logger.info(f'{self.name} connection could not be made.') - return StatusResponse(False) - - def disconnect(self, **kwargs): - return - - def check_connection(self) -> StatusResponse: - try: - response = requests.get(build_health_url(self.basic_url), verify=self.tlsverify) - if response.status_code == 204: - return StatusResponse(True) - except Exception as e: - logger.error(f'{self.name} check connection failed with: {e}!') - return StatusResponse(False) - - def query(self, query: ASTNode) -> Response: - if type(query) == Select: - stream_name = query.from_table.parts[-1] # i.e. table name - params = { - 'embed': 'tryharder' - } - stream_endpoint = build_stream_url(self.basic_url, stream_name) - response = requests.get(stream_endpoint, params=params, headers=self.headers, verify=self.tlsverify) - entries = [] - if response is not None and response.status_code == 200: - json_response = response.json() - for entry in json_response["entries"]: - entry = entry_to_df(entry) - entries.append(entry) - while True: - end_of_stream = True - if 'links' in json_response: - for link in json_response['links']: - if 'relation' in link: - if link['relation'] == 'next': - end_of_stream = False - response = requests.get(build_next_url(link['uri'], self.read_batch_size), - params=params, headers=self.headers, verify=self.tlsverify) - json_response = response.json() - for entry in json_response["entries"]: - entry = entry_to_df(entry) - entries.append(entry) - if end_of_stream: - break - - df = pd.concat(entries) - - return Response( - RESPONSE_TYPE.TABLE, - df - ) - else: - return Response( - RESPONSE_TYPE.ERROR, - error_message="Only 'select' queries are supported for EventStoreDB" - ) - - def native_query(self, query: str) -> Response: - ast = self.parser(query) - return self.query(ast) - - def get_tables(self) -> Response: - """ - List all streams i.e tables - """ - params = { - 'embed': 'tryharder' - } - stream_endpoint = build_streams_url(self.basic_url) - response = requests.get(stream_endpoint, params=params, headers=self.headers, verify=self.tlsverify) - streams = [] - if response is not None and response.status_code == 200: - json_response = response.json() - for entry in json_response["entries"]: - if "title" in entry: - streams.append(entry["title"].split('@')[1]) - while True: - end_of_stream = True - if 'links' in json_response: - for link in json_response['links']: - if 'relation' in link: - if link['relation'] == 'next': - end_of_stream = False - response = requests.get(build_next_url(link['uri'], self.read_batch_size), - params=params, headers=self.headers, verify=self.tlsverify) - json_response = response.json() - for entry in json_response["entries"]: - if "title" in entry: - streams.append(entry["title"].split('@')[1]) - if end_of_stream: - break - - df = pd.DataFrame(streams, - columns=['table_name']) - return Response( - RESPONSE_TYPE.TABLE, - df - ) - - def get_columns(self, table_name) -> Response: - params = { - 'embed': 'tryharder' - } - stream_endpoint = build_stream_url_last_event(self.basic_url, table_name) - response = requests.get(stream_endpoint, params=params, headers=self.headers, verify=self.tlsverify) - entry = None - if response is not None and response.status_code == 200: - json_response = response.json() - if json_response is not None and len(json_response) > 0: - entry = entry_to_df(json_response["entries"][0]) - if entry is None: - return Response( - RESPONSE_TYPE.ERROR, - "Could not retrieve JSON event data to infer column types." - ) - data = [] - for k, v in entry.items(): - data.append([k, v.dtypes.name]) - df = pd.DataFrame(data, columns=['Field', 'Type']) - return Response( - RESPONSE_TYPE.TABLE, - df - ) - - -def parse_sql(sql, dialect='sqlite'): - # remove ending semicolon and spaces - sql = re.sub(r'[\s;]+$', '', sql) - - from mindsdb_sql_parser.lexer import MindsDBLexer - from mindsdb_sql_parser.parser import MindsDBParser - lexer, parser = MindsDBLexer(), MindsDBParser() - - tokens = lexer.tokenize(sql) - ast = parser.parse(tokens) - return ast diff --git a/mindsdb/integrations/handlers/eventstoredb_handler/icon.svg b/mindsdb/integrations/handlers/eventstoredb_handler/icon.svg deleted file mode 100644 index 60ffe1bb521..00000000000 --- a/mindsdb/integrations/handlers/eventstoredb_handler/icon.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/eventstoredb_handler/utils/helpers.py b/mindsdb/integrations/handlers/eventstoredb_handler/utils/helpers.py deleted file mode 100644 index 5c2a1cd6324..00000000000 --- a/mindsdb/integrations/handlers/eventstoredb_handler/utils/helpers.py +++ /dev/null @@ -1,53 +0,0 @@ -from urllib.parse import urlunparse, urljoin -import re -import pandas as pd -import json -import base64 - -""" -From connection settings in MindsDB to an EventStoreDB AtomPub HTTP URL -#https://python.readthedocs.io/en/stable/library/urllib.parse.html#urllib.parse.urlunparse -""" - - -def build_basic_url(scheme, host, port): - netloc = host + ":" + str(port) - url = urlunparse([ - scheme, - netloc, - "", "", "", ""]) - return url - - -def build_health_url(basic_url): - return urljoin(basic_url, "/health/live") - - -def build_streams_url(basic_url): - return urljoin(basic_url, "streams/%24streams") - - -def build_stream_url(basic_url, stream_name): - return urljoin(basic_url, "streams/" + stream_name) # TODO: quote stream_name? - - -def build_stream_url_last_event(basic_url, stream_name): - return urljoin(basic_url, "streams/" + stream_name + "/head/backward/1") - - -def build_next_url(link_url, read_batch_size): - return re.sub(r"/(\d+)$", "/" + str(read_batch_size), link_url) - - -def entry_to_df(entry): - # All events in EventStoreDB have the following: - fields = ['eventId', 'eventType', 'eventNumber'] - df = pd.DataFrame([[entry['eventId'], entry['eventType'], entry['eventNumber']]], - columns=fields) - data = pd.json_normalize(json.loads(entry['data']), sep='_') - return df.merge(data, how='cross') - - -def get_auth_string(username, password): - credentials = username + ':' + password - return 'Basic ' + str(base64.b64encode(credentials.encode('utf-8')).decode('utf-8')) diff --git a/mindsdb/integrations/handlers/faunadb_handler/README.md b/mindsdb/integrations/handlers/faunadb_handler/README.md deleted file mode 100644 index 6549bf1c1b6..00000000000 --- a/mindsdb/integrations/handlers/faunadb_handler/README.md +++ /dev/null @@ -1,82 +0,0 @@ -# FaunaDB Handler - -This is the implementation of the FaunaDB for MindsDB. - -## FaunaDB - -FaunaDB is a distributed document-relational database delivered as a cloud API. Fauna is a fast, reliable, consistent relational database. - -## Implementation - -This handler uses `faunadb` python library connect to a faunadb instance. - -The required arguments to establish a connection are: - -* `fauna_secret`: the secret key for connecting to a faunadb instance -either -* `fauna_endpoint`: the port to use when connecting -or -* `fauna_scheme`: the protocol used in the faunadb's instance -* `fauna_domain`: the domain on which the instance is hosted -* `fauna_port`: the port on which the instance is listening - - -## Usage - -In order to make use of this handler and connect to a hosted FaunaDB instance in Cloud, the following syntax can be used: - -```sql -CREATE DATABASE fauna_dev -WITH ENGINE = 'faunadb', -PARAMETERS = { - "fauna_secret": "kdvozJsm9LhYkCYtH2VbX55AUFQFQPZNAA", - "fauna_endpoint": "https://db.fauna.com:443" -}; -``` - -Another option is to use seperate config fields like scheme(protocol), the domain and the port of the instance of the faunadb database: - -```sql -CREATE DATABASE fauna_dev -WITH ENGINE = "faunadb", -PARAMETERS = { - "fauna_secret": "kdvozJsm9LhYkCYtH2VbX55AUFQFQPZNAA", - "fauna_scheme": "https", - "fauna_domain": "db.fauna.com", - "fauna_port": 443 -} -``` - -You can insert data into a new collection like so: - -```sql -CREATE TABLE fauna_dev.books - (SELECT * FROM other.source); -``` - -You can query a collection within your FaunaDB instance as follows: - -```sql -SELECT * FROM fauna_dev.books; -``` - -You can insert into a collection i.e. create documents in faunadb as follows: - -```sql -INSERT INTO fauna_dev.books (name, author) -VALUES - ("The Hobbit", "J.R.R Tolkein"), - ("Good Omens", "Neil Gaiman"); -``` -OR -dump as a json data - -```sql -INSERT INTO fauna_dev.books (data) -VALUES ( - '[ - {"name": "The Hobbit", "author": "J.R.R. Tolkein"}, - {"name": "Good Omens", "author": "Neil Gaiman"} - ]' -); -``` diff --git a/mindsdb/integrations/handlers/faunadb_handler/__about__.py b/mindsdb/integrations/handlers/faunadb_handler/__about__.py deleted file mode 100644 index cffa14bf609..00000000000 --- a/mindsdb/integrations/handlers/faunadb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB FaunaDB handler" -__package_name__ = "mindsdb_faunadb_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for FaunaDB" -__author__ = "Meet Gor" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/faunadb_handler/__init__.py b/mindsdb/integrations/handlers/faunadb_handler/__init__.py deleted file mode 100644 index 75c7a6c5b9a..00000000000 --- a/mindsdb/integrations/handlers/faunadb_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .faunadb_handler import FaunaDBHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "FaunaDB" -name = "faunadb" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/faunadb_handler/connection_args.py b/mindsdb/integrations/handlers/faunadb_handler/connection_args.py deleted file mode 100644 index 8010f40a25c..00000000000 --- a/mindsdb/integrations/handlers/faunadb_handler/connection_args.py +++ /dev/null @@ -1,39 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - fauna_secret={ - "type": ARG_TYPE.STR, - "description": "faunadb secret", - "required": True, - "secret": True - }, - fauna_scheme={ - "type": ARG_TYPE.STR, - "description": "faunadb scheme(http/https)", - "required": False, - }, - fauna_domain={ - "type": ARG_TYPE.STR, - "description": "faunadb instance domain", - "required": False, - }, - fauna_port={ - "type": ARG_TYPE.INT, - "description": "faunadb instance port", - "required": False, - }, - fauna_endpoint={ - "type": ARG_TYPE.STR, - "description": "faunadb instance endpoint", - "required": False, - }, -) - -connection_args_example = OrderedDict( - fauna_scheme="https", - fauna_domain="db.fauna.com", - fauna_port=443, -) diff --git a/mindsdb/integrations/handlers/faunadb_handler/faunadb_handler.py b/mindsdb/integrations/handlers/faunadb_handler/faunadb_handler.py deleted file mode 100644 index 80752aca662..00000000000 --- a/mindsdb/integrations/handlers/faunadb_handler/faunadb_handler.py +++ /dev/null @@ -1,267 +0,0 @@ -import json -import pandas as pd -from typing import List - -from faunadb import query as q -from faunadb.client import FaunaClient -from mindsdb_sql_parser import Select, Insert, CreateTable, Delete -from mindsdb_sql_parser.ast.select.star import Star -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.integrations.libs.response import ( - RESPONSE_TYPE, - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, -) -from mindsdb.integrations.libs.base import DatabaseHandler - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class FaunaDBHandler(DatabaseHandler): - """This handler handles connection and execution of the FaunaDB statements.""" - - name = "faunadb" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - - self._connection_data = kwargs.get("connection_data") - - self._client_config = { - "fauna_secret": self._connection_data.get("fauna_secret"), - "fauna_scheme": self._connection_data.get("fauna_scheme"), - "fauna_domain": self._connection_data.get("fauna_domain"), - "fauna_port": self._connection_data.get("fauna_port"), - "fauna_endpoint": self._connection_data.get("fauna_endpoint"), - } - - scheme, domain, port, endpoint = ( - self._client_config["fauna_scheme"], - self._client_config["fauna_domain"], - self._client_config["fauna_port"], - self._client_config["fauna_endpoint"], - ) - - # should have the secret - if (self._client_config["fauna_secret"]) is None: - raise Exception("FaunaDB secret is required for FaunaDB connection!") - # either scheme + domain + port or endpoint is required - # but not both - if not endpoint and not (scheme and domain and port): - raise Exception( - "Either scheme + domain + port or endpoint is required for FaunaDB connection!" - ) - elif endpoint and (scheme or domain or port): - raise Exception( - "Either scheme + domain + port or endpoint is required for FaunaDB connection, but not both!" - ) - - self._client = None - self.is_connected = False - self.connect() - - def _get_client(self): - client_config = self._client_config - if client_config is None: - raise Exception("Client config is not set!") - - if client_config["fauna_endpoint"] is not None: - return FaunaClient( - secret=client_config["fauna_secret"], - endpoint=client_config["fauna_endpoint"], - ) - else: - return FaunaClient( - secret=client_config["fauna_secret"], - scheme=client_config["fauna_scheme"], - domain=client_config["fauna_domain"], - port=client_config["fauna_port"], - ) - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """Connect to a FaunaDB database.""" - if self.is_connected is True: - return self._client - - try: - self._client = self._get_client() - self.is_connected = True - return self._client - except Exception as e: - logger.error(f"Error connecting to FaunaDB client, {e}!") - self.is_connected = False - - def disconnect(self): - """Close the database connection.""" - - if self.is_connected is False: - return - - self._client = None - self.is_connected = False - - def check_connection(self): - """Check the connection to the FaunaDB database.""" - response_code = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self._client.ping() - response_code.success = True - except Exception as e: - logger.error(f"Error connecting to FaunaDB , {e}!") - response_code.error_message = str(e) - finally: - if response_code.success is True and need_to_close: - self.disconnect() - if response_code.success is False and self.is_connected is True: - self.is_connected = False - - return response_code - - def query(self, query: ASTNode) -> Response: - """Render and execute a SQL query. - - Args: - query (ASTNode): The SQL query. - - Returns: - Response: The query result. - """ - - if isinstance(query, Select): - collection = str(query.from_table) - fields = query.targets - conditions = query.where - offset = query.offset - limit = query.limit - # TODO: research how to parse individual columns from document - # fields = [f.to_string().split()[2] for f in fields] - result = self.select(collection, fields, conditions, offset, limit) - - elif isinstance(query, Insert): - collection = str(query.table) - fields = [col.name for col in query.columns] - values = query.values - self.insert(collection, fields, values) - return Response(resp_type=RESPONSE_TYPE.OK) - - elif isinstance(query, CreateTable): - collection_name = str(query.name) - self.create_table(collection_name) - return Response(resp_type=RESPONSE_TYPE.OK) - - elif isinstance(query, Delete): - collection_name = str(query.table) - conditions = query.where - self.delete_document(collection_name, conditions) - return Response(resp_type=RESPONSE_TYPE.OK) - """ - # NOT Working for integration tables yet - elif isinstance(query, DropTables): - collection_name = str(query.tables) - self.drop_table(collection_name) - """ - - df = pd.json_normalize(result['data']) - return Response(RESPONSE_TYPE.TABLE, df) - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[str] = None, - offset: int = None, - limit: int = None, - ) -> dict: - # select * from db_name.collection_name - if len(columns) > 0 and isinstance(columns[0], Star): - fauna_query = q.map_( - q.lambda_("ref", q.get(q.var("ref"))), - q.paginate(q.documents(q.collection(str(table_name)))), - ) - else: - # select id, name ,etc from db_name.collection_name - fauna_query = q.map_( - q.lambda_("doc", {"data": q.select(columns, q.var("doc"))}), - q.paginate(q.documents(q.collection(table_name))), - ) - - return self._client.query(fauna_query) - - def insert(self, table_name: str, fields, values) -> Response: - if len(fields) == 1 and fields[0] == "data": - for value in values: - value = json.loads(value[0]) - if isinstance(value, dict): - value = [value] - for data in value: - self._client.query( - q.create( - q.collection(table_name), - {"data": data}, - ) - ) - else: - for value in values: - data = {f: v for f, v in zip(fields, value)} - self._client.query( - q.create( - q.collection(table_name), - {"data": data}, - ) - ) - - def create_table(self, table_name: str, if_not_exists=True) -> Response: - """ - Create a collection with the given name in the FaunaDB database. - """ - fauna_query = q.create_collection({"name": table_name}) - self._client.query(fauna_query) - - def delete_document(self, table_name: str, conditions: List[str]) -> Response: - """ - Delete a document with the given id in the FaunaDB database. - """ - # get the id of the document (only = operator supported right now, can add more) - ref = conditions.args[1].value - fauna_query = q.delete(q.ref(q.collection(table_name), ref)) - self._client.query(fauna_query) - - def drop_table(self, table_name: str, if_exists=True) -> Response: - """ - Delete a collection from the FaunaDB database. - """ - fauna_query = q.delete(q.collection(table_name)) - self._client.query(fauna_query) - return Response(resp_type=RESPONSE_TYPE.OK) - - def get_tables(self) -> Response: - """ - Get the list of collections in the FaunaDB database. - """ - try: - result = self._client.query(q.paginate(q.collections())) - collections = [] - for collection in result["data"]: - collections.append(collection.id()) - return Response( - resp_type=RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - collections, - columns=["table_name"], - ), - ) - except Exception as e: - logger.error(f"Error getting tables from FaunaDB: {e}") - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Error getting tables from FaunaDB: {e}", - ) diff --git a/mindsdb/integrations/handlers/faunadb_handler/icon.svg b/mindsdb/integrations/handlers/faunadb_handler/icon.svg deleted file mode 100644 index 45dd0ac7527..00000000000 --- a/mindsdb/integrations/handlers/faunadb_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/faunadb_handler/requirements.txt b/mindsdb/integrations/handlers/faunadb_handler/requirements.txt deleted file mode 100644 index 736a34bfd81..00000000000 --- a/mindsdb/integrations/handlers/faunadb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -faunadb \ No newline at end of file diff --git a/mindsdb/integrations/handlers/faunadb_handler/tests/__init__.py b/mindsdb/integrations/handlers/faunadb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/faunadb_handler/tests/test_faunadb_handler.py b/mindsdb/integrations/handlers/faunadb_handler/tests/test_faunadb_handler.py deleted file mode 100644 index 5ef7b0ee854..00000000000 --- a/mindsdb/integrations/handlers/faunadb_handler/tests/test_faunadb_handler.py +++ /dev/null @@ -1,41 +0,0 @@ -import unittest -from mindsdb_sql_parser import parse_sql - -from mindsdb.api.executor.data_types.response_type import ( - RESPONSE_TYPE, -) -from mindsdb.integrations.handlers.faunadb_handler.faunadb_handler import ( - FaunaDBHandler, -) - - -class FaunadbHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "fauna_secret": "fnAFQFQPZNAAUYkCYkdvozJsm9tH2VbX55AULhsH", - "fauna_endpoint": "https://db.fauna.com:443/", - } - } - cls.handler = FaunaDBHandler("test_faunadb_handler", **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - self.handler.check_connection() - self.assertTrue(self.handler.is_connected) - - def test_2_select(self): - query = parse_sql("SELECT * FROM books;") - res = self.handler.query(query) - assert res.type is RESPONSE_TYPE.TABLE - - def test_3_describe_db(self): - res = self.handler.get_tables() - assert res.type is RESPONSE_TYPE.TABLE - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/file_handler/file_handler.py b/mindsdb/integrations/handlers/file_handler/file_handler.py index c6c66408caa..6a1fc443ee4 100644 --- a/mindsdb/integrations/handlers/file_handler/file_handler.py +++ b/mindsdb/integrations/handlers/file_handler/file_handler.py @@ -7,11 +7,15 @@ from mindsdb_sql_parser.ast import CreateTable, DropTables, Insert, Select, Identifier from mindsdb_sql_parser.ast.base import ASTNode +from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE from mindsdb.api.executor.utilities.sql import query_dfs from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse +from mindsdb.integrations.libs.response import ( + RESPONSE_TYPE, + HandlerResponse as Response, + HandlerStatusResponse as StatusResponse, + INF_SCHEMA_COLUMNS_NAMES_SET, +) from mindsdb.utilities import log @@ -211,16 +215,23 @@ def get_tables(self) -> Response: def get_columns(self, table_name) -> Response: file_meta = self.file_controller.get_file_meta(table_name) + if file_meta is None: + result = Response( + RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET)) + ) + result.to_columns_table_response(map_type_fn=lambda _: MYSQL_DATA_TYPE.TEXT) + return result result = Response( RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame( [ { - "Field": x["name"].strip() if isinstance(x, dict) else x.strip(), - "Type": "str", + "COLUMN_NAME": x["name"].strip() if isinstance(x, dict) else x.strip(), + "DATA_TYPE": "str", } for x in file_meta["columns"] ] ), ) + result.to_columns_table_response(map_type_fn=lambda _: MYSQL_DATA_TYPE.TEXT) return result diff --git a/mindsdb/integrations/handlers/financial_modeling_prep_handler/README.MD b/mindsdb/integrations/handlers/financial_modeling_prep_handler/README.MD deleted file mode 100644 index e05f43d6a54..00000000000 --- a/mindsdb/integrations/handlers/financial_modeling_prep_handler/README.MD +++ /dev/null @@ -1,80 +0,0 @@ -# Financial Modeling Prep API Handler - -This handler integrates with the [Financial Modeling Prep API](https://site.financialmodelingprep.com/developer/docs) to allow users to use ML for financial analysis and predictions. - -### Connect to the Financial Modeling Prep API -We start by creating a database to connect to the Financial Modeling Prep API. -Retrieve an api key from: (https://site.financialmodelingprep.com/developer/docs) - -~~~~sql -CREATE DATABASE fmp_datasource -WITH - ENGINE = 'FINANCIAL_MODELING_PREP' - PARAMETERS = { - "api_key": - }; -~~~~ - -Now, you can use this established connection to query your database as follows: - -### Select Data - -~~~~sql -SELECT * -FROM fmp_datasource.historical_prices -WHERE symbol = 'AAPL' -~~~~ - -| Date | Open | High | Low | Close | Adj Close | Volume | Unadjusted Volume | Change | Change Percent | VWAP | Change Over Time | -|------------|--------|--------|--------|--------|-----------|----------|-------------------|--------|----------------|---------|------------------| -| 2024-05-13 | 185.435| 187.1 | 184.62 | 186.93 | 186.93 | 50931607 | 50931607 | 1.5 | 0.80621242 | 186.22 | 0.0080621242 | -| 2024-05-10 | 184.9 | 185.09 | 182.13 | 183.05 | 183.05 | 50759496 | 50759496 | -1.85 | -1 | 183.7925| -0.01 | -| 2024-05-09 | 182.56 | 184.66 | 182.11 | 184.57 | 184.32 | 48982972 | 48982972 | 2.01 | 1.1 | 183.475 | 0.011 | -| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ...| ... | -| 2019-07-17 | 183.45 | 184.9 | 181.32 | 182.4 | 182.15 | 77305771 | 77305771 | -1.05 | -0.57236 | 183.0175| -0.0057236 | - -### Use LIMIT Clause -~~~~sql -SELECT * -FROM fmp_datasource.historical_prices -WHERE symbol = 'AAPL' -LIMIT 5 -~~~~ - -| Date | Open | High | Low | Close | Adj Close | Volume | Unadjusted Volume | Change | Change Percent | VWAP | Change Over Time | -|------------|--------|--------|--------|--------|-----------|----------|-------------------|--------|----------------|---------|------------------| -| 2024-05-13 | 185.435| 187.1 | 184.62 | 186.93 | 186.93 | 50931607 | 50931607 | 1.5 | 0.80621242 | 186.22 | 0.0080621242 | -| 2024-05-10 | 184.9 | 185.09 | 182.13 | 183.05 | 183.05 | 50759496 | 50759496 | -1.85 | -1 | 183.7925| -0.01 | -| 2024-05-09 | 182.56 | 184.66 | 182.11 | 184.57 | 184.32 | 48982972 | 48982972 | 2.01 | 1.1 | 183.475 | 0.011 | -| 2024-05-08 | 182.85 | 183.07 | 181.45 | 182.74 | 182.49 | 45057087 | 45057087 | -0.11 | -0.0601586 | 182.5275| -0.000601586 | -| 2024-05-07 | 183.45 | 184.9 | 181.32 | 182.4 | 182.15 | 77305771 | 77305771 | -1.05 | -0.57236 | 183.0175| -0.0057236 | - - -To customize specified date range, (from_date, to_date): -~~~~sql -SELECT * -FROM fmp_datasource.historical_prices -WHERE symbol = 'AAPL'; -AND from_date = '2023-10-10' -AND to_date = '2023-12-10' -~~~~ - -### Response for AAPL Daily Chart Data - -Displays closing price of the company's stock for each day in a specified date range - -| Date | Close | -|------------|--------| -| 2023-12-08 | 195.71 | -| 2023-12-07 | 194.27 | -| 2023-12-06 | 192.32 | -| 2023-12-05 | 193.42 | -| 2023-12-04 | 189.43 | -| 2023-12-01 | 191.24 | -| 2023-11-30 | 189.95 | -| ... | ... | -| 2023-11-20 | 191.45 | -| 2023-11-17 | 189.69 | -| 2023-11-16 | 189.71 | - - diff --git a/mindsdb/integrations/handlers/financial_modeling_prep_handler/__about__.py b/mindsdb/integrations/handlers/financial_modeling_prep_handler/__about__.py deleted file mode 100644 index bf7d40c9001..00000000000 --- a/mindsdb/integrations/handlers/financial_modeling_prep_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Financial Modeling Prep handler' -__package_name__ = 'mindsdb_financial_modeling_prep_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Financial Modeling Prep API" -__author__ = 'Riyana Dutta' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2024- mindsdb' diff --git a/mindsdb/integrations/handlers/financial_modeling_prep_handler/__init__.py b/mindsdb/integrations/handlers/financial_modeling_prep_handler/__init__.py deleted file mode 100644 index f0bbdb52765..00000000000 --- a/mindsdb/integrations/handlers/financial_modeling_prep_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .financial_modeling_handler import FinancialModelingHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Financial_Modeling_Prep' -name = "financial_modeling_prep" -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/financial_modeling_prep_handler/financial_modeling_handler.py b/mindsdb/integrations/handlers/financial_modeling_prep_handler/financial_modeling_handler.py deleted file mode 100644 index 0d354490cd1..00000000000 --- a/mindsdb/integrations/handlers/financial_modeling_prep_handler/financial_modeling_handler.py +++ /dev/null @@ -1,56 +0,0 @@ -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.integrations.handlers.financial_modeling_prep_handler.financial_modeling_tables import HistoricalPriceTable - -from mindsdb.utilities import log -import requests - -logger = log.getLogger(__name__) - - -class FinancialModelingHandler(APIHandler): - - name = "financial_modeling_prep" - - def __init__(self, name, connection_data: dict, **kwargs): - super().__init__(name) - - self.api_key = None - self.connection_data = connection_data - if "api_key" not in connection_data: - raise Exception( - "FINANCIAL_MODELING engine requires an API key. Retrieve an API key from https://site.financialmodelingprep.com/developer. See financial_modeling_prep_handler/README.MD on how to include API key in query." - ) - self.api_key = connection_data['api_key'] - self.client = None - self.is_connected = False - - historical_prices = HistoricalPriceTable(self) - self._register_table('historical_prices', historical_prices) - - def connect(self): - self.is_connected = True - base_url = "https://financialmodelingprep.com/api/v3/historical-price-full/" - return base_url - - def check_connection(self) -> StatusResponse: - """ Check connection to the handler - Returns: - HandlerStatusResponse - """ - base_url = 'https://financialmodelingprep.com/api/v3/search' - param = { - 'query': 'AA', - 'apikey': self.api_key, - 'limit': 5 - } - - response = requests.get(base_url, param) - if response.status_code == 200: - return StatusResponse(success=True) - else: - raise Exception( - "API key provided in query is not valid. Retrieve a valid API key from https://site.financialmodelingprep.com/developer. See financial_modeling_prep_handler/README.MD on how to include API key in query." - ) diff --git a/mindsdb/integrations/handlers/financial_modeling_prep_handler/financial_modeling_tables.py b/mindsdb/integrations/handlers/financial_modeling_prep_handler/financial_modeling_tables.py deleted file mode 100644 index 12b23593da7..00000000000 --- a/mindsdb/integrations/handlers/financial_modeling_prep_handler/financial_modeling_tables.py +++ /dev/null @@ -1,89 +0,0 @@ -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb_sql_parser import ast - -from typing import Dict, List - -import pandas as pd - -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - RESPONSE_TYPE -) -import requests - - -class HistoricalPriceTable(APITable): - - def _get_historical_price_endpoint_params_from_conditions(self, conditions: List) -> Dict: - params = {} - for op, arg1, arg2 in conditions: - if arg1 == 'symbol': - if op != '=': - raise NotImplementedError - params['symbol'] = arg2 - if arg1 == "from_date": - if op != '=': - raise NotImplementedError - params['from'] = arg2 - if arg1 == "to_date": - if op != '=': - raise NotImplementedError - params['to'] = arg2 - - return params - - def select(self, query: ast.Select) -> pd.DataFrame: - """Selects data from the FinancialModeling API and returns it as a pandas DataFrame. - - Returns dataframe representing the FinancialModeling API results. - - Args: - query (ast.Select): Given SQL SELECT query - """ - conditions = extract_comparison_conditions(query.where) - params = self._get_historical_price_endpoint_params_from_conditions(conditions) - - if query.limit and query.limit.value: - limit_value = query.limit.value - params['limit'] = limit_value - - historical_prices = self.get_historical_price_chart(params=params) - - return historical_prices - - def get_historical_price_chart(self, params: Dict = None) -> pd.DataFrame: - base_url = self.handler.connect() - if 'symbol' not in params: - raise ValueError('Missing "symbol" param') - symbol = params['symbol'] - params.pop('symbol') - - limitParam = False - limit = 0 - if 'limit' in params: - limit = params['limit'] - params.pop('limit') - limitParam = True - - url = f"{base_url}{symbol}" # https://financialmodelingprep.com/api/v3/historical-price-full/ - param = {'apikey': self.handler.api_key, **params} - - response = requests.get(url, param) - historical_data = response.json() - historical = historical_data.get("historical") - - if limitParam: - return pd.DataFrame(historical).head(limit) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - historical - ) - ) - - if historical: - return pd.DataFrame(historical) - else: - return pd.DataFrame() diff --git a/mindsdb/integrations/handlers/financial_modeling_prep_handler/icon.svg b/mindsdb/integrations/handlers/financial_modeling_prep_handler/icon.svg deleted file mode 100644 index 69a4a715d77..00000000000 --- a/mindsdb/integrations/handlers/financial_modeling_prep_handler/icon.svg +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/financial_modeling_prep_handler/tests/test_financial_modeling_handler.py b/mindsdb/integrations/handlers/financial_modeling_prep_handler/tests/test_financial_modeling_handler.py deleted file mode 100644 index 07af232cdb7..00000000000 --- a/mindsdb/integrations/handlers/financial_modeling_prep_handler/tests/test_financial_modeling_handler.py +++ /dev/null @@ -1,28 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.financial_modeling_prep_handler.financial_modeling_handler import FinancialModelingHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE - - -class FinancialModelingHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - connection_data = { - "api_key": "TdP0vQC4NIfRFIl6d31ZbhpbWLsSEccA" - } - cls.handler = FinancialModelingHandler('test_financial_handler', connection_data) - - def test_0_select_query(self): - query = "SELECT * FROM fmp_datasource.historical_prices WHERE symbol = 'AAPL'" - result = self.handler.query(query) - print(result) - assert result.type is RESPONSE_TYPE.TABLE - - def test_1_select_limit_query(self): - query = "SELECT * FROM fmp_datasource.historical_prices WHERE symbol = 'AAPL' LIMIT 5" - result = self.handler.query(query) - print(result) - assert result.data_frame.shape[0] == 5 - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/firebird_handler/README.md b/mindsdb/integrations/handlers/firebird_handler/README.md deleted file mode 100644 index ccf93116fca..00000000000 --- a/mindsdb/integrations/handlers/firebird_handler/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# Firebird Handler - -This is the implementation of the Firebird handler for MindsDB. - -## Firebird -Firebird is a relational database offering many ANSI SQL standard features that runs on Linux, Windows, and a variety of Unix platforms. Firebird offers excellent concurrency, high performance, and powerful language support for stored procedures and triggers. It has been used in production systems, under a variety of names, since 1981. -
-https://firebirdsql.org/en/about-firebird/#:~:text=Firebird%20is%20a%20relational%20database,for%20stored%20procedures%20and%20triggers. - -## Implementation -This handler was implemented using the `fdb` library, the Python driver for Firebird. - -The required arguments to establish a connection are, -* `host`: the host name or IP address of the Firebird server -* `database`: the port to use when connecting with the Firebird server -* `user`: the user to authenticate the user with the Firebird server -* `password`: the password to authenticate the user with the Firebird server - -## Usage -In order to make use of this handler and connect to a Firebird server in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE firebird_datasource -WITH -engine='firebird', -parameters={ - "host": "localhost", - "database": r"C:\Users\minura\Documents\mindsdb\example.fdb", - "user": "sysdba", - "password": "password" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM firebird_datasource.example_tbl -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/firebird_handler/__about__.py b/mindsdb/integrations/handlers/firebird_handler/__about__.py deleted file mode 100644 index 107dce66217..00000000000 --- a/mindsdb/integrations/handlers/firebird_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Firebird handler' -__package_name__ = 'mindsdb_firebird_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Firebird" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/firebird_handler/__init__.py b/mindsdb/integrations/handlers/firebird_handler/__init__.py deleted file mode 100644 index 907e03c88ee..00000000000 --- a/mindsdb/integrations/handlers/firebird_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .firebird_handler import FirebirdHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Firebird' -name = 'firebird' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/firebird_handler/connection_args.py b/mindsdb/integrations/handlers/firebird_handler/connection_args.py deleted file mode 100644 index 0598d1a26ad..00000000000 --- a/mindsdb/integrations/handlers/firebird_handler/connection_args.py +++ /dev/null @@ -1,34 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Firebird server.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': """ - The database name to use when connecting with the Firebird server. NOTE: use double backslashes (\\) for the - database path on a Windows machine. - """ - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Firebird server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Firebird server.', - 'secret': True - } -) - -connection_args_example = OrderedDict( - host='localhost', - database='/temp/test.db', - user='sysdba', - password='password' -) diff --git a/mindsdb/integrations/handlers/firebird_handler/firebird_handler.py b/mindsdb/integrations/handlers/firebird_handler/firebird_handler.py deleted file mode 100644 index 23150506891..00000000000 --- a/mindsdb/integrations/handlers/firebird_handler/firebird_handler.py +++ /dev/null @@ -1,240 +0,0 @@ -from typing import Optional - -import pandas as pd -import fdb - -from mindsdb_sql_parser import parse_sql -from sqlalchemy_firebird.base import FBDialect -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class FirebirdHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Firebird statements. - """ - - name = 'firebird' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'firebird' - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - self.connection = fdb.connect( - host=self.connection_data['host'], - database=self.connection_data['database'], - user=self.connection_data['user'], - password=self.connection_data['password'], - ) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Firebird {self.connection_data["database"]}, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cursor.description] - ) - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - renderer = SqlalchemyRender(FBDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = """ - SELECT RDB$RELATION_NAME - FROM RDB$RELATIONS - WHERE (RDB$SYSTEM_FLAG <> 1 OR RDB$SYSTEM_FLAG IS NULL) AND RDB$VIEW_BLR IS NULL - ORDER BY RDB$RELATION_NAME; - """ - result = self.native_query(query) - df = result.data_frame - df[df.columns[0]] = df[df.columns[0]].apply(lambda row: row.strip()) - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - query = f""" - SELECT - RF.RDB$FIELD_NAME FIELD_NAME, - CASE F.RDB$FIELD_TYPE - WHEN 7 THEN - CASE F.RDB$FIELD_SUB_TYPE - WHEN 0 THEN 'SMALLINT' - WHEN 1 THEN 'NUMERIC(' || F.RDB$FIELD_PRECISION || ', ' || (-F.RDB$FIELD_SCALE) || ')' - WHEN 2 THEN 'DECIMAL' - END - WHEN 8 THEN - CASE F.RDB$FIELD_SUB_TYPE - WHEN 0 THEN 'INTEGER' - WHEN 1 THEN 'NUMERIC(' || F.RDB$FIELD_PRECISION || ', ' || (-F.RDB$FIELD_SCALE) || ')' - WHEN 2 THEN 'DECIMAL' - END - WHEN 9 THEN 'QUAD' - WHEN 10 THEN 'FLOAT' - WHEN 12 THEN 'DATE' - WHEN 13 THEN 'TIME' - WHEN 14 THEN 'CHAR(' || (TRUNC(F.RDB$FIELD_LENGTH / CH.RDB$BYTES_PER_CHARACTER)) || ') ' - WHEN 16 THEN - CASE F.RDB$FIELD_SUB_TYPE - WHEN 0 THEN 'BIGINT' - WHEN 1 THEN 'NUMERIC(' || F.RDB$FIELD_PRECISION || ', ' || (-F.RDB$FIELD_SCALE) || ')' - WHEN 2 THEN 'DECIMAL' - END - WHEN 27 THEN 'DOUBLE' - WHEN 35 THEN 'TIMESTAMP' - WHEN 37 THEN 'VARCHAR(' || (TRUNC(F.RDB$FIELD_LENGTH / CH.RDB$BYTES_PER_CHARACTER)) || ')' - WHEN 40 THEN 'CSTRING' || (TRUNC(F.RDB$FIELD_LENGTH / CH.RDB$BYTES_PER_CHARACTER)) || ')' - WHEN 45 THEN 'BLOB_ID' - WHEN 261 THEN 'BLOB SUB_TYPE ' || F.RDB$FIELD_SUB_TYPE - ELSE 'RDB$FIELD_TYPE: ' || F.RDB$FIELD_TYPE || '?' - END FIELD_TYPE, - IIF(COALESCE(RF.RDB$NULL_FLAG, 0) = 0, NULL, 'NOT NULL') FIELD_NULL, - CH.RDB$CHARACTER_SET_NAME FIELD_CHARSET, - DCO.RDB$COLLATION_NAME FIELD_COLLATION, - COALESCE(RF.RDB$DEFAULT_SOURCE, F.RDB$DEFAULT_SOURCE) FIELD_DEFAULT, - F.RDB$VALIDATION_SOURCE FIELD_CHECK, - RF.RDB$DESCRIPTION FIELD_DESCRIPTION - FROM RDB$RELATION_FIELDS RF - JOIN RDB$FIELDS F ON (F.RDB$FIELD_NAME = RF.RDB$FIELD_SOURCE) - LEFT OUTER JOIN RDB$CHARACTER_SETS CH ON (CH.RDB$CHARACTER_SET_ID = F.RDB$CHARACTER_SET_ID) - LEFT OUTER JOIN RDB$COLLATIONS DCO ON ((DCO.RDB$COLLATION_ID = F.RDB$COLLATION_ID) AND (DCO.RDB$CHARACTER_SET_ID = F.RDB$CHARACTER_SET_ID)) - WHERE (RF.RDB$RELATION_NAME = '{table_name.upper()}') AND (COALESCE(RF.RDB$SYSTEM_FLAG, 0) = 0) - ORDER BY RF.RDB$FIELD_POSITION; - """ - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={'FIELD_NAME': 'column_name', 'FIELD_TYPE': 'data_type'}) - return result diff --git a/mindsdb/integrations/handlers/firebird_handler/icon.svg b/mindsdb/integrations/handlers/firebird_handler/icon.svg deleted file mode 100644 index 3249fb80610..00000000000 --- a/mindsdb/integrations/handlers/firebird_handler/icon.svg +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/firebird_handler/requirements.txt b/mindsdb/integrations/handlers/firebird_handler/requirements.txt deleted file mode 100644 index 256195764ab..00000000000 --- a/mindsdb/integrations/handlers/firebird_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -fdb -sqlalchemy-firebird >= 2.0.0, <3.0.0 diff --git a/mindsdb/integrations/handlers/firebird_handler/tests/__init__.py b/mindsdb/integrations/handlers/firebird_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/firebird_handler/tests/test_firebird_handler.py b/mindsdb/integrations/handlers/firebird_handler/tests/test_firebird_handler.py deleted file mode 100644 index 4fd922ea6ad..00000000000 --- a/mindsdb/integrations/handlers/firebird_handler/tests/test_firebird_handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.firebird_handler.firebird_handler import FirebirdHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class FirebirdHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "database": r"C:\Users\minura\Documents\mindsdb\test.fdb", - "user": "sysdba", - "password": "password" - } - } - cls.handler = FirebirdHandler('test_firebird_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM test_tbl" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_4_get_columns(self): - columns = self.handler.get_columns('test_tbl') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/frappe_handler/README.md b/mindsdb/integrations/handlers/frappe_handler/README.md deleted file mode 100644 index bcddd501096..00000000000 --- a/mindsdb/integrations/handlers/frappe_handler/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# Frappe API Handler - -This handler integrates with the [Frappe API](https://frappeframework.com/docs/v14/user/en/api/rest) to create and read Frappe Documents. - - -## Connect to the Frappe API -We start by creating a database to connect to the Frappe API. You'll need an [access token](https://frappeframework.com/docs/v14/user/en/api/rest) and the domain you want to send API requests to. - -Example -``` -CREATE DATABASE my_frappe -WITH - ENGINE = 'frappe' - PARAMETERS = { - "access_token": "TOKEN_HERE", - "domain": "DOMAIN_HERE" // e.g. https://mindsdbfrappe.com - }; -``` - -## Select Documents -To see if the connection was successful, try searching for all documents of a certain type. Currently, only the name is provided. You can see all of your document types at the URL `[YOUR_FRAPPE_DOMAIN]/app/doctype` - -``` -SELECT * -FROM my_frappe.documents -WHERE doctype = 'Expense Claim'; -``` - -Each row should look like this: - -| doctype | data | -|--------------| ----------------------| -| Expense Claim| { "name": "Claim 1" } | - -To get a full document, provide the name along with the type: - -``` -SELECT * -FROM my_frappe.documents -WHERE doctype = 'Expense Claim' AND name = 'Claim 1' -``` - -## Insert Documents - -To create a new document, insert it as a JSON string ([see creating documents](https://frappeframework.com/docs/v14/user/en/api/rest#create)) - -``` -INSERT INTO my_frappe.documents (doctype, data) -VALUES ('Expense Claim', '{ "posting_date": "2023-05-15", "company": "MindsDB", "amount": 100" }') -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/frappe_handler/__about__.py b/mindsdb/integrations/handlers/frappe_handler/__about__.py deleted file mode 100644 index d978c53b77e..00000000000 --- a/mindsdb/integrations/handlers/frappe_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Frappe handler' -__package_name__ = 'mindsdb_frappe_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the Frappe API" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/frappe_handler/__init__.py b/mindsdb/integrations/handlers/frappe_handler/__init__.py deleted file mode 100644 index 024ee1a806c..00000000000 --- a/mindsdb/integrations/handlers/frappe_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .frappe_handler import FrappeHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Frappe' -name = 'frappe' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/frappe_handler/frappe_client.py b/mindsdb/integrations/handlers/frappe_handler/frappe_client.py deleted file mode 100644 index 07992f73d17..00000000000 --- a/mindsdb/integrations/handlers/frappe_handler/frappe_client.py +++ /dev/null @@ -1,111 +0,0 @@ -import json -import requests -from typing import Dict, List - - -class FrappeClient(object): - """Client to interact with the Frappe API. - - Attributes: - domain (str): Path to Frappe domain to use (e.g. https://mindsdbfrappe.com). - access_token (str): Frappe authorization token to use for all API requests. - """ - - def __init__( - self, - domain: str, - access_token: str): - self.domain = domain - self.base_url = f'{self.domain}/api' - self.access_token = access_token - - self.headers = { - 'Authorization': f'token {self.access_token}', - } - - def get_document(self, doctype: str, name: str) -> Dict: - """Gets a document matching the given doctype from Frappe. - - See https://frappeframework.com/docs/v14/user/en/api/rest#listing-documents - Args: - doctype (str): The document type to retrieve. - name (str): Name of the document. - """ - document_response = requests.get( - f'{self.base_url}/resource/{doctype}/{name}', - headers=self.headers) - if not document_response.ok: - document_response.raise_for_status() - return document_response.json()['data'] - - def get_documents(self, doctype: str, limit: int = None, fields: List[str] = None, filters: List[List] = None) -> List[Dict]: - """Gets all documents matching the given doctype from Frappe. - - See https://frappeframework.com/docs/v14/user/en/api/rest#listing-documents - Args: - doctype (str): The document type to retrieve. - limit (int): At most, how many messages to return. - filters (List[List]): List of filters in the form [field, operator, value] e.g. ["amount", ">", 50] - """ - params = { - 'fields': json.dumps(["*"]) - } - if limit is not None: - params['limit_page_length'] = limit - if filters is not None: - params['filters'] = json.dumps(filters) - if fields is not None: - params['fields'] = json.dumps(fields) - documents_response = requests.get( - f'{self.base_url}/resource/{doctype}/', - params=params, - headers=self.headers, - allow_redirects=False) - if documents_response.is_redirect: - # We have to manually redirect to preserve the 'Authorization' header. - # See https://github.com/request/request/pull/1184/commits/210b326fd8625f358e06c59dc11e74468b1de515. - redirect_url = documents_response.headers.get('location', None) - if redirect_url is None: - raise requests.HTTPError('Could not find redirect URL') - documents_response = requests.get( - redirect_url, - params=params, - headers=self.headers, - allow_redirects=False) - - if not documents_response.ok: - documents_response.raise_for_status() - return documents_response.json()['data'] - - def post_document( - self, - doctype: str, - data: Dict): - """Creates a new document of the given doctype. - See https://frappeframework.com/docs/v14/user/en/api/rest#listing-documents - - Args: - doctype (str): Type of the document to create. - data (Dict): Document object. - """ - post_response = requests.post( - f'{self.base_url}/resource/{doctype}', - json=data, - headers=self.headers) - if not post_response.ok: - if 400 <= post_response.status_code < 600: - raise requests.HTTPError(f'{post_response.reason}: {post_response.text}', response=post_response) - return post_response.json()['data'] - - def ping(self) -> bool: - """Sends a basic request to the Frappe API to see if it succeeds. - - Returns whether or not the connection to the Frappe API is valid. - See https://frappeframework.com/docs/v14/user/en/api/rest#1-token-based-authentication - """ - - # No ping or similar endpoint exists, so we'll try getting the logged in user. - user_response = requests.get( - f'{self.base_url}/method/frappe.auth.get_logged_user', - headers=self.headers) - return user_response.ok diff --git a/mindsdb/integrations/handlers/frappe_handler/frappe_handler.py b/mindsdb/integrations/handlers/frappe_handler/frappe_handler.py deleted file mode 100644 index c446be4fc28..00000000000 --- a/mindsdb/integrations/handlers/frappe_handler/frappe_handler.py +++ /dev/null @@ -1,210 +0,0 @@ -import json -import pandas as pd -import datetime as dt -from typing import Dict - -from mindsdb.integrations.handlers.frappe_handler.frappe_tables import FrappeDocumentsTable -from mindsdb.integrations.handlers.frappe_handler.frappe_client import FrappeClient -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class FrappeHandler(APIHandler): - """A class for handling connections and interactions with the Frappe API. - - Attributes: - client (FrappeClient): The `FrappeClient` object for interacting with the Frappe API. - is_connected (bool): Whether or not the API client is connected to Frappe. - domain (str): Frappe domain to send API requests to. - access_token (str): OAuth token to use for authentication. - """ - - def __init__(self, name: str = None, **kwargs): - """Registers all API tables and prepares the handler for an API connection. - - Args: - name: (str): The handler name to use - """ - super().__init__(name) - self.client = None - self.is_connected = False - - args = kwargs.get('connection_data', {}) - if 'access_token' not in args: - raise ValueError('"access_token" parameter required for authentication') - if 'domain' not in args: - raise ValueError('"domain" parameter required to connect to your Frappe instance') - self.access_token = args['access_token'] - self.domain = args['domain'] - - document_data = FrappeDocumentsTable(self) - self._register_table('documents', document_data) - self.connection_data = args - - def back_office_config(self): - tools = { - 'register_sales_invoice': 'have to be used by assistant to register a sales invoice. Input is JSON object serialized as a string. Due date have to be passed in format: "yyyy-mm-dd".', - 'check_company_exists': 'useful to check the company is exist. Input is company', - 'check_expense_type': 'useful to check the expense_type is exist. Input is expense_type', - 'check_customer': 'useful to check the customer is exist. Input is customer', - 'check_item_code': 'have to be used to check the item code. Input is item_code', - } - return { - 'tools': tools, - } - - def register_sales_invoice(self, data): - """ - input is: - { - "due_date": "2023-05-31", - "customer": "ksim", - "items": [ - { - "name": "T-shirt--", - "description": "T-shirt", - "quantity": 1 - } - ] - } - """ - invoice = json.loads(data) - date = dt.datetime.strptime(invoice['due_date'], '%Y-%m-%d') - if date <= dt.datetime.today(): - return 'Error: due_date have to be in the future' - - for item in invoice['items']: - # rename column - item['qty'] = item['quantity'] - del item['quantity'] - - # add required fields - item['uom'] = "Nos" - item['conversion_factor'] = 1 - - income_account = self.connection_data.get('income_account', "Sales Income - C8") - item['income_account'] = income_account - - try: - self.connect() - self.client.post_document('Sales Invoice', invoice) - except Exception as e: - return f"Error: {e}" - return "Success" - - def check_item_code(self, item_code): - self.connect() - result = self.client.get_documents('Item', filters=[['item_code', '=', item_code]]) - if len(result) == 1: - return True - return "Item doesn't exist: please use different name" - - def check_company_exists(self, name): - self.connect() - result = self.client.get_documents('Company', filters=[['name', '=', name]]) - if len(result) == 1: - return True - return "Company doesn't exist: please use different name" - - def check_expense_type(self, name): - self.connect() - result = self.client.get_documents('Expense Claim Type', filters=[['name', '=', name]]) - if len(result) == 1: - return True - return "Expense Claim Type doesn't exist: please use different name" - - def check_customer(self, name): - self.connect() - result = self.client.get_documents('Customer', filters=[['name', '=', name]]) - if len(result) == 1: - return True - return "Customer doesn't exist" - - def connect(self) -> FrappeClient: - """Creates a new API client if needed and sets it as the client to use for requests. - - Returns newly created Frappe API client, or current client if already set. - """ - if self.is_connected is True and self.client: - return self.client - - if self.domain and self.access_token: - self.client = FrappeClient(self.domain, self.access_token) - - self.is_connected = True - return self.client - - def check_connection(self) -> StatusResponse: - """Checks connection to Frappe API by sending a ping request. - - Returns StatusResponse indicating whether or not the handler is connected. - """ - - response = StatusResponse(False) - - try: - client = self.connect() - client.ping() - response.success = True - - except Exception as e: - logger.error(f'Error connecting to Frappe API: {e}!') - response.error_message = e - - self.is_connected = response.success - return response - - def native_query(self, query: str = None) -> Response: - ast = parse_sql(query) - return self.query(ast) - - def _document_to_dataframe_row(self, doctype, document: Dict) -> Dict: - return { - 'doctype': doctype, - 'data': json.dumps(document) - } - - def _get_document(self, params: Dict = None) -> pd.DataFrame: - client = self.connect() - doctype = params['doctype'] - document = client.get_document(doctype, params['name']) - return pd.DataFrame.from_records([self._document_to_dataframe_row(doctype, document)]) - - def _get_documents(self, params: Dict = None) -> pd.DataFrame: - client = self.connect() - doctype = params['doctype'] - limit = params.get('limit', None) - filters = params.get('filters', None) - fields = params.get('fields', None) - documents = client.get_documents(doctype, limit=limit, fields=fields, filters=filters) - return pd.DataFrame.from_records([self._document_to_dataframe_row(doctype, d) for d in documents]) - - def _create_document(self, params: Dict = None) -> pd.DataFrame: - client = self.connect() - doctype = params['doctype'] - new_document = client.post_document(doctype, json.loads(params['data'])) - return pd.DataFrame.from_records([self._document_to_dataframe_row(doctype, new_document)]) - - def call_frappe_api(self, method_name: str = None, params: Dict = None) -> pd.DataFrame: - """Calls the Frappe API method with the given params. - - Returns results as a pandas DataFrame. - - Args: - method_name (str): Method name to call (e.g. get_document) - params (Dict): Params to pass to the API call - """ - if method_name == 'get_documents': - return self._get_documents(params) - if method_name == 'get_document': - return self._get_document(params) - if method_name == 'create_document': - return self._create_document(params) - raise NotImplementedError('Method name {} not supported by Frappe API Handler'.format(method_name)) diff --git a/mindsdb/integrations/handlers/frappe_handler/frappe_tables.py b/mindsdb/integrations/handlers/frappe_handler/frappe_tables.py deleted file mode 100644 index e4100c3d3f0..00000000000 --- a/mindsdb/integrations/handlers/frappe_handler/frappe_tables.py +++ /dev/null @@ -1,82 +0,0 @@ -import pandas as pd -from typing import List - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb_sql_parser import ast - - -class FrappeDocumentsTable(APITable): - - def select(self, query: ast.Select) -> pd.DataFrame: - """Selects data from the Frappe API and returns it as a pandas DataFrame. - - Returns dataframe representing the Frappe API results. - - Args: - query (ast.Select): Given SQL SELECT query - """ - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if arg1 == 'doctype': - if op != '=': - raise NotImplementedError - params['doctype'] = arg2 - elif arg1 == 'name': - params['name'] = arg2 - else: - filters.append([arg1, op, arg2]) - - if 'doctype' not in params: - raise ValueError('"doctype" parameter required') - - if query.limit: - params['limit'] = query.limit.value - if filters: - params['filters'] = filters - - if 'name' in params: - document_data = self.handler.call_frappe_api( - method_name='get_document', - params=params - ) - else: - document_data = self.handler.call_frappe_api( - method_name='get_documents', - params=params - ) - - # Only return the columns we need to. - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = document_data.columns - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(document_data) == 0: - return pd.DataFrame([], columns=columns) - - # Remove columns not part of select. - for col in set(document_data.columns).difference(set(columns)): - document_data = document_data.drop(col, axis=1) - - return document_data - - def insert(self, query: ast.Insert) -> pd.DataFrame: - columns = [col.name for col in query.columns] - - for row in query.values: - params = dict(zip(columns, row)) - - self.handler.call_frappe_api('create_document', params) - - def get_columns(self) -> List: - """Gets all columns to be returned in pandas DataFrame responses""" - return ['doctype', 'data'] diff --git a/mindsdb/integrations/handlers/frappe_handler/icon.svg b/mindsdb/integrations/handlers/frappe_handler/icon.svg deleted file mode 100644 index cae7eb8de07..00000000000 --- a/mindsdb/integrations/handlers/frappe_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/frappe_handler/requirements.txt b/mindsdb/integrations/handlers/frappe_handler/requirements.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/gcs_handler/README.md b/mindsdb/integrations/handlers/gcs_handler/README.md deleted file mode 100644 index d6317b75f9d..00000000000 --- a/mindsdb/integrations/handlers/gcs_handler/README.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: Google Cloud Storage -sidebarTitle: Google Cloud Storage ---- - -This documentation describes the integration of MindsDB with [Google Cloud Storage](https://cloud.google.com/storage), an object storage service that offers industry-leading scalability, data availability, security, and performance. - -## Prerequisites - -1. Before proceeding, ensure that MindsDB is installed locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect BigQuery to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your GCS bucket from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE gcs_datasource -WITH - engine = 'gcs', - parameters = { - "bucket": "", -- optional - "service_account_keys": "/tmp/keys.json" - }; -``` - -Required connection parameters include the following: - -- `service_account_keys`: The full path to the service account key file. -- `service_account_json`: The content of a JSON file defined by the `service_account_keys` parameter. - -Optional connection parameters include the following: - -* `bucket`: The name of the GCS bucket. If it is not set: all available buckets will be used (can slow down, getting list of files) - - - One of `service_account_keys` or `service_account_json` has to be provided to - establish a connection to GCS. If both are provided, `service_account_keys` will be considered. - - -## Usage - -Retrieve data from a specified object (file) in the GCS bucket by providing the integration name and the object key: - -```sql -SELECT * -FROM gcs_datasource.`my-file.csv`; -LIMIT 10; -``` - -Retrieve list of files (without filtering by extension): - -```sql -SELECT * -FROM gcs_datasource.files LIMIT 10 -``` - -Retrieve a list of files with their content (the content column needs to be requested explicitly): - -```sql -SELECT path, content -FROM gcs_datasource.files LIMIT 10 -``` - - -Wrap the object key in backticks (\`) to avoid any issues parsing the SQL statements provided. This is especially important when the object key contains spaces, special characters or prefixes, such as `my-folder/my-file.csv`. - -At the moment, the supported file formats are CSV, TSV, JSON, and Parquet. - - - -The above examples utilize `gcs_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the GCS bucket. -* **Checklist**: - 1. Make sure the GCS bucket exists. - 2. Confirm that provided service account credentials are correct. Try making a direct connection to the GCS bucket using the gcloud CLI. - 3. Ensure a stable network between MindsDB and GCP. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing object names containing spaces, special characters or prefixes. -* **Checklist**: - 1. Ensure object names with spaces, special characters or prefixes are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel/travel_data.csv - * Incorrect: SELECT * FROM integration.'travel/travel_data.csv' - * Correct: SELECT * FROM integration.\`travel/travel_data.csv\` - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/gcs_handler/__about__.py b/mindsdb/integrations/handlers/gcs_handler/__about__.py deleted file mode 100644 index ad2d3fdddcd..00000000000 --- a/mindsdb/integrations/handlers/gcs_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB GCS handler' -__package_name__ = 'mindsdb_gcs_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Google Cloud Storage" -__author__ = 'Abhilash K R' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/gcs_handler/__init__.py b/mindsdb/integrations/handlers/gcs_handler/__init__.py deleted file mode 100644 index 6cf01295f87..00000000000 --- a/mindsdb/integrations/handlers/gcs_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .gcs_handler import GcsHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Google Cloud Storage' -name = 'gcs' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/gcs_handler/connection_args.py b/mindsdb/integrations/handlers/gcs_handler/connection_args.py deleted file mode 100644 index 779d5e3974b..00000000000 --- a/mindsdb/integrations/handlers/gcs_handler/connection_args.py +++ /dev/null @@ -1,29 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - -connection_args = OrderedDict( - bucket={ - 'type': ARG_TYPE.STR, - 'description': 'The name of the GCS bucket.', - 'label': 'GCS Bucket' - }, - service_account_keys={ - 'type': ARG_TYPE.PATH, - 'description': 'Path to the service account JSON file', - 'label': 'Path to the service account JSON file', - 'secret': True - }, - service_account_json={ - 'type': ARG_TYPE.DICT, - 'description': 'Content of service account JSON file', - 'label': 'Content of service account JSON file', - 'secret': True - } -) - - -connection_args_example = OrderedDict( - bucket='my-bucket', - service_account_keys='/Users/sam/Downloads/svc.json' -) diff --git a/mindsdb/integrations/handlers/gcs_handler/gcs_handler.py b/mindsdb/integrations/handlers/gcs_handler/gcs_handler.py deleted file mode 100644 index 31bb0f02b77..00000000000 --- a/mindsdb/integrations/handlers/gcs_handler/gcs_handler.py +++ /dev/null @@ -1,375 +0,0 @@ -from contextlib import contextmanager - -import json -import duckdb -import pandas as pd -import fsspec -import google.auth -from google.cloud import storage -from typing import Text, Dict, Optional, List -from duckdb import DuckDBPyConnection - -from mindsdb.integrations.handlers.gcs_handler.gcs_tables import ( - ListFilesTable, - FileTable -) -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import Select, Identifier, Insert, Star, Constant - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -from mindsdb.integrations.libs.api_handler import APIHandler - -logger = log.getLogger(__name__) - - -class GcsHandler(APIHandler): - """ - This handler handles connection and execution of the SQL statements on GCS. - """ - - name = 'gcs' - - supported_file_formats = ['csv', 'tsv', 'json', 'parquet'] - - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs): - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the GCS account. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - self.is_select_query = False - self.service_account_json = None - self.connection = None - - if 'service_account_keys' not in self.connection_data and 'service_account_json' not in self.connection_data: - raise ValueError('service_account_keys or service_account_json parameter must be provided.') - - if 'service_account_json' in self.connection_data: - self.service_account_json = self.connection_data["service_account_json"] - - if 'service_account_keys' in self.connection_data: - with open(self.connection_data["service_account_keys"], "r") as f: - self.service_account_json = json.loads(f.read()) - - self.is_connected = False - - self._files_table = ListFilesTable(self) - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> DuckDBPyConnection: - """ - Establishes a connection to the GCS account via DuckDB. - - Raises: - KeyError: If the required connection parameters are not provided. - - Returns: - DuckDBPyConnection : A client object to the GCS account. - """ - if self.is_connected is True: - return self.connection - - # Connect to GCS and configure mandatory credentials. - self.connection = self._connect_storage_client() - self.is_connected = True - - return self.connection - - @contextmanager - def _connect_duckdb(self): - """ - Creates temporal duckdb database which is able to connect to the GCS account. - Have to be used as context manager - - Returns: - DuckDBPyConnection - """ - # Connect to GCS via DuckDB. - duckdb_conn = duckdb.connect(":memory:") - - # Configure mandatory credentials. - credentials, project_id = google.auth.load_credentials_from_dict(self.service_account_json) - gcs = fsspec.filesystem("gcs", project=project_id, credentials=credentials) - duckdb_conn = duckdb.connect() - duckdb_conn.register_filesystem(gcs) - - try: - yield duckdb_conn - finally: - duckdb_conn.close() - - def _connect_storage_client(self) -> storage.Client: - """ - Establishes a connection to the GCS account via google-cloud-storage. - - Returns: - storage.Client: A client object to the GCS account. - """ - return storage.Client.from_service_account_info(self.service_account_json) - - def disconnect(self): - """ - Closes the connection to the GCP account if it's currently open. - """ - if not self.is_connected: - return - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the GCS bucket. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - # Check connection via storage client. - try: - storage_client = self._connect_storage_client() - if 'bucket' in self.connection_data: - storage_client.get_bucket(self.connection_data['bucket']) - else: - storage_client.list_buckets() - response.success = True - storage_client.close() - except Exception as e: - logger.error(f'Error connecting to GCS with the given credentials, {e}!') - response.error_message = str(e) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def _get_bucket(self, key): - if 'bucket' in self.connection_data: - return self.connection_data['bucket'], key - - # get bucket from first part of the key - ar = key.split('/') - return ar[0], '/'.join(ar[1:]) - - def read_as_table(self, key) -> pd.DataFrame: - """ - Read object as dataframe. Uses duckdb - """ - - bucket, key = self._get_bucket(key) - - with self._connect_duckdb() as connection: - - cursor = connection.execute(f"SELECT * FROM 'gs://{bucket}/{key}'") - - return cursor.fetchdf() - - def _read_as_content(self, key) -> None: - """ - Read object as content - """ - bucket, key = self._get_bucket(key) - - client = self.connect() - - bucket = client.bucket(bucket) - blob = bucket.blob(key) - return blob.download_as_string() - - def add_data_to_table(self, key, df) -> None: - """ - Writes the table to a file in the gcs bucket. - - Raises: - CatalogException: If the table does not exist in the DuckDB connection. - """ - - # Check if the file exists in the gcs bucket. - bucket, key = self._get_bucket(key) - - storage_client = self._connect_storage_client() - bucketObj = storage_client.bucket(bucket) - stats = storage.Blob(bucket=bucketObj, name=key).exists(storage_client) - storage_client.close() - if not stats: - raise Exception(f'Error querying the file {key} in the bucket {bucket}!') - - with self._connect_duckdb() as connection: - # copy - connection.execute(f"CREATE TABLE tmp_table AS SELECT * FROM 'gs://{bucket}/{key}'") - - # insert - connection.execute("INSERT INTO tmp_table BY NAME SELECT * FROM df") - - # upload - connection.execute(f"COPY tmp_table TO 'gs://{bucket}/{key}'") - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Raises: - ValueError: If the file format is not supported or the file does not exist in the GCS bucket. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - - self.connect() - - if isinstance(query, Select): - table_name = query.from_table.parts[-1] - - if table_name == 'files': - table = self._files_table - df = table.select(query) - - # add content - has_content = False - for target in query.targets: - if isinstance(target, Identifier) and target.parts[-1].lower() == 'content': - has_content = True - break - if has_content: - df['content'] = df['path'].apply(self._read_as_content) - else: - extension = table_name.split('.')[-1] - if extension not in self.supported_file_formats: - logger.error(f'The file format {extension} is not supported!') - raise ValueError(f'The file format {extension} is not supported!') - - table = FileTable(self, table_name=table_name) - df = table.select(query) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - elif isinstance(query, Insert): - table_name = query.table.parts[-1] - table = FileTable(self, table_name=table_name) - table.insert(query) - response = Response(RESPONSE_TYPE.OK) - else: - raise NotImplementedError - - return response - - def get_objects(self, limit=None, buckets=None) -> List[dict]: - storage_client = self._connect_storage_client() - if "bucket" in self.connection_data: - add_bucket_to_name = False - scan_buckets = [self.connection_data["bucket"]] - else: - add_bucket_to_name = True - scan_buckets = [b.name for b in storage_client.list_buckets()] - - objects = [] - for bucket in scan_buckets: - if buckets is not None and bucket not in buckets: - continue - - blobs = storage_client.list_blobs(bucket) - if not blobs: - continue - - for blob in blobs: - if blob.storage_class != 'STANDARD': - continue - - obj = {} - obj['Bucket'] = bucket - if add_bucket_to_name: - # bucket is part of the name - obj['Key'] = f'{bucket}/{blob.name}' - objects.append(obj) - if limit is not None and len(objects) >= limit: - break - - return objects - - def get_tables(self) -> Response: - """ - Retrieves a list of tables (objects) in the gcs bucket. - - Each object is considered a table. Only the supported file formats are considered as tables. - - Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. - """ - - # Get only the supported file formats. - # Wrap the object names with backticks to prevent SQL syntax errors. - supported_names = [ - f"`{obj['Key']}`" - for obj in self.get_objects() - if obj['Key'].split('.')[-1] in self.supported_file_formats - ] - - # virtual table with list of files - supported_names.insert(0, 'files') - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - supported_names, - columns=['table_name'] - ) - ) - - return response - - def get_columns(self, table_name: str) -> Response: - """ - Retrieves column details for a specified table (object) in the gcs bucket. - - Args: - table_name (Text): The name of the table for which to retrieve column information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details, formatted as per the `Response` class. - """ - query = Select( - targets=[Star()], - from_table=Identifier(parts=[table_name]), - limit=Constant(1) - ) - - result = self.query(query) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - { - 'column_name': result.data_frame.columns, - 'data_type': [data_type if data_type != 'object' else 'string' for data_type in result.data_frame.dtypes] - } - ) - ) - - return response diff --git a/mindsdb/integrations/handlers/gcs_handler/gcs_tables.py b/mindsdb/integrations/handlers/gcs_handler/gcs_tables.py deleted file mode 100644 index ce74f98b4aa..00000000000 --- a/mindsdb/integrations/handlers/gcs_handler/gcs_tables.py +++ /dev/null @@ -1,50 +0,0 @@ -from mindsdb.integrations.libs.api_handler import APIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator -import pandas as pd -from typing import List - - -class ListFilesTable(APIResource): - - def list(self, - targets: List[str] = None, - conditions: List[FilterCondition] = None, - limit: int = None, - *args, **kwargs) -> pd.DataFrame: - - buckets = None - for condition in conditions: - if condition.column == 'bucket': - if condition.op == FilterOperator.IN: - buckets = condition.value - elif condition.op == FilterOperator.EQUAL: - buckets = [condition.value] - condition.applied = True - - data = [] - for obj in self.handler.get_objects(limit=limit, buckets=buckets): - path = obj['Key'] - path = path.replace('`', '') - item = { - 'path': path, - 'bucket': obj['Bucket'], - 'name': path[path.rfind('/') + 1:], - 'extension': path[path.rfind('.') + 1:] - } - - data.append(item) - - return pd.DataFrame(data=data, columns=self.get_columns()) - - def get_columns(self) -> List[str]: - return ["path", "name", "extension", "bucket", "content"] - - -class FileTable(APIResource): - - def list(self, targets: List[str] = None, table_name=None, *args, **kwargs) -> pd.DataFrame: - return self.handler.read_as_table(table_name) - - def add(self, data, table_name=None): - df = pd.DataFrame(data) - return self.handler.add_data_to_table(table_name, df) diff --git a/mindsdb/integrations/handlers/gcs_handler/icon.svg b/mindsdb/integrations/handlers/gcs_handler/icon.svg deleted file mode 100644 index d30e0030858..00000000000 --- a/mindsdb/integrations/handlers/gcs_handler/icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/gcs_handler/requirements.txt b/mindsdb/integrations/handlers/gcs_handler/requirements.txt deleted file mode 100644 index a950cb7780e..00000000000 --- a/mindsdb/integrations/handlers/gcs_handler/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -fsspec -gcsfs -google-auth -google-cloud-storage -aiohttp>=3.13.3 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/gitlab_handler/README.md b/mindsdb/integrations/handlers/gitlab_handler/README.md deleted file mode 100644 index d8ab4df33d3..00000000000 --- a/mindsdb/integrations/handlers/gitlab_handler/README.md +++ /dev/null @@ -1,72 +0,0 @@ -# GitLab Handler - -This is the implementation of the GitLab handler for MindsDB. This interface support to connect to GitLab API and pull data into MindsDB - -## GitLab Handler Implementation - -This handler was implemented using the [python-gitlab](https://github.com/python-gitlab/python-gitlab) library. -python-gitlab is a Python library that wraps GitLab API. - -## GitLab Handler Initialization - -The GitLab handler is initialized with the following parameters: - -- `repository`: a required name of a GitLab repository to connect to -- `api_key`: an optional GitLab API key to use for authentication -- `url`: an optional GitLab server URL (defaults to https://gitlab.com) - -## Implemented Features - - [x] GitLab Issues Table for a given Repository - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection -- [x] GitLab Merge Requests Table for a given Repository - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - - -## Usage -In order to make use of this handler and connect to a gitlab api in MindsDB, the following syntax can be used, - -~~~~sql -CREATE DATABASE mindsdb_gitlab -WITH ENGINE = 'gitlab', -PARAMETERS = { - "repository": "gitlab-org/gitlab", - "api_key": "api_key", -- optional GitLab API key - "url": "https://gitlab.com" -- optional GitLab server URL (defaults to https://gitlab.com) -}; -~~~~ - -Now, you can use this established connection to query your table as follows, -~~~~sql -SELECT * FROM mindsdb_gitlab.issues; -~~~~ - -~~~~sql -SELECT number, state, creator, assignee, title, created, labels - FROM mindsdb_gitlab.issues - WHERE state="opened" - ORDER BY created ASC, creator DESC - LIMIT 10; -~~~~ - -~~~~sql -SELECT number, state, creator, reviewers, title, created, has_conflicts - FROM mindsdb_gitlab.merge_requests - WHERE state="merged" - ORDER BY created ASC, creator DESC - LIMIT 10; -~~~~ - -## What is next?? - -Add support for: - -- GitLab Branches, Releases, Branches tables for a given Repository diff --git a/mindsdb/integrations/handlers/gitlab_handler/__about__.py b/mindsdb/integrations/handlers/gitlab_handler/__about__.py deleted file mode 100644 index a97e0597a87..00000000000 --- a/mindsdb/integrations/handlers/gitlab_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB GitLab handler" -__package_name__ = "mindsdb_gitlab_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for GitLab" -__author__ = "Senali Dilumika" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/gitlab_handler/__init__.py b/mindsdb/integrations/handlers/gitlab_handler/__init__.py deleted file mode 100644 index e8c91ba8c20..00000000000 --- a/mindsdb/integrations/handlers/gitlab_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .gitlab_handler import GitlabHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "GitLab" -name = "gitlab" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", "version", "name", "type", "title", "description", - "import_error", "icon_path", -] diff --git a/mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py b/mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py deleted file mode 100644 index cae572732d8..00000000000 --- a/mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +++ /dev/null @@ -1,88 +0,0 @@ -import gitlab - -from mindsdb.integrations.handlers.gitlab_handler.gitlab_tables import GitlabIssuesTable, GitlabMergeRequestsTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class GitlabHandler(APIHandler): - """The GitLab handler implementation""" - - def __init__(self, name: str, **kwargs): - """constructor - Args: - name (str): the handler name - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.repository = connection_data["repository"] - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - gitlab_issues_data = GitlabIssuesTable(self) - gitlab_merge_requests_data = GitlabMergeRequestsTable(self) - self._register_table("issues", gitlab_issues_data) - self._register_table("merge_requests", gitlab_merge_requests_data) - - def connect(self) -> StatusResponse: - """Set up the connections required by the handler - Returns: - HandlerStatusResponse - """ - - connection_kwargs = {} - - if self.connection_data.get("url", None): - connection_kwargs["url"] = self.connection_data["url"] - - if self.connection_data.get("api_key", None): - connection_kwargs["private_token"] = self.connection_data["api_key"] - - self.connection = gitlab.Gitlab(**connection_kwargs) - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """Check connection to the handler - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - - try: - self.connect() - if self.connection_data.get("api_key", None): - logger.info("Authenticated as user") - else: - logger.info("Proceeding without an API key") - - response.success = True - except Exception as e: - logger.error(f"Error connecting to GitLab API: {e}!") - response.error_message = e - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process raw query. - Args: - query (str): query in a native format - Returns: - HandlerResponse - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py b/mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py deleted file mode 100644 index 7f38c831c0f..00000000000 --- a/mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py +++ /dev/null @@ -1,390 +0,0 @@ -import pandas as pd - -from typing import List - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.utilities import log - -from mindsdb_sql_parser import ast - -logger = log.getLogger(__name__) - - -class GitlabIssuesTable(APITable): - """The GitLab Issue Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the GitLab "List repository issues" API - Args: - query: SELECT - Returns: - DataFrame - Raises: - ValueError - """ - - conditions = extract_comparison_conditions(query.where) - - if query.limit: - total_results = query.limit.value - else: - total_results = 20 - - issues_kwargs = {} - order_by_conditions = {} - - if query.order_by and len(query.order_by) > 0: - order_by_conditions["columns"] = [] - order_by_conditions["ascending"] = [] - - for an_order in query.order_by: - if an_order.field.parts[1] in self.get_columns(): - order_by_conditions["columns"].append(an_order.field.parts[1]) - - if an_order.direction == "ASC": - order_by_conditions["ascending"].append(True) - else: - order_by_conditions["ascending"].append(False) - else: - raise ValueError( - f"Order by unknown column {an_order.field.parts[1]}" - ) - - for a_where in conditions: - if a_where[1] == "state": - if a_where[0] != "=": - raise ValueError("Unsupported where operation for state") - if a_where[2] not in ["opened", "closed", "all"]: - raise ValueError( - f"Unsupported where argument for state {a_where[2]}" - ) - - issues_kwargs["state"] = a_where[2] - - continue - if a_where[1] == "labels": - if a_where[0] != "=": - raise ValueError("Unsupported where operation for state") - - issues_kwargs["labels"] = a_where[2].split(",") - - continue - if a_where[1] in ["assignee", "creator"]: - if a_where[0] != "=": - raise ValueError(f"Unsupported where operation for {a_where[1]}") - - issues_kwargs[a_where[1]] = a_where[2] - else: - raise ValueError(f"Unsupported where argument {a_where[1]}") - - self.handler.connect() - - gitlab_issues_df = pd.DataFrame(columns=self.get_columns()) - - issues_kwargs["per_page"] = total_results - issues_kwargs["get_all"] = False - while True: - try: - for issue in self.handler.connection.projects.get( - self.handler.repository - ).issues.list(**issues_kwargs): - - logger.debug(f"Processing issue {issue.iid}") - - gitlab_issues_df = pd.concat( - [ - gitlab_issues_df, - pd.DataFrame( - [ - { - "number": issue.iid, - "title": issue.title, - "state": issue.state, - "creator": issue.author["name"], - "closed_by": issue.closed_by - if issue.closed_by - else None, - "labels": ",".join( - [label for label in issue.labels] - ), - "assignees": ",".join( - [ - assignee["name"] - for assignee in issue.assignees - ] - ), - "body": issue.description, - "created": issue.created_at, - "updated": issue.updated_at, - "closed": issue.closed_at, - } - ] - ), - ] - ) - - if gitlab_issues_df.shape[0] >= total_results: - break - except IndexError: - break - - if gitlab_issues_df.shape[0] >= total_results: - break - else: - break - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(gitlab_issues_df) == 0: - gitlab_issues_df = pd.DataFrame([], columns=selected_columns) - else: - gitlab_issues_df.columns = self.get_columns() - for col in set(gitlab_issues_df.columns).difference(set(selected_columns)): - gitlab_issues_df = gitlab_issues_df.drop(col, axis=1) - - if len(order_by_conditions.get("columns", [])) > 0: - gitlab_issues_df = gitlab_issues_df.sort_values( - by=order_by_conditions["columns"], - ascending=order_by_conditions["ascending"], - ) - - return gitlab_issues_df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str]: list of columns - """ - - return [ - "number", - "title", - "state", - "creator", - "closed_by", - "labels", - "assignees", - "body", - "created", - "updated", - "closed", - ] - - -class GitlabMergeRequestsTable(APITable): - """The GitLab Merge Requests Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the GitLab "List repository rerge requests" API - Args: - query: SELECT - Returns: - DataFrame - Raises: - ValueError - """ - - conditions = extract_comparison_conditions(query.where) - - if query.limit: - total_results = query.limit.value - else: - total_results = 20 - - merge_requests_kwargs = {} - order_by_conditions = {} - - if query.order_by and len(query.order_by) > 0: - order_by_conditions["columns"] = [] - order_by_conditions["ascending"] = [] - - for an_order in query.order_by: - if an_order.field.parts[1] in self.get_columns(): - order_by_conditions["columns"].append(an_order.field.parts[1]) - - if an_order.direction == "ASC": - order_by_conditions["ascending"].append(True) - else: - order_by_conditions["ascending"].append(False) - else: - raise ValueError( - f"Order by unknown column {an_order.field.parts[1]}" - ) - - for a_where in conditions: - if a_where[1] == "state": - if a_where[0] != "=": - raise ValueError("Unsupported where operation for state") - if a_where[2] not in ["opened", "closed", "merged", "all"]: - raise ValueError( - f"Unsupported where argument for state {a_where[2]}" - ) - - merge_requests_kwargs["state"] = a_where[2] - - continue - if a_where[1] == "labels": - if a_where[0] != "=": - raise ValueError("Unsupported where operation for labels") - - merge_requests_kwargs["labels"] = a_where[2].split(",") - - continue - if a_where[1] in ["target_branch", "source_branch"]: - if a_where[0] != "=": - raise ValueError(f"Unsupported where operation for {a_where[1]}") - - merge_requests_kwargs[a_where[1]] = a_where[2] - else: - raise ValueError(f"Unsupported where argument {a_where[1]}") - - self.handler.connect() - - gitlab_merge_requests_df = pd.DataFrame(columns=self.get_columns()) - - merge_requests_kwargs["per_page"] = total_results - merge_requests_kwargs["get_all"] = False - while True: - try: - for merge_request in self.handler.connection.projects.get( - self.handler.repository - ).mergerequests.list(**merge_requests_kwargs): - - logger.debug(f"Processing merge request {merge_request.iid}") - - gitlab_merge_requests_df = pd.concat( - [ - gitlab_merge_requests_df, - pd.DataFrame( - [ - { - "number": merge_request.iid, - "title": merge_request.title, - "state": merge_request.state, - "creator": merge_request.author["name"], - "closed_by": merge_request.closed_by - if merge_request.closed_by - else None, - "mergeed_by": merge_request.merge_user["name"] - if merge_request.merge_user - else None, - "labels": ",".join( - [label for label in merge_request.labels] - ), - "assignees": ",".join( - [ - assignee["name"] - for assignee in merge_request.assignees - ] - ), - "reviewers": ",".join( - [ - reviewer["name"] - for reviewer in merge_request.reviewers - ] - ), - "body": merge_request.description, - "target_branch": merge_request.target_branch, - "source_branch": merge_request.source_branch, - "upvotes": merge_request.upvotes, - "downvotes": merge_request.downvotes, - "draft": merge_request.draft, - "work_in_progress": merge_request.work_in_progress, - "milestone": merge_request.milestone["state"] - if merge_request.milestone - else None, - "merge_status": merge_request.merge_status, - "detailed_merge_status": merge_request.detailed_merge_status, - "user_notes_count": merge_request.user_notes_count, - "has_conflicts": merge_request.has_conflicts, - "blocking_discussions_resolved": merge_request.blocking_discussions_resolved, - "created": merge_request.created_at, - "updated": merge_request.updated_at, - "closed": merge_request.closed_at, - "merged": merge_request.merged_at, - } - ] - ), - ] - ) - - if gitlab_merge_requests_df.shape[0] >= total_results: - break - except IndexError: - break - - if gitlab_merge_requests_df.shape[0] >= total_results: - break - else: - break - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(gitlab_merge_requests_df) == 0: - gitlab_merge_requests_df = pd.DataFrame([], columns=selected_columns) - else: - gitlab_merge_requests_df.columns = self.get_columns() - for col in set(gitlab_merge_requests_df.columns).difference(set(selected_columns)): - gitlab_merge_requests_df = gitlab_merge_requests_df.drop(col, axis=1) - - if len(order_by_conditions.get("columns", [])) > 0: - gitlab_merge_requests_df = gitlab_merge_requests_df.sort_values( - by=order_by_conditions["columns"], - ascending=order_by_conditions["ascending"], - ) - - return gitlab_merge_requests_df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str]: list of columns - """ - - return [ - "number", - "title", - "state", - "creator", - "closed_by", - "mergeed_by", - "labels", - "assignees", - "reviewers", - "body", - "target_branch", - "source_branch", - "upvotes", - "downvotes", - "draft", - "work_in_progress", - "milestone", - "merge_status", - "detailed_merge_status", - "user_notes_count", - "has_conflicts", - "blocking_discussions_resolved", - "created", - "updated", - "closed", - "merged", - ] diff --git a/mindsdb/integrations/handlers/gitlab_handler/icon.svg b/mindsdb/integrations/handlers/gitlab_handler/icon.svg deleted file mode 100644 index 840a59981c0..00000000000 --- a/mindsdb/integrations/handlers/gitlab_handler/icon.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/gitlab_handler/requirements.txt b/mindsdb/integrations/handlers/gitlab_handler/requirements.txt deleted file mode 100644 index 67de9d9568d..00000000000 --- a/mindsdb/integrations/handlers/gitlab_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -python-gitlab \ No newline at end of file diff --git a/mindsdb/integrations/handlers/gong_handler/README.md b/mindsdb/integrations/handlers/gong_handler/README.md deleted file mode 100644 index b116f1f77a9..00000000000 --- a/mindsdb/integrations/handlers/gong_handler/README.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Gong -sidebarTitle: Gong ---- - -This documentation describes the integration of MindsDB with [Gong](https://www.gong.io/), a conversation intelligence platform that captures, analyzes, and provides insights from customer conversations. -The integration allows MindsDB to access call recordings, transcripts, analytics, and other conversation data from Gong and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Gong to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Obtain a Gong API key from your [Gong API settings page](https://app.gong.io/settings/api-keys). - -## Connection - -Establish a connection to Gong from MindsDB by executing the following SQL command and providing its handler name as an engine. - -### Using Bearer Token (Recommended) - -```sql -CREATE DATABASE gong_datasource -WITH - ENGINE = 'gong', - PARAMETERS = { - "api_key": "your_gong_api_key_here" - }; -``` - -### Using Basic Authentication - -```sql -CREATE DATABASE gong_datasource -WITH - ENGINE = 'gong', - PARAMETERS = { - "access_key": "your_access_key", - "secret_key": "your_secret_key" - }; -``` - -Required connection parameters include the following: - -**Authentication (choose one method):** - -* `api_key`: Bearer token for authentication (recommended) -* `access_key` + `secret_key`: Basic authentication credentials (alternative method) - -Optional connection parameters include the following: - -* `base_url`: Gong API base URL. This parameter defaults to `https://api.gong.io`. -* `timeout`: Request timeout in seconds. This parameter defaults to `30`. - - -If both authentication methods are provided, basic auth (`access_key` + `secret_key`) takes precedence. - - -## Usage - -The following usage examples utilize `gong_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - -### Available Tables - -The Gong handler provides access to the following tables: - -* `calls` - Access call recordings and metadata -* `users` - Get user information and permissions -* `analytics` - Access AI-generated conversation insights -* `transcripts` - Get full conversation transcripts - -### Basic Queries - -Retrieve recent calls with date filters (recommended for best performance): - -```sql -SELECT * -FROM gong_datasource.calls -WHERE date >= '2024-01-01' AND date < '2024-02-01' -ORDER BY date DESC -LIMIT 20; -``` - -Get all users in your organization: - -```sql -SELECT user_id, name, email, role, status -FROM gong_datasource.users -LIMIT 100; -``` - -Get analytics for calls with high sentiment scores: - -```sql -SELECT call_id, sentiment_score, key_phrases, topics -FROM gong_datasource.analytics -WHERE sentiment_score > 0.7 - AND date >= '2024-01-01' -LIMIT 50; -``` - -Get transcripts for a specific call: - -```sql -SELECT speaker, timestamp, text -FROM gong_datasource.transcripts -WHERE call_id = '12345' -ORDER BY timestamp; -``` - -### Advanced Queries with JOINs - -Get calls with their sentiment analysis: - -```sql -SELECT - c.title, - c.date, - c.duration, - a.sentiment_score, - a.key_phrases -FROM gong_datasource.calls c -JOIN gong_datasource.analytics a ON c.call_id = a.call_id -WHERE c.date >= '2024-01-01' AND c.date < '2024-02-01' -ORDER BY a.sentiment_score DESC -LIMIT 25; -``` - -Find calls where specific keywords were mentioned: - -```sql -SELECT - c.title, - c.date, - t.speaker, - t.text -FROM gong_datasource.calls c -JOIN gong_datasource.transcripts t ON c.call_id = t.call_id -WHERE c.date >= '2024-01-01' - AND t.text LIKE '%pricing%' -LIMIT 50; -``` - -Get user performance with call sentiment: - -```sql -SELECT - u.name, - u.email, - c.call_id, - c.title, - a.sentiment_score -FROM gong_datasource.users u -JOIN gong_datasource.calls c ON u.user_id = c.user_id -JOIN gong_datasource.analytics a ON c.call_id = a.call_id -WHERE c.date >= '2024-01-01' - AND a.sentiment_score > 0.8 -LIMIT 100; -``` - -### Querying the Data Catalog - -The handler supports `INFORMATION_SCHEMA` queries for AI agents to discover available tables and columns: - -```sql --- Get all available tables -SELECT * -FROM INFORMATION_SCHEMA.TABLES -WHERE TABLE_SCHEMA = 'gong_datasource'; - --- Get columns for a specific table -SELECT * -FROM INFORMATION_SCHEMA.COLUMNS -WHERE TABLE_SCHEMA = 'gong_datasource' - AND TABLE_NAME = 'calls'; - --- Get foreign key relationships -SELECT * -FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE -WHERE TABLE_SCHEMA = 'gong_datasource' - AND CONSTRAINT_NAME LIKE 'fk_%'; -``` - - -The above examples utilize `gong_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Data Schema - -### calls Table - -| Column | Description | -|--------|-------------| -| `call_id` | Unique identifier for the call (Primary Key) | -| `title` | Call title or description | -| `date` | Call date and time (ISO-8601 format) | -| `duration` | Call duration in seconds | -| `recording_url` | URL to the call recording | -| `call_type` | Type of call (e.g., "sales", "demo") | -| `user_id` | ID of the user who made the call | -| `participants` | Comma-separated list of participants | -| `status` | Call status | - -### users Table - -| Column | Description | -|--------|-------------| -| `user_id` | Unique identifier for the user (Primary Key) | -| `name` | User's full name | -| `email` | User's email address | -| `role` | User's role in the organization | -| `permissions` | Comma-separated list of user permissions | -| `status` | User status | - -### analytics Table - -| Column | Description | -|--------|-------------| -| `call_id` | Reference to the call (Primary Key, Foreign Key to calls.call_id) | -| `sentiment_score` | Sentiment analysis score | -| `topic_score` | Topic detection score | -| `key_phrases` | Comma-separated list of key phrases | -| `topics` | Comma-separated list of detected topics | -| `emotions` | Comma-separated list of detected emotions | -| `confidence_score` | Confidence score for the analysis | - -### transcripts Table - -| Column | Description | -|--------|-------------| -| `segment_id` | Unique identifier for the transcript segment (Primary Key) | -| `call_id` | Reference to the call (Foreign Key to calls.call_id) | -| `speaker` | Name of the speaker | -| `timestamp` | Timestamp of the transcript segment (ISO-8601 format) | -| `text` | Transcribed text | -| `confidence` | Confidence score for the transcription | - -## Troubleshooting - - -`Authentication Error` - -* **Symptoms**: Failure to connect MindsDB with Gong. -* **Checklist**: - 1. Verify that your Gong API key is valid and not expired. - 2. Ensure you have the necessary permissions in Gong to access the API. - 3. Check that your API key has access to the specific data you're querying. - 4. If using basic authentication, verify both `access_key` and `secret_key` are correct. - - - -`Empty Results or Missing Data` - -* **Symptoms**: Queries return no results or incomplete data. -* **Checklist**: - 1. Verify that date filters are included in your query (required for calls, analytics, transcripts). - 2. Check that the date range includes data (analytics and transcripts have ~1 hour lag). - 3. Ensure call_id exists when querying transcripts for a specific call. - 4. Verify that your Gong account has data for the requested time period. - - - -`Slow Query Performance` - -* **Symptoms**: Queries take a long time to execute. -* **Checklist**: - 1. Add date filters to limit the data range (essential for large datasets). - 2. Use LIMIT to restrict the number of results. - 3. Filter by call_id when querying transcripts. - 4. Avoid querying transcripts without filters (can return thousands of rows per call). - diff --git a/mindsdb/integrations/handlers/gong_handler/__about__.py b/mindsdb/integrations/handlers/gong_handler/__about__.py deleted file mode 100644 index 8fe4039f271..00000000000 --- a/mindsdb/integrations/handlers/gong_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Gong handler" -__package_name__ = "mindsdb_gong_handler" -__version__ = "0.0.1" -__description__ = "Gong conversation intelligence platform handler for MindsDB" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023- mindsdb" diff --git a/mindsdb/integrations/handlers/gong_handler/__init__.py b/mindsdb/integrations/handlers/gong_handler/__init__.py deleted file mode 100644 index 529250c94c7..00000000000 --- a/mindsdb/integrations/handlers/gong_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description - -try: - from .gong_handler import GongHandler as Handler - from .connection_args import connection_args, connection_args_example - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Gong" -name = "gong" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY - -__all__ = [ - "Handler", - "version", - "name", - "type", - "support_level", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/gong_handler/connection_args.py b/mindsdb/integrations/handlers/gong_handler/connection_args.py deleted file mode 100644 index 58d73c8d166..00000000000 --- a/mindsdb/integrations/handlers/gong_handler/connection_args.py +++ /dev/null @@ -1,37 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.PWD, - "description": "Gong API key for authentication.", - "secret": True, - "required": False, - "label": "API Key", - }, - # Basic Authentication with Access Key + Secret Key (Option 2) - access_key={ - "type": ARG_TYPE.STR, - "description": "Gong Access Key for basic authentication (if not using OAuth).", - "secret": True, - "required": False, - "label": "Access Key", - }, - secret_key={ - "type": ARG_TYPE.PWD, - "description": "Gong Secret Key for basic authentication (if not using OAuth).", - "secret": True, - "required": False, - "label": "Secret Key", - }, - base_url={ - "type": ARG_TYPE.STR, - "description": "Gong API base URL (optional, defaults to production).", - "required": False, - "label": "Base URL", - }, -) - -connection_args_example = OrderedDict(api_key="your_gong_api_key_here", base_url="https://api.gong.io") diff --git a/mindsdb/integrations/handlers/gong_handler/constants.py b/mindsdb/integrations/handlers/gong_handler/constants.py deleted file mode 100644 index aa70d74f490..00000000000 --- a/mindsdb/integrations/handlers/gong_handler/constants.py +++ /dev/null @@ -1,150 +0,0 @@ -""" -Constants and metadata for the Gong handler. -""" - - -def get_gong_api_info(handler_name: str) -> str: - return f""" - # Gong Handler Usage Guide: {handler_name} - - ## CRITICAL Performance Requirements - - **ALWAYS use date filters on analytics and (ideally) on calls/transcripts** - - Analytics defaults to last 7 days if no date filters are provided - - Calls/transcripts can be very large; add WHERE date ... or call_id filters - - Example: WHERE c.date >= '2024-01-01' AND c.date < '2024-01-31' - - **ALWAYS use LIMIT with transcripts** - - Transcripts can contain thousands of rows per call - - Example: LIMIT 100 - - **Query by call_id when fetching transcripts** - - First get call IDs from `calls` (with date filters), then fetch transcripts for those IDs - - Avoids massive data transfers - - ## Query Strategy by User Intent - - **"calls" / "meetings"** β†’ query `{handler_name}.calls` - - **"sentiment" / "topics" / "what was discussed"** β†’ query `{handler_name}.analytics` JOIN `{handler_name}.calls` - - Analytics contains AI-generated insights about call content - - **"what did X say" / keyword search in speech** β†’ query `{handler_name}.transcripts` JOIN `{handler_name}.calls` - - Transcripts contain exact words spoken - - Always filter by call_id and/or a narrow date range - - **"sales reps" / "users" / "team"** β†’ query `{handler_name}.users` - - Independent table, no JOIN needed - - ## Efficient JOIN Pattern - - Start with the smallest, filtered dataset first: - ```sql - SELECT c.title, a.sentiment_score - FROM {handler_name}.calls c - JOIN {handler_name}.analytics a ON c.call_id = a.call_id - WHERE c.date >= '2024-01-01' AND c.date < '2024-02-01' - LIMIT 50; - ``` - """ - - -GONG_TABLES_METADATA = { - "calls": { - "name": "calls", - "type": "api_table", - "description": "Call records from Gong with basic metadata including date, duration, participants, and status", - "columns": [ - {"name": "call_id", "type": "str", "description": "Unique identifier for the call"}, - {"name": "title", "type": "str", "description": "Call title or subject"}, - {"name": "date", "type": "str", "description": "Call date (ISO 8601 format, YYYY-MM-DD)"}, - {"name": "duration", "type": "int", "description": "Call duration in seconds"}, - {"name": "recording_url", "type": "str", "description": "URL to the call recording (if available)"}, - {"name": "call_type", "type": "str", "description": "Call type/system classification"}, - {"name": "user_id", "type": "str", "description": "Primary user/owner of the call"}, - {"name": "participants", "type": "str", "description": "Comma-separated list of call participants"}, - {"name": "status", "type": "str", "description": "Call status (scheduled, completed, etc.)"}, - ], - "filterable_columns": ["date", "status"], - "api_endpoint": "/v2/calls", - "supports_pagination": True, - }, - "users": { - "name": "users", - "type": "api_table", - "description": "User information including names, emails, roles, and permissions", - "columns": [ - {"name": "user_id", "type": "str", "description": "Unique identifier for the user"}, - {"name": "name", "type": "str", "description": "User's full name"}, - {"name": "email", "type": "str", "description": "User's email address"}, - {"name": "role", "type": "str", "description": "User's role in the organization"}, - {"name": "permissions", "type": "str", "description": "User's permission levels"}, - {"name": "status", "type": "str", "description": "User status (active/inactive)"}, - ], - "filterable_columns": ["email", "status"], - "api_endpoint": "/v2/users", - "supports_pagination": True, - }, - "analytics": { - "name": "analytics", - "type": "api_table", - "description": "Advanced call analytics including sentiment analysis, topics, key phrases, and interaction scores", - "columns": [ - {"name": "call_id", "type": "str", "description": "Unique identifier for the call"}, - {"name": "sentiment_score", "type": "float", "description": "Overall sentiment score (0-1)"}, - {"name": "topic_score", "type": "float", "description": "Topic relevance score (0-1)"}, - {"name": "key_phrases", "type": "str", "description": "Comma-separated list of key phrases identified"}, - {"name": "topics", "type": "str", "description": "Comma-separated list of topics discussed"}, - {"name": "emotions", "type": "str", "description": "Emotional analysis metrics"}, - {"name": "confidence_score", "type": "str", "description": "AI confidence score for analytics"}, - ], - "filterable_columns": ["date"], - "api_endpoint": "/v2/calls/extensive", - "supports_pagination": True, - "notes": "Defaults to last 7 days if no date filters are provided.", - }, - "transcripts": { - "name": "transcripts", - "type": "api_table", - "description": "Call transcripts with speaker identification, timestamps, and confidence scores", - "columns": [ - {"name": "call_id", "type": "str", "description": "Unique identifier for the call"}, - {"name": "speaker", "type": "str", "description": "Speaker identifier"}, - {"name": "timestamp", "type": "int", "description": "Timestamp in milliseconds from call start"}, - {"name": "text", "type": "str", "description": "Transcript text for this segment"}, - {"name": "confidence", "type": "float", "description": "Transcription confidence score"}, - {"name": "segment_id", "type": "str", "description": "Unique segment identifier"}, - ], - "filterable_columns": ["call_id", "text"], - "api_endpoint": "/v2/calls/transcript", - "supports_pagination": True, - "notes": "Fetch transcripts for specific call IDs; always filter by call_id and/or narrow date range.", - }, -} - - -GONG_PRIMARY_KEYS = { - "calls": {"column_name": "call_id", "constraint_name": "pk_calls_call_id"}, - "users": {"column_name": "user_id", "constraint_name": "pk_users_user_id"}, - "analytics": {"column_name": "call_id", "constraint_name": "pk_analytics_call_id"}, - "transcripts": {"column_name": "segment_id", "constraint_name": "pk_transcripts_segment_id"}, -} - -GONG_FOREIGN_KEYS = { - "analytics": [ - { - "column_name": "call_id", - "foreign_table_name": "calls", - "foreign_column_name": "call_id", - "constraint_name": "fk_analytics_call_id", - } - ], - "transcripts": [ - { - "column_name": "call_id", - "foreign_table_name": "calls", - "foreign_column_name": "call_id", - "constraint_name": "fk_transcripts_call_id", - } - ], -} diff --git a/mindsdb/integrations/handlers/gong_handler/gong_handler.py b/mindsdb/integrations/handlers/gong_handler/gong_handler.py deleted file mode 100644 index cb276e723bb..00000000000 --- a/mindsdb/integrations/handlers/gong_handler/gong_handler.py +++ /dev/null @@ -1,343 +0,0 @@ -import requests -from typing import Any, Dict, List, Optional - -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.gong_handler.gong_tables import ( - GongCallsTable, - GongUsersTable, - GongAnalyticsTable, - GongTranscriptsTable, -) -from mindsdb.integrations.handlers.gong_handler.constants import ( - get_gong_api_info, - GONG_TABLES_METADATA, - GONG_PRIMARY_KEYS, - GONG_FOREIGN_KEYS, -) -from mindsdb.integrations.libs.api_handler import MetaAPIHandler -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class GongHandler(MetaAPIHandler): - """ - This handler handles the connection and execution of SQL statements on Gong. - """ - - name = "gong" - - def __init__(self, name: str, connection_data: Dict, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the Gong API. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - self.base_url = connection_data.get("base_url", "https://api.gong.io") - self.timeout = connection_data.get("timeout", 30) # Default 30 second timeout - - # Support both bearer token and access key + secret key - self.bearer_token = connection_data.get("api_key") - self.access_key = connection_data.get("access_key") - self.secret_key = connection_data.get("secret_key") - - # Register core tables - self._register_table("calls", GongCallsTable(self)) - self._register_table("users", GongUsersTable(self)) - self._register_table("analytics", GongAnalyticsTable(self)) - self._register_table("transcripts", GongTranscriptsTable(self)) - - def connect(self) -> requests.Session: - """ - Establishes a connection to the Gong API. - - Raises: - ValueError: If the required connection parameters are not provided. - Exception: If a connection error occurs. - - Returns: - requests.Session: A session object for making API requests. - """ - if self.is_connected is True: - return self.connection - - if self.access_key and self.secret_key: - auth_method = "basic" - elif self.bearer_token: - auth_method = "bearer" - else: - raise ValueError("Either bearer_token or (access_key + secret_key) is required to connect to Gong API.") - - try: - self.connection = requests.Session() - - if auth_method == "basic": - # Basic authentication with access key + secret key - self.connection.auth = (self.access_key, self.secret_key) - self.connection.headers.update({"Content-Type": "application/json", "Accept": "application/json"}) - else: - # Bearer token authentication - self.connection.headers.update( - { - "Authorization": f"Bearer {self.bearer_token}", - "Content-Type": "application/json", - "Accept": "application/json", - } - ) - - test_response = self.connection.get(f"{self.base_url}/v2/users", timeout=self.timeout) - test_response.raise_for_status() - - self.is_connected = True - return self.connection - - except Exception as e: - self.is_connected = False - logger.error(f"Error connecting to Gong API: {e}") - raise - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Gong API. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - - try: - self.connect() - # Test the connection by making a simple API call - test_response = self.connection.get(f"{self.base_url}/v2/users") - test_response.raise_for_status() - response.success = True - except Exception as e: - logger.error(f"Connection check to Gong failed: {e}") - response.error_message = str(e) - - self.is_connected = response.success - return response - - def native_query(self, query: str) -> Response: - """ - Executes a native query on Gong and returns the result. - - Args: - query (Text): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - try: - ast = parse_sql(query) - return self.query(ast) - except Exception as e: - logger.error(f"Error running query: {query} on Gong: {e}") - return Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e)) - - def call_gong_api(self, endpoint: str, method: str = "GET", params: Dict = None, json: Dict = None) -> Dict: - """ - Makes a call to the Gong API. - - Args: - endpoint (str): The API endpoint to call. - method (str): HTTP method (GET or POST). - params (Dict): Query parameters for the API call (for GET requests). - json (Dict): JSON payload for POST requests. - - Returns: - Dict: The API response. - """ - if not self.is_connected: - self.connect() - - url = f"{self.base_url}{endpoint}" - - if method.upper() == "POST": - response = self.connection.post(url, json=json, timeout=self.timeout) - else: - response = self.connection.get(url, params=params, timeout=self.timeout) - - response.raise_for_status() - return response.json() - - def meta_get_handler_info(self, **kwargs) -> str: - """ - Retrieves information about the Gong API handler design and implementation. - - Returns: - str: A string containing information about the handler's design and implementation. - """ - return get_gong_api_info(self.name) - - def meta_get_tables(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves metadata for the specified tables (or all tables if no list is provided). - - Note: Gong API doesn't provide a metadata/schema discovery endpoint, so we use - the handler's registered tables combined with static metadata from constants. - - Args: - table_names (List): A list of table names for which to retrieve metadata. - - Returns: - Response: A response object containing the table metadata. - """ - import pandas as pd - - metadata_list = [] - - # Get metadata for requested tables (or all if none specified) - # Use registered tables to ensure we only return tables that are actually available - for table_name in self._tables.keys(): - if (table_names is None or table_name in table_names) and table_name in GONG_TABLES_METADATA: - metadata = GONG_TABLES_METADATA[table_name] - metadata_list.append( - { - "table_name": metadata["name"], - "table_type": metadata["type"], - "description": metadata["description"], - "api_endpoint": metadata["api_endpoint"], - "supports_pagination": metadata["supports_pagination"], - "notes": metadata.get("notes", ""), - } - ) - - df = pd.DataFrame(metadata_list) - return Response(RESPONSE_TYPE.TABLE, df) - - def meta_get_columns(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). - - Note: Column schemas are derived from the table's get_columns() method when available, - falling back to static metadata from constants. - - Args: - table_names (List): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing the column metadata. - """ - import pandas as pd - - column_metadata_list = [] - - # Get column metadata for requested tables (or all if none specified) - for table_name in self._tables.keys(): - if (table_names is None or table_name in table_names) and table_name in GONG_TABLES_METADATA: - metadata = GONG_TABLES_METADATA[table_name] - - # Try to get live columns from the table class - table_instance = self._tables[table_name] - if hasattr(table_instance, "get_columns"): - live_columns = table_instance.get_columns() - - # Match live columns with metadata - for column_name in live_columns: - # Find column metadata - column_meta = next( - (col for col in metadata["columns"] if col["name"] == column_name), - {"type": "str", "description": f"Column {column_name}"}, - ) - - column_metadata_list.append( - { - "table_name": table_name, - "column_name": column_name, - "data_type": column_meta.get("type", "str"), - "description": column_meta.get("description", ""), - "is_filterable": column_name in metadata.get("filterable_columns", []), - } - ) - else: - # Fallback to static metadata - for column in metadata["columns"]: - column_metadata_list.append( - { - "table_name": table_name, - "column_name": column["name"], - "data_type": column["type"], - "description": column["description"], - "is_filterable": column["name"] in metadata.get("filterable_columns", []), - } - ) - - df = pd.DataFrame(column_metadata_list) - return Response(RESPONSE_TYPE.TABLE, df) - - def meta_get_primary_keys(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves primary key metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve primary key metadata. - - Returns: - Response: A response object containing the primary key metadata. - """ - import pandas as pd - - pk_list = [] - - # Get primary key metadata for requested tables (or all if none specified) - for table_name in self._tables.keys(): - if (table_names is None or table_name in table_names) and table_name in GONG_PRIMARY_KEYS: - pk_info = GONG_PRIMARY_KEYS[table_name] - pk_list.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": pk_info["column_name"], - "CONSTRAINT_NAME": pk_info["constraint_name"], - } - ) - - df = pd.DataFrame(pk_list) - return Response(RESPONSE_TYPE.TABLE, df) - - def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves foreign key metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve foreign key metadata. - - Returns: - Response: A response object containing the foreign key metadata. - """ - import pandas as pd - - fk_list = [] - - # Get foreign key metadata for requested tables (or all if none specified) - for table_name in self._tables.keys(): - if (table_names is None or table_name in table_names) and table_name in GONG_FOREIGN_KEYS: - for fk_info in GONG_FOREIGN_KEYS[table_name]: - fk_list.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": fk_info["column_name"], - "FOREIGN_TABLE_NAME": fk_info["foreign_table_name"], - "FOREIGN_COLUMN_NAME": fk_info["foreign_column_name"], - "CONSTRAINT_NAME": fk_info["constraint_name"], - } - ) - - df = pd.DataFrame(fk_list) - return Response(RESPONSE_TYPE.TABLE, df) diff --git a/mindsdb/integrations/handlers/gong_handler/gong_tables.py b/mindsdb/integrations/handlers/gong_handler/gong_tables.py deleted file mode 100644 index b08b5e1537d..00000000000 --- a/mindsdb/integrations/handlers/gong_handler/gong_tables.py +++ /dev/null @@ -1,611 +0,0 @@ -from typing import List, Dict, Any, Callable, Union -from datetime import datetime, timedelta, timezone -import pandas as pd - -from mindsdb.integrations.libs.api_handler import APIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, SortColumn -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -def normalize_datetime_to_iso8601(dt_value: Union[str, datetime, None]) -> str: - """ - Normalize various datetime formats to ISO 8601 with UTC timezone (Z suffix). - - Args: - dt_value: Datetime value as string, datetime object, or None - - Returns: - ISO 8601 formatted string with Z suffix (e.g., "2024-01-15T10:30:00Z") - - Raises: - ValueError: If the datetime string cannot be parsed - """ - if dt_value is None: - return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - - if isinstance(dt_value, datetime): - # If naive (no timezone), assume UTC - if dt_value.tzinfo is None: - dt_value = dt_value.replace(tzinfo=timezone.utc) - # Convert to UTC if not already - elif dt_value.tzinfo != timezone.utc: - dt_value = dt_value.astimezone(timezone.utc) - return dt_value.strftime("%Y-%m-%dT%H:%M:%SZ") - - if isinstance(dt_value, str): - dt_str = dt_value.strip() - - # Already in correct format with Z suffix - if dt_str.endswith("Z") and "T" in dt_str: - return dt_str - - # Has timezone offset like +00:00 or -05:00 - if dt_str.endswith(("00", "30")) and ("+" in dt_str[-6:] or dt_str[-6:-3] == "-"): - try: - dt_obj = datetime.fromisoformat(dt_str) - return dt_obj.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - except ValueError: - pass - - formats_to_try = [ - "%Y-%m-%dT%H:%M:%S", # ISO without timezone - "%Y-%m-%d %H:%M:%S", # Common format - "%Y-%m-%d", # Date only (assume start of day UTC) - "%Y/%m/%d", # Alternative date format - "%d-%m-%Y", # European date format - "%m/%d/%Y", # US date format - ] - - for fmt in formats_to_try: - try: - dt_obj = datetime.strptime(dt_str, fmt) - dt_obj = dt_obj.replace(tzinfo=timezone.utc) - return dt_obj.strftime("%Y-%m-%dT%H:%M:%SZ") - except ValueError: - continue - - try: - dt_obj = datetime.fromisoformat(dt_str.replace("Z", "+00:00")) - return dt_obj.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - except ValueError: - raise ValueError(f"Unable to parse datetime string: {dt_str}") - - raise ValueError(f"Unsupported datetime type: {type(dt_value)}") - - -def paginate_api_call( - api_call: Callable, - result_key: str, - limit: int = None, - params: Dict[str, Any] = None, - json_body: Dict[str, Any] = None, - cursor_path: str = "records.cursor", - cursor_param: str = "cursor", - max_pages: int = 100, -) -> List[Dict]: - """ - Helper function to paginate through API responses. - - Args: - api_call: Function to call the API (should accept params and/or json) - result_key: Key in response containing the data items - limit: Maximum number of items to fetch - params: Initial query parameters for GET requests - json_body: Initial JSON body for POST requests - cursor_path: Dot-notation path to cursor in response (e.g., "records.cursor") - cursor_param: Parameter name for passing cursor to next request - max_pages: Maximum number of pages to fetch (safety guard) - - Returns: - List of items from paginated API calls - """ - all_items = [] - seen_cursors = set() - cursor = None - page_count = 0 - - params = params or {} - json_body = json_body or {} - - while page_count < max_pages and (not limit or len(all_items) < limit): - page_count += 1 - - if cursor: - if json_body: - current_json = json_body.copy() - current_json[cursor_param] = cursor - response = api_call(json=current_json) - else: - current_params = params.copy() - current_params[cursor_param] = cursor - response = api_call(params=current_params) - else: - if json_body: - response = api_call(json=json_body.copy()) - else: - response = api_call(params=params.copy()) - - items_batch = response.get(result_key, []) - - if not items_batch: - break - - items_added_this_batch = 0 - for item in items_batch: - if limit and len(all_items) >= limit: - break - - item_str = str(sorted(item.items())) if isinstance(item, dict) else str(item) - if item_str not in seen_cursors: - all_items.append(item) - seen_cursors.add(item_str) - items_added_this_batch += 1 - - if limit and len(all_items) >= limit: - break - - next_cursor = response - for key in cursor_path.split("."): - next_cursor = next_cursor.get(key, {}) if isinstance(next_cursor, dict) else None - if next_cursor is None: - break - - if not next_cursor: - break - - if next_cursor == cursor: - logger.warning(f"API returned identical cursor: {cursor}. Stopping pagination.") - break - - cursor_str = str(next_cursor) - if cursor_str in seen_cursors: - logger.warning(f"Detected cursor cycle at: {cursor_str}. Stopping pagination.") - break - - seen_cursors.add(cursor_str) - cursor = next_cursor - - if page_count >= max_pages: - logger.warning(f"Reached maximum page limit ({max_pages}). There may be more data available.") - - return all_items - - -class GongCallsTable(APIResource): - """The Gong Calls Table implementation""" - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - ) -> pd.DataFrame: - """Pulls data from the Gong Calls API - - Returns - ------- - pd.DataFrame - Gong calls matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - api_params = {} - if conditions: - for condition in conditions: - if condition.column == "date" and condition.op == FilterOperator.GREATER_THAN: - api_params["fromDateTime"] = normalize_datetime_to_iso8601(condition.value) - condition.applied = True - elif condition.column == "date" and condition.op == FilterOperator.LESS_THAN: - api_params["toDateTime"] = normalize_datetime_to_iso8601(condition.value) - condition.applied = True - - try: - all_calls = paginate_api_call( - api_call=lambda params: self.handler.call_gong_api("/v2/calls", params=params), - result_key="calls", - limit=limit, - params=api_params, - ) - - data = [] - for call in all_calls: - started = call.get("started", "") - date = started.split("T")[0] if started else "" - - item = { - "call_id": call.get("id"), - "title": call.get("title"), - "date": date, - "duration": call.get("duration"), - "recording_url": call.get("url", ""), - "call_type": call.get("system"), - "user_id": call.get("primaryUserId"), - "participants": ",".join([p.get("name", "") for p in call.get("participants", [])]), - "status": call.get("status"), - } - data.append(item) - - df = pd.DataFrame(data) - - if conditions: - for condition in conditions: - if not condition.applied and condition.column in df.columns: - if condition.op == FilterOperator.EQUAL: - df = df[df[condition.column] == condition.value] - condition.applied = True - - if sort: - for col in sort: - if col.column in df.columns: - df = df.sort_values(by=col.column, ascending=col.ascending, na_position="last") - col.applied = True - break - return df - - except Exception as e: - logger.error(f"Error fetching calls from Gong API: {e}") - raise - - def get_columns(self) -> List[str]: - """Returns the columns of the calls table""" - return [ - "call_id", - "title", - "date", - "duration", - "recording_url", - "call_type", - "user_id", - "participants", - "status", - ] - - -class GongUsersTable(APIResource): - """The Gong Users Table implementation""" - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - ) -> pd.DataFrame: - """Pulls data from the Gong Users API - - Returns - ------- - pd.DataFrame - Gong users matching the query - """ - - api_params = {} - - try: - # Use pagination helper to fetch users - all_users = paginate_api_call( - api_call=lambda params: self.handler.call_gong_api("/v2/users", params=params), - result_key="users", - limit=limit, - params=api_params, - ) - - # Process the limited data - data = [] - for user in all_users: - # Safely concatenate names - handle None values - first_name = user.get("firstName") or "" - last_name = user.get("lastName") or "" - full_name = f"{first_name} {last_name}".strip() - - item = { - "user_id": user.get("id"), - "name": full_name, - "email": user.get("emailAddress", ""), - "role": user.get("title", ""), - "permissions": ",".join(user.get("permissions", [])), - "status": "active" if user.get("active", False) else "inactive", - } - data.append(item) - - df = pd.DataFrame(data) - - if conditions: - for condition in conditions: - if condition.column in df.columns: - if condition.op == FilterOperator.EQUAL: - df = df[df[condition.column] == condition.value] - condition.applied = True - - if sort: - for col in sort: - if col.column in df.columns: - df = df.sort_values(by=col.column, ascending=col.ascending, na_position="last") - col.applied = True - break - - if limit is not None: - df = df.head(limit) - - return df - - except Exception as e: - logger.error(f"Error fetching users from Gong API: {e}") - raise - - def get_columns(self) -> List[str]: - """Returns the columns of the users table""" - return ["user_id", "name", "email", "role", "permissions", "status"] - - -class GongAnalyticsTable(APIResource): - """The Gong Analytics Table implementation""" - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - ) -> pd.DataFrame: - """Pulls data from the Gong Analytics API - - Returns - ------- - pd.DataFrame - Gong analytics matching the query - """ - - try: - # Default to last 7 days if no date filters provided - default_from = datetime.now(timezone.utc) - timedelta(days=7) - default_to = datetime.now(timezone.utc) - - payload = { - "filter": { - "fromDateTime": normalize_datetime_to_iso8601(default_from), - "toDateTime": normalize_datetime_to_iso8601(default_to), - }, - "contentSelector": { - "exposedFields": { - "content": { - "brief": True, - "outline": True, - "highlights": True, - "callOutcome": True, - "topics": True, - "trackers": True, - }, - "interaction": {"personInteractionStats": True, "questions": True}, - } - }, - } - - if conditions: - for condition in conditions: - if condition.column == "date" and condition.op == FilterOperator.GREATER_THAN: - payload["filter"]["fromDateTime"] = normalize_datetime_to_iso8601(condition.value) - condition.applied = True - elif condition.column == "date" and condition.op == FilterOperator.LESS_THAN: - payload["filter"]["toDateTime"] = normalize_datetime_to_iso8601(condition.value) - condition.applied = True - - # Fetch calls using improved pagination helper with POST support - calls_data = paginate_api_call( - api_call=lambda **kwargs: self.handler.call_gong_api("/v2/calls/extensive", method="POST", **kwargs), - result_key="calls", - limit=limit, - json_body=payload, - cursor_path="records.cursor", - cursor_param="cursor", - max_pages=100, - ) - - # Process each call to extract analytics - all_analytics = [] - for call in calls_data: - # Extract analytics from extensive call data - content = call.get("content", {}) - interaction = call.get("interaction", {}) - metadata = call.get("metaData", {}) - - # Sentiment and Emotion from InteractionStats - person_stats = interaction.get("interactionStats", []) - sentiment_score = 0 - emotions = "" # Initialize emotions - - if person_stats: - stats_dict = {stat["name"]: stat["value"] for stat in interaction.get("interactionStats", [])} - sentiment_score = ( - stats_dict.get("Talk Ratio", 0) - + stats_dict.get("Patience", 0) - + min(stats_dict.get("Interactivity", 0) / 10, 1.0) - ) / 3 - emotions = f"Talk:{stats_dict.get('Talk Ratio', 0)}, Patience:{stats_dict.get('Patience', 0)}, Interactivity:{stats_dict.get('Interactivity', 0)}" - - # Topics from AI analysis - topics = content.get("topics", []) - topic_names = [ - topic.get("name", "") - for topic in topics - if isinstance(topic, dict) and topic.get("duration", 0) > 0 - ] - - # Key phrases from AI - trackers = content.get("trackers", []) - key_phrases = [tracker.get("name", "") for tracker in trackers if tracker.get("count", 0) > 0] - - # Topic scoring based on relevance - prevent division by zero - topic_score = 0 - if topics: - valid_topics = [topic for topic in topics if isinstance(topic, dict)] - if valid_topics: - total_topic_duration = sum([topic.get("duration", 0) for topic in valid_topics]) - avg_topic_duration = total_topic_duration / len(valid_topics) - call_duration = metadata.get("duration", 0) - if call_duration > 0: - topic_score = avg_topic_duration / call_duration - - item = { - "call_id": metadata.get("id"), - "sentiment_score": round(sentiment_score, 3), - "topic_score": round(topic_score, 3), - "key_phrases": ", ".join(key_phrases), - "topics": ", ".join(topic_names), - "emotions": emotions, - "confidence_score": "", - } - all_analytics.append(item) - - df = pd.DataFrame(all_analytics) - - # Apply non-date filtering at DataFrame level - if conditions: - for condition in conditions: - if not condition.applied and condition.column in df.columns: - if condition.op == FilterOperator.EQUAL: - df = df[df[condition.column] == condition.value] - condition.applied = True - - # Apply sorting at DataFrame level - if sort: - for col in sort: - if col.column in df.columns: - df = df.sort_values(by=col.column, ascending=col.ascending, na_position="last") - col.applied = True - break - - if limit is not None: - df = df.head(limit) - - return df - - except Exception as e: - logger.error(f"Error fetching analytics from Gong API: {e}") - raise - - def get_columns(self) -> List[str]: - """Returns the columns of the analytics table""" - return ["call_id", "sentiment_score", "topic_score", "key_phrases", "topics", "emotions", "confidence_score"] - - -class GongTranscriptsTable(APIResource): - """The Gong Transcripts Table implementation""" - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - ) -> pd.DataFrame: - """Pulls data from the Gong Transcripts API - - Returns - ------- - pd.DataFrame - Gong transcripts matching the query - """ - - try: - calls_api_params = {} - - if conditions: - for condition in conditions: - if condition.column == "date" and condition.op == FilterOperator.GREATER_THAN: - calls_api_params["fromDateTime"] = normalize_datetime_to_iso8601(condition.value) - condition.applied = True - elif condition.column == "date" and condition.op == FilterOperator.LESS_THAN: - calls_api_params["toDateTime"] = normalize_datetime_to_iso8601(condition.value) - condition.applied = True - - # Fetch call IDs using pagination helper - calls_fetch_limit = limit if limit else 100 - all_calls = paginate_api_call( - api_call=lambda params: self.handler.call_gong_api("/v2/calls", params=params), - result_key="calls", - limit=calls_fetch_limit, - params=calls_api_params, - ) - all_call_ids = [call.get("id") for call in all_calls if call.get("id")] - - if not all_call_ids: - return pd.DataFrame() - call_ids_to_fetch = all_call_ids[:limit] if limit else all_call_ids - - # Fetch transcripts using improved pagination helper with POST support - payload = {"filter": {"callIds": call_ids_to_fetch}} - call_transcripts = paginate_api_call( - api_call=lambda **kwargs: self.handler.call_gong_api("/v2/calls/transcript", method="POST", **kwargs), - result_key="callTranscripts", - limit=None, # Get all transcripts for the specified calls - json_body=payload, - cursor_path="records.cursor", - cursor_param="cursor", - max_pages=50, - ) - - # Process transcripts - all_transcript_data = [] - for call_transcript in call_transcripts: - call_id = call_transcript.get("callId") - transcript_segments = call_transcript.get("transcript", []) - - segment_counter = 0 - - for speaker_block in transcript_segments: - speaker_id = speaker_block.get("speakerId") - sentences = speaker_block.get("sentences", []) - - for sentence in sentences: - segment_counter += 1 - - item = { - "call_id": call_id, - "speaker": speaker_id, - "timestamp": sentence.get("start"), - "text": sentence.get("text"), - "confidence": sentence.get("confidence"), - "segment_id": f"{call_id}_{segment_counter}", - } - all_transcript_data.append(item) - - df = pd.DataFrame(all_transcript_data) - - if conditions: - for condition in conditions: - if not condition.applied and condition.column in df.columns: - if condition.op == FilterOperator.EQUAL: - df = df[df[condition.column] == condition.value] - condition.applied = True - elif condition.op == FilterOperator.LIKE or condition.op == FilterOperator.CONTAINS: - if condition.column == "text": - df = df[df[condition.column].str.contains(condition.value, case=False, na=False)] - condition.applied = True - - if sort: - for col in sort: - if col.column in df.columns: - df = df.sort_values(by=col.column, ascending=col.ascending, na_position="last") - col.applied = True - break - - if limit is not None: - df = df.head(limit) - return df - - except Exception as e: - logger.error(f"Error fetching transcripts from Gong API: {e}") - raise - - def get_columns(self) -> List[str]: - """Returns the columns of the transcripts table""" - return ["call_id", "speaker", "timestamp", "text", "confidence", "segment_id"] diff --git a/mindsdb/integrations/handlers/gong_handler/icon.svg b/mindsdb/integrations/handlers/gong_handler/icon.svg deleted file mode 100644 index ccad214fbc1..00000000000 --- a/mindsdb/integrations/handlers/gong_handler/icon.svg +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/gong_handler/requirements.txt b/mindsdb/integrations/handlers/gong_handler/requirements.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/google_books_handler/README.md b/mindsdb/integrations/handlers/google_books_handler/README.md deleted file mode 100644 index 9513222c087..00000000000 --- a/mindsdb/integrations/handlers/google_books_handler/README.md +++ /dev/null @@ -1,89 +0,0 @@ -# Google Books API Integration - -This handler integrates with the [Google Books API](https://developers.google.com/books/docs/overview) to allow you to -make book and bookshelf data available to use for model training and predictions. - -## Example: Automate your book recommendations - -To see how the Google Books handler is used, let's walk through a simple example to create a model to predict -your future book recommendations. - -## Connect to the Google Books API - -We start by creating a database to connect to the Google Books API. Currently, there is no need for an API key: - -However, you will need to have a Google account and have enabled the Google Books API. -Also, you will need to have the credentials -in a json file. You can find more information on how to do -this [here](https://developers.google.com/identity/protocols/oauth2/service-account). - -**Optional:** The credentials file can be stored in the google_books handler folder in -the [mindsdb/integrations/google_books_handler](mindsdb/integrations/handlers/google_books_handler) directory. - -~~~~sql -CREATE -DATABASE my_books -WITH ENGINE = 'google_books', -parameters = { - 'credentials': 'C:\\Users\\panagiotis\\Desktop\\GitHub\\mindsdb\\mindsdb\\integrations\\handlers\\google_books_handler\\credentials.json' -}; -~~~~ - -This creates a database called my_books. This database ships with a table called bookshelves and a table called volumes -that we can use to search for -info related to the users bookshelves and volumes. - -## Searching for bookshelves in SQL - -Let's get a list of bookshelves in our account. - -~~~~sql -SELECT id, - title, - description -FROM my_books.bookshelves -WHERE userId = '1001' - AND title = 'My Bookshelf' -~~~~ - -or - -~~~~sql -SELECT id, - title, - description -FROM my_books.bookshelves -WHERE shelf > 10 - AND shelf < 20 -~~~~ - -**Note**: If you have specified only one aspect of the comparison (`>` or `<`), then the `minShelf` will be `maxShelf` - -10 ( -if `minShelf` is -not defined) and the `maxShelf` will be `minShelf` + 10 (if `maxShelf` is not defined). - -## Searching for volumes in SQL - -Let's get a list of volumes in our account. - -~~~~sql -SELECT id, - title, - description -FROM my_books.volumes -WHERE q = 'Harry Potter' -~~~~ - -## Creating a model to predict future book recommendations - -Now we can use ML for book recommendations, -reading history analysis, and other automations based on our Google Books activity. - -~~~~sql -CREATE -PREDICTOR recommend_books -FROM my_books.volumes -PREDICT - title, - description -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_books_handler/__about__.py b/mindsdb/integrations/handlers/google_books_handler/__about__.py deleted file mode 100644 index 816b87694d8..00000000000 --- a/mindsdb/integrations/handlers/google_books_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Google Books handler' -__package_name__ = 'mindsdb_google_books_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the Google Books API" -__author__ = 'Panagiotis-Alexios Spanakis' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/google_books_handler/__init__.py b/mindsdb/integrations/handlers/google_books_handler/__init__.py deleted file mode 100644 index 1777f257859..00000000000 --- a/mindsdb/integrations/handlers/google_books_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .google_books_handler import GoogleBooksHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Google Books' -name = 'google_books' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/google_books_handler/google_books_handler.py b/mindsdb/integrations/handlers/google_books_handler/google_books_handler.py deleted file mode 100644 index 4f04cdd0ff4..00000000000 --- a/mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +++ /dev/null @@ -1,186 +0,0 @@ -import os -from google.auth.transport.requests import Request -from google.oauth2 import service_account -from google.oauth2.credentials import Credentials -from googleapiclient.discovery import build -import pandas as pd -from pandas import DataFrame -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from .google_books_tables import BookshelvesTable, VolumesTable -from mindsdb.integrations.libs.api_handler import APIHandler, FuncParser -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class GoogleBooksHandler(APIHandler): - """ - A class for handling connections and interactions with the Google Books API. - """ - - name = "google_books" - - def __init__(self, name: str, **kwargs): - """ - Initialize the Google Books API handler. - Args: - name (str): name of the handler - kwargs (dict): additional arguments - """ - super().__init__(name) - self.token = None - self.service = None - self.connection_data = kwargs.get("connection_data", {}) - self.credentials_file = self.connection_data.get("credentials", None) - self.credentials = None - self.scopes = ["https://www.googleapis.com/auth/books"] - self.is_connected = False - self.connection = None - bookshelves = BookshelvesTable(self) - self.bookshelves = bookshelves - self._register_table("bookshelves", bookshelves) - volumes = VolumesTable(self) - self.volumes = volumes - self._register_table("volumes", volumes) - - def connect(self): - """ - Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.service - if self.credentials_file: - if os.path.exists("token_books.json"): - self.credentials = Credentials.from_authorized_user_file("token_books.json", self.scopes) - if not self.credentials or not self.credentials.valid: - if self.credentials and self.credentials.expired and self.credentials.refresh_token: - self.credentials.refresh(Request()) - else: - self.credentials = service_account.Credentials.from_service_account_file( - self.credentials_file, scopes=self.scopes - ) - # Save the credentials for the next run - with open("token_books.json", "w") as token: - token.write(self.credentials.to_json()) - self.service = build("books", "v1", credentials=self.credentials) - return self.service - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to Google Books API: {e}!") - response.error_message = e - - self.is_connected = response.success - return response - - def native_query(self, query: str = None) -> Response: - """ - Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - api's json etc) - Returns: - HandlerResponse - """ - method_name, params = FuncParser().from_string(query) - - df = self.call_application_api(method_name, params) - - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - def get_bookshelves(self, params: dict = None) -> DataFrame: - """ - Get bookshelf from Google Books API - Args: - params (dict): query parameters - Returns: - DataFrame - """ - service = self.connect() - minShelf = None - maxShelf = None - if params["shelf"]: - shelf = int(params["shelf"]) - if params["source"]: - df = ( - service.mylibrary() - .bookshelves() - .get(shelf=shelf, userid=params["userid"], source=params["source"]) - .execute() - ) - else: - df = service.mylibrary().bookshelves().get(shelf=shelf, userid=params["userid"]).execute() - - df = pd.DataFrame(df, columns=self.bookshelves.get_columns()) - return df - elif not params["minShelf"] and params["maxShelf"]: - minShelf = int(params["maxShelf"]) - 10 - maxShelf = int(params["maxShelf"]) - elif not params["maxShelf"] and params["minShelf"]: - minShelf = int(params["minShelf"]) - maxShelf = int(params["minShelf"]) + 10 - elif params["maxShelf"] and params["minShelf"]: - minShelf = int(params["minShelf"]) - maxShelf = int(params["maxShelf"]) - - args = { - key: value for key, value in params.items() if key not in ["minShelf", "maxShelf"] and value is not None - } - bookshelves = service.bookshelves().list(userid=params["userid"], **args).execute() - - df = pd.DataFrame(bookshelves["items"], columns=self.bookshelves.get_columns()) - if minShelf is not None or maxShelf is not None: - # Drop bookshelves that are not in the id range - df = df.drop(df[(df["id"] < minShelf) | (df["id"] > maxShelf)].index) - - return df - - def get_volumes(self, params: dict = None) -> DataFrame: - """ - Get volumes from Google Books API - Args: - params (dict): query parameters - Returns: - DataFrame - """ - service = self.connect() - args = {key: value for key, value in params.items() if value is not None} - volumes = service.volumes().list(**args).execute() - df = pd.DataFrame(volumes["items"], columns=self.volumes.get_columns()) - return df - - def call_application_api(self, method_name: str = None, params: dict = None) -> DataFrame: - """ - Call Google Books API and map the data to pandas DataFrame - Args: - method_name (str): method name - params (dict): query parameters - Returns: - DataFrame - """ - if method_name == "get_bookshelves": - return self.get_bookshelves(params) - elif method_name == "get_volumes": - return self.get_volumes(params) - else: - raise NotImplementedError(f"Unknown method {method_name}") diff --git a/mindsdb/integrations/handlers/google_books_handler/google_books_tables.py b/mindsdb/integrations/handlers/google_books_handler/google_books_tables.py deleted file mode 100644 index e226f33ce64..00000000000 --- a/mindsdb/integrations/handlers/google_books_handler/google_books_tables.py +++ /dev/null @@ -1,182 +0,0 @@ -import pandas as pd -from mindsdb_sql_parser import ast -from pandas import DataFrame - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - - -class VolumesTable(APITable): - """ - A class for handling the volumes table. - """ - - def select(self, query: ast.Select) -> DataFrame: - """ - Gets all info about the wanted contents of a bookshelf. - - Args: - query (ast.Select): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - # Get the parameters for the request. - params = {} - for op, arg1, arg2 in conditions: - if op != '=': - raise NotImplementedError - if arg1 == 'q' or arg1 == 'download' or arg1 == 'langRestrict' \ - or arg1 == 'printType'\ - or arg1 == 'source' or arg1 == 'partner' \ - or arg1 == 'showPreorders' or arg1 == 'startIndex': - params[arg1] = arg2 - elif arg1 == 'filter': - if arg2 not in ['ebooks', 'free-ebooks', 'full', 'paid-ebooks', 'partial']: - raise NotImplementedError - params[arg1] = arg2 - elif arg1 == 'libraryRestrict': - if arg2 not in ['my-library', 'no-restrictions']: - raise NotImplementedError - params[arg1] = arg2 - elif arg1 == 'printType': - if arg2 not in ['all', 'books', 'magazines']: - raise NotImplementedError - params[arg1] = arg2 - elif arg1 == 'projection': - if arg2 not in ['lite', 'full']: - raise NotImplementedError - params[arg1] = arg2 - else: - raise NotImplementedError - - # Get the order by from the query. - if query.order_by is not None: - if query.order_by[0].value == 'newest': - params['orderBy'] = 'newest' - elif query.order_by[0].value == 'relevance': - params['orderBy'] = 'relevance' - else: - raise NotImplementedError - - if query.limit is not None: - params['maxResults'] = query.limit.value - - # Get the volumes from the Google Books API. - bookshelves = self.handler.\ - call_application_api(method_name='get_volumes', params=params) - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(bookshelves) == 0: - bookshelves = pd.DataFrame([], columns=selected_columns) - else: - bookshelves.columns = self.get_columns() - for col in set(bookshelves.columns).difference(set(selected_columns)): - bookshelves = bookshelves.drop(col, axis=1) - return bookshelves - - def get_columns(self) -> list: - """ - Gets the columns of the table. - - Returns: - list: List of column names. - """ - return [ - 'kind', - 'id', - 'etag', - 'selfLink', - 'volumeInfo', - 'userInfo' - 'saleInfo', - 'accessInfo', - 'searchInfo', - ] - - -class BookshelvesTable(APITable): - """ - A class for handling the bookshelves table. - """ - - def select(self, query: ast.Select) -> DataFrame: - """ - Gets all info about the wanted bookshelves. - - Args: - query (ast.Select): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - # Get the parameters for the request. - params = {} - for op, arg1, arg2 in conditions: - if arg1 == 'userId' or arg1 == 'source' or arg1 == 'fields': - if op != '=': - raise NotImplementedError - params[arg1] = arg2 - elif arg1 == 'shelf': - if op == '=': - params[arg1] = arg2 - elif op == '>': - params['minShelf'] = arg2 - elif op == '<': - params['maxShelf'] = arg2 - else: - raise NotImplementedError - else: - raise NotImplementedError - - # Get the bookshelves from the Google Books API. - bookshelves = self.handler.\ - call_application_api(method_name='get_bookshelves', params=params) - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(bookshelves) == 0: - bookshelves = pd.DataFrame([], columns=selected_columns) - else: - bookshelves.columns = self.get_columns() - for col in set(bookshelves.columns).difference(set(selected_columns)): - bookshelves = bookshelves.drop(col, axis=1) - return bookshelves - - def get_columns(self) -> list: - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - 'kind', - 'id', - 'selfLink', - 'title', - 'description', - 'access', - 'updated', - 'created', - 'volumeCount', - 'volumesLastUpdated' - ] diff --git a/mindsdb/integrations/handlers/google_books_handler/icon.svg b/mindsdb/integrations/handlers/google_books_handler/icon.svg deleted file mode 100644 index ab6017406c3..00000000000 --- a/mindsdb/integrations/handlers/google_books_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_books_handler/requirements.txt b/mindsdb/integrations/handlers/google_books_handler/requirements.txt deleted file mode 100644 index d13929813b0..00000000000 --- a/mindsdb/integrations/handlers/google_books_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -google-api-python-client -google-auth \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_books_handler/tests/__init__.py b/mindsdb/integrations/handlers/google_books_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/google_books_handler/tests/test_google_books_handler.py b/mindsdb/integrations/handlers/google_books_handler/tests/test_google_books_handler.py deleted file mode 100644 index 823bb5b173e..00000000000 --- a/mindsdb/integrations/handlers/google_books_handler/tests/test_google_books_handler.py +++ /dev/null @@ -1,41 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.google_books_handler.google_books_handler import GoogleBooksHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class GoogleBooksHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "credentials": "C:\\Users\\panagiotis\\Desktop\\GitHub\\mindsdb\\mindsdb\\integrations\\handlers" - "\\google_books_handler\\credentials.json" - } - } - cls.handler = GoogleBooksHandler('test_google_books_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_2_select_volume_query(self): - query = "SELECT summary FROM my_books.volumes WHERE q = 'Harry Potter'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_3_select_bookshelves_query(self): - query = "SELECT title FROM my_books.bookshelves WHERE shelf > 1" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_get_columns(self): - columns = self.handler.get_columns('id') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/README.md b/mindsdb/integrations/handlers/google_content_shopping_handler/README.md deleted file mode 100644 index d984c07d03b..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# Google Content API for Shopping Integration - -This handler integrates with -the [Google Content API for Shopping](https://developers.google.com/shopping-content/guides/quickstart) -to allow you to use Google Search data in your SQL queries. - -## Example: Predicting price of product based on Google Content API for Shopping activity - -To see how the Google Content API for Shopping handler is used, let's walk through a simple example to create a model to predict -the price of a product based on Shopping activity. - -## Connect to the Content API for Shopping - -We start by creating a database to connect to the Google Content API for Shopping. Currently, there is no need for an -API key: - -However, you will need to have a Google account and have enabled the Google Content API for Shopping. -Also, you will need to have the credentials in a json file. -You can find more information on how to do -this [here](https://developers.google.com/shopping-content/guides/quickstart/setting-up-a-client-library). - -**Optional:** The credentials file can be stored in the google_content_shopping handler folder in -the [mindsdb/integrations/google_content_shopping_handler](mindsdb/integrations/handlers/google_content_shopping_handler) -directory. - -~~~~sql -CREATE -DATABASE my_content -WITH ENGINE = 'google_content_shopping', -parameters = { - 'credentials': 'C:\\Users\\panagiotis\\Desktop\\GitHub\\mindsdb\\mindsdb\\integrations\\handlers\\google_content_shopping_handler\\credentials.json', - 'merchant_id': '1234567890' -}; -~~~~ - -This creates a database called my_content. This database ships with a table called AccountsTable, OrderTable and with a -table called ProductsTable that we can use to search for - -## Get the list of accounts - -Let's get an account. - -~~~~sql -SELECT * -FROM my_content.Accounts -WHERE accountId = 123456789 -~~~~ - -## Remove an account - -Now let's remove an account. - -~~~~sql -DELETE -FROM my_content.Accounts -WHERE accountId = 123456789 -~~~~ - -## Get the list of orders - -Let's test by getting the list of orders. - -~~~~sql -SELECT * -FROM my_content.Orders -WHERE placedDateStart = '2020-10-01' - AND placedDateEnd = '2020-10-31' -~~~~ - -## Delete some orders - -Now let's delete some orders. - -~~~~sql -DELETE -FROM my_content.Orders -WHERE orderId > 123 - AND orderId < 456 -~~~~ - -## Get the list of products - -Let's get the list of products. - -~~~~sql -SELECT * -FROM my_content.Products -WHERE productId > 123456789 -~~~~ - -## Update a product - -Now let's update some products. - -~~~~sql -UPDATE my_content.Products -SET title = 'New Title' -WHERE productId > 123456789 - AND updateMask = 'title' -~~~~ - -## Delete a product - -Now let's delete some products. - -~~~~sql -DELETE -FROM my_content.Products -WHERE productId > 123456789 -~~~~ - -### Note - -If you have specified only one aspect of the comparison (`>` or `<`), then the `start_id` will be `end_id` - 10 ( -if `start_id` is -not defined) and the `end_id` will be `start_id` + 10 (if `end_id` is defined). - -## Creating a model to predict future product prices - -Now we can use machine learning for sales predictions, inventory management, -product recommendations, and other automations. - -~~~~sql -CREATE -PREDICTOR future_product_prices -FROM my_content.Products -PREDICT price -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/__about__.py b/mindsdb/integrations/handlers/google_content_shopping_handler/__about__.py deleted file mode 100644 index f2f070240e6..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Google Content API for Shopping handler' -__package_name__ = 'mindsdb_google_content_shopping_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the Google Content API for Shopping" -__author__ = 'Panagiotis-Alexios Spanakis' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/__init__.py b/mindsdb/integrations/handlers/google_content_shopping_handler/__init__.py deleted file mode 100644 index 955d6250976..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .google_content_shopping_handler import GoogleContentShoppingHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Google Content Shopping' -name = 'google_content_shopping' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path', 'connection_args', 'connection_args_example' -] diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/connection_args.py b/mindsdb/integrations/handlers/google_content_shopping_handler/connection_args.py deleted file mode 100644 index 5974487bbe0..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/connection_args.py +++ /dev/null @@ -1,20 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - credentials={ - 'type': ARG_TYPE.PATH, - 'description': 'The path to the credentials file. If not specified, the default credentials are used.' - }, - merchant_id={ - 'type': ARG_TYPE.STR, - 'description': 'The merchant ID for the Google Content API.' - }, -) - -connection_args_example = OrderedDict( - credentials='/path/to/credentials.json', - merchant_id='1234567890' -) diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py b/mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py deleted file mode 100644 index 359884c011e..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +++ /dev/null @@ -1,397 +0,0 @@ -import json - -import pandas as pd -from pandas import DataFrame -from google.auth.transport.requests import Request -from google.oauth2 import service_account -from google.oauth2.credentials import Credentials -from googleapiclient.discovery import build -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from .google_content_shopping_tables import AccountsTable, OrdersTable, ProductsTable -from mindsdb.integrations.libs.api_handler import APIHandler, FuncParser -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class GoogleContentShoppingHandler(APIHandler): - """ - A class for handling connections and interactions with the Google Content API for Shopping. - """ - - name = "google_content_shopping" - - def __init__(self, name: str, **kwargs): - """ - Initialize the Google Content API for Shopping handler. - Args: - name (str): name of the handler - kwargs (dict): additional arguments - """ - super().__init__(name) - self.token = None - self.service = None - self.connection_data = kwargs.get("connection_data", {}) - self.fs_storage = kwargs["file_storage"] - self.credentials_file = self.connection_data.get("credentials", None) - self.merchant_id = self.connection_data.get("merchant_id", None) - self.credentials = None - self.scopes = ["https://www.googleapis.com/auth/content"] - self.is_connected = False - accounts = AccountsTable(self) - self.accounts = accounts - self._register_table("Accounts", accounts) - orders = OrdersTable(self) - self.orders = orders - self._register_table("Orders", orders) - products = ProductsTable(self) - self.products = products - self._register_table("Products", products) - - def connect(self): - """ - Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - HandlerStatusResponse - """ - if self.is_connected is True: - return self.service - if self.credentials_file: - try: - json_str_bytes = self.fs_storage.file_get("token_content.json") - json_str = json_str_bytes.decode() - self.credentials = Credentials.from_authorized_user_info(info=json.loads(json_str), scopes=self.scopes) - except Exception: - self.credentials = None - if not self.credentials or not self.credentials.valid: - if self.credentials and self.credentials.expired and self.credentials.refresh_token: - self.credentials.refresh(Request()) - else: - self.credentials = service_account.Credentials.from_service_account_file( - self.credentials_file, scopes=self.scopes - ) - # Save the credentials for the next run - json_str = self.credentials.to_json() - self.fs_storage.file_set("token_content.json", json_str.encode()) - self.service = build("content", "v2.1", credentials=self.credentials) - return self.service - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to Google Content API for Shopping: {e}!") - response.error_message = e - - self.is_connected = response.success - return response - - def native_query(self, query: str = None) -> Response: - """ - Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - api's json etc) - Returns: - HandlerResponse - """ - method_name, params = FuncParser().from_string(query) - - df = self.call_application_api(method_name, params) - - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - def get_accounts(self, params: dict = None) -> DataFrame: - """ - Get accounts - Args: - params (dict): query parameters - Returns: - DataFrame - """ - service = self.connect() - page_token = None - accounts = pd.DataFrame(columns=self.accounts.get_columns()) - if params["account_id"]: - result = service.accounts().get(merchantId=self.merchant_id, accountId=params["account_id"]).execute() - accounts = pd.DataFrame(result, columns=self.accounts.get_columns()) - return accounts - while True: - result = service.accounts().list(merchantId=self.merchant_id, page_token=page_token, **params).execute() - accounts = pd.concat( - [accounts, pd.DataFrame(result["resources"], columns=self.accounts.get_columns())], ignore_index=True - ) - page_token = result.get("nextPageToken") - if not page_token: - break - - if params["startId"] and params["endId"]: - start_id = int(params["startId"]) - end_id = int(params["endId"]) - elif params["startId"]: - start_id = int(params["startId"]) - end_id = start_id + 10 - elif params["endId"]: - end_id = int(params["endId"]) - start_id = end_id - 10 - else: - raise Exception("startId or endId must be specified") - - accounts = accounts.drop(accounts[(accounts["id"] < start_id) | (accounts["id"] > end_id)].index) - - return accounts - - def delete_accounts(self, params: dict = None) -> DataFrame: - """ - Delete accounts - Args: - params (dict): query parameters - Returns: - DataFrame - """ - service = self.connect() - args = {} - if params["force"]: - args = {"force": params["force"]} - if params["accountId"]: - result = ( - service.accounts().delete(merchantId=self.merchant_id, accountId=params["accountId"], **args).execute() - ) - return result - else: - df = pd.DataFrame(columns=["accountId", "status"]) - if not params["startId"]: - start_id = int(params["endId"]) - 10 - elif not params["endId"]: - end_id = int(params["startId"]) + 10 - else: - start_id = int(params["startId"]) - end_id = int(params["endId"]) - - for i in range(start_id, end_id): - service.accounts().delete(merchantId=self.merchant_id, accountId=i, **args).execute() - df = pd.concat([df, pd.DataFrame([{"accountId": str(i), "status": "deleted"}])], ignore_index=True) - return df - - def get_orders(self, params: dict = None) -> DataFrame: - """ - Get orders - Args: - params (dict): query parameters - Returns: - DataFrame - """ - service = self.connect() - page_token = None - orders = pd.DataFrame(columns=self.orders.get_columns()) - args = { - key: value - for key, value in params.items() - if key in ["maxResults", "statuses", "acknowledged", "placedDateStart", "placedDateEnd", "orderBy"] - and value is not None - } - if params["order_id"]: - result = service.orders().get(merchantId=self.merchant_id, orderId=params["order_id"], **args).execute() - orders = pd.DataFrame(result, columns=self.orders.get_columns()) - return orders - while True: - result = service.orders().list(merchantId=self.merchant_id, page_token=page_token, **args).execute() - orders = pd.concat( - [orders, pd.DataFrame(result["resources"], columns=self.orders.get_columns())], ignore_index=True - ) - page_token = result.get("nextPageToken") - if not page_token: - break - - if params["startId"] and params["endId"]: - start_id = int(params["startId"]) - end_id = int(params["endId"]) - elif params["startId"]: - start_id = int(params["startId"]) - end_id = start_id + 10 - elif params["endId"]: - end_id = int(params["endId"]) - start_id = end_id - 10 - else: - raise Exception("startId or endId must be specified") - - orders = orders.drop(orders[(orders["id"] < start_id) | (orders["id"] > end_id)].index) - - return orders - - def delete_orders(self, params: dict = None) -> DataFrame: - """ - Delete orders - Args: - params (dict): query parameters - Returns: - DataFrame - """ - service = self.connect() - if params["order_id"]: - result = service.orders().delete(merchantId=self.merchant_id, orderId=params["order_id"]).execute() - return result - else: - df = pd.DataFrame(columns=["orderId", "status"]) - if not params["startId"]: - start_id = int(params["endId"]) - 10 - elif not params["endId"]: - end_id = int(params["startId"]) + 10 - else: - start_id = int(params["startId"]) - end_id = int(params["endId"]) - - for i in range(start_id, end_id): - service.orders().delete(merchantId=self.merchant_id, orderId=i).execute() - df = pd.concat([df, pd.DataFrame([{"orderId": str(i), "status": "deleted"}])], ignore_index=True) - return df - - def get_products(self, params: dict = None) -> DataFrame: - """ - Get products - Args: - params (dict): query parameters - Returns: - DataFrame - """ - service = self.connect() - page_token = None - products = pd.DataFrame(columns=self.products.get_columns()) - if params["product_id"]: - result = service.products().get(merchantId=self.merchant_id, productId=params["product_id"]).execute() - products = pd.DataFrame(result, columns=self.products.get_columns()) - return products - while True: - result = service.products().list(merchantId=self.merchant_id, page_token=page_token).execute() - products = pd.concat( - [products, pd.DataFrame(result["resources"], columns=self.products.get_columns())], ignore_index=True - ) - page_token = result.get("nextPageToken") - if not page_token: - break - - if params["startId"] and params["endId"]: - start_id = int(params["startId"]) - end_id = int(params["endId"]) - elif params["startId"]: - start_id = int(params["startId"]) - end_id = start_id + 10 - elif params["endId"]: - end_id = int(params["endId"]) - start_id = end_id - 10 - else: - raise Exception("startId or endId must be specified") - - products = products.drop(products[(products["id"] < start_id) | (products["id"] > end_id)].index) - - return products - - def update_products(self, params: dict = None) -> DataFrame: - """ - Update products - Args: - params (dict): query parameters - Returns: - DataFrame - """ - body = {key: value for key, value in params.items() if key in self.products.get_columns()} - service = self.connect() - if params["product_id"]: - result = ( - service.products() - .update( - merchantId=self.merchant_id, - productId=params["product_id"], - updateMask=params["updateMask"], - body=body, - ) - .execute() - ) - - return result - else: - df = pd.DataFrame(columns=["productId", "status"]) - if not params["startId"]: - start_id = int(params["endId"]) - 10 - elif not params["endId"]: - end_id = int(params["startId"]) + 10 - else: - start_id = int(params["startId"]) - end_id = int(params["endId"]) - - for i in range(start_id, end_id): - service.products().update( - merchantId=self.merchant_id, productId=i, updateMask=params["updateMask"], body=body - ).execute() - df = pd.concat([df, pd.DataFrame([{"productId": str(i), "status": "updated"}])], ignore_index=True) - return df - - def delete_products(self, params: dict = None) -> DataFrame: - """ - Delete products - Args: - params (dict): query parameters - Returns: - DataFrame - """ - service = self.connect() - args = {key: value for key, value in params.items() if key in ["feedId"] and value is not None} - if params["product_id"]: - result = ( - service.products().delete(merchantId=self.merchant_id, productId=params["product_id"], **args).execute() - ) - return result - else: - df = pd.DataFrame(columns=["productId", "status"]) - if not params["startId"]: - start_id = int(params["endId"]) - 10 - elif not params["endId"]: - end_id = int(params["startId"]) + 10 - else: - start_id = int(params["startId"]) - end_id = int(params["endId"]) - - for i in range(start_id, end_id): - service.products().delete(merchantId=self.merchant_id, productId=i, **args).execute() - df = pd.concat([df, pd.DataFrame([{"productId": str(i), "status": "deleted"}])], ignore_index=True) - return df - - def call_application_api(self, method_name: str = None, params: dict = None) -> DataFrame: - """ - Call Google Search API and map the data to pandas DataFrame - Args: - method_name (str): method name - params (dict): query parameters - Returns: - DataFrame - """ - if method_name == "get_accounts": - return self.get_accounts(params) - elif method_name == "delete_accounts": - return self.delete_accounts(params) - elif method_name == "get_orders": - return self.get_orders(params) - elif method_name == "delete_orders": - return self.delete_orders(params) - elif method_name == "get_products": - return self.get_products(params) - elif method_name == "update_products": - return self.update_products(params) - elif method_name == "delete_products": - return self.delete_products(params) - else: - raise NotImplementedError(f"Unknown method {method_name}") diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py b/mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py deleted file mode 100644 index 9d642a58593..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py +++ /dev/null @@ -1,432 +0,0 @@ -import pandas as pd -from mindsdb_sql_parser import ast -from pandas import DataFrame - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.date_utils import parse_utc_date -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - - -class AccountsTable(APITable): - """ - Table class for the Google Content API for Shopping Accounts table. - """ - - def select(self, query: ast.Select) -> DataFrame: - """ - Lists the sub-accounts in your Merchant Center account. - - Args: - query (ast.Select): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - # Get the start and end times from the conditions. - params = {} - accepted_params = ['view', 'label', 'name'] - for op, arg1, arg2 in conditions: - if arg1 == 'accountId': - if op == '=': - params[arg1] = arg2 - elif op == '>': - params['startId'] = arg2 - elif op == '<': - params['endId'] = arg2 - else: - raise NotImplementedError - if arg1 in accepted_params: - if op != '=': - raise NotImplementedError - params[arg1] = arg2 - else: - raise NotImplementedError - - if query.limit is not None: - params['maxResults'] = query.limit.value - - # Get the accounts from the API. - accounts = self.handler. \ - call_application_api(method_name='get_accounts', params=params) - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(accounts) == 0: - accounts = pd.DataFrame([], columns=selected_columns) - else: - accounts.columns = self.get_columns() - for col in set(accounts.columns).difference(set(selected_columns)): - accounts = accounts.drop(col, axis=1) - return accounts - - def delete(self, query: ast.Delete): - """ - Deletes accounts from your Merchant Center account. - - Args: - query (ast.Delete): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - # Get the start and end times from the conditions. - params = {} - for op, arg1, arg2 in conditions: - if arg1 == 'accountId': - if op == '=': - params[arg1] = arg2 - elif op == '>': - params['startId'] = arg2 - elif op == '<': - params['endId'] = arg2 - elif arg1 == 'force': - if op != '=': - raise NotImplementedError - params[arg1] = arg2 - else: - raise NotImplementedError - - # Delete the events in the Google Calendar API. - self.handler.call_application_api(method_name='delete_accounts', params=params) - - def get_columns(self) -> list: - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - 'name', - 'kind', - 'websiteUrl', - 'adultContent', - 'sellerId', - 'users', - 'id', - 'youtubeChannelLinks', - 'googleMyBusinessLink', - 'businessInformation', - 'automaticImprovements', - 'adsLinks', - 'cssId', - 'labelIds', - 'accountManagement', - 'automaticLabelIds', - 'conversionSettings' - ] - - -class OrdersTable(APITable): - """ - Table class for the Google Content API for Shopping Orders table. - """ - - def select(self, query: ast.Select) -> DataFrame: - """ - Lists the orders in your Merchant Center account. - - Args: - query (ast.Select): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - # Get the start and end times from the conditions. - params = {} - accepted_params = ['statuses', 'acknowledged'] - for op, arg1, arg2 in conditions: - if arg1 == 'orderId': - if op == '=': - params[arg1] = arg2 - elif op == '>': - params['startId'] = arg2 - elif op == '<': - params['endId'] = arg2 - else: - raise NotImplementedError - if arg1 == 'placedDateStart' or arg1 == 'placedDateEnd': - if op != '=': - raise NotImplementedError - params[arg1] = parse_utc_date(arg2) - if arg1 in accepted_params: - params[arg1] = parse_utc_date(arg2) - if op != '=': - raise NotImplementedError - params[arg1] = arg2 - else: - raise NotImplementedError - - if query.order_by is not None: - if query.order_by[0].value == 'placedDate': - if query.order_by[1].value == 'ASC': - params['orderBy'] = 'placedDateAsc' - else: - params['orderBy'] = 'placedDateDesc' - raise NotImplementedError - else: - raise NotImplementedError - - if query.limit is not None: - params['maxResults'] = query.limit.value - - # Get the orders from the API. - orders = self.handler. \ - call_application_api(method_name='get_orders', params=params) - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(orders) == 0: - orders = pd.DataFrame([], columns=selected_columns) - else: - orders.columns = self.get_columns() - for col in set(orders.columns).difference(set(selected_columns)): - orders = orders.drop(col, axis=1) - return orders - - def delete(self, query: ast.Delete): - """ - Deletes orders in your Merchant Center account. - - Args: - query (ast.Delete): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - # Get the start and end times from the conditions. - params = {} - for op, arg1, arg2 in conditions: - if arg1 == 'orderId': - if op == '=': - params[arg1] = arg2 - elif op == '>': - params['startId'] = arg2 - elif op == '<': - params['endId'] = arg2 - else: - raise NotImplementedError - else: - raise NotImplementedError - - # Delete the events in the Google Calendar API. - self.handler.call_application_api(method_name='delete_orders', params=params) - - def get_columns(self) -> list: - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - 'id', - 'merchantId', - 'merchantOrderId', - 'kind', - 'lineItems', - 'status', - 'paymentStatus', - 'acknowledged', - 'placedDate', - 'deliveryDetails', - 'customer', - 'shippingCost', - 'shippingCostTax', - 'refunds', - 'shipments', - 'billingAddress', - 'promotions', - 'taxCollector', - 'netPriceAmount', - 'netTaxAmount', - 'pickupDetails', - 'annotations' - ] - - -class ProductsTable(APITable): - """ - Table class for the Google Content API for Shopping Products table. - """ - - def select(self, query: ast.Select) -> DataFrame: - """ - Lists the products in your Merchant Center account. - - Args: - query (ast.Select): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - params = {} - for op, arg1, arg2 in conditions: - if arg1 == 'productId': - if op == '=': - params[arg1] = arg2 - elif op == '>': - params['startId'] = arg2 - elif op == '<': - params['endId'] = arg2 - else: - raise NotImplementedError - else: - raise NotImplementedError - - if query.limit is not None: - params['maxResults'] = query.limit.value - - # Get the products from the API. - products = self.handler. \ - call_application_api(method_name='get_products', params=params) - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(products) == 0: - products = pd.DataFrame([], columns=selected_columns) - else: - products.columns = self.get_columns() - for col in set(products.columns).difference(set(selected_columns)): - products = products.drop(col, axis=1) - return products - - def update(self, query: ast.Update): - """ - Updates products in your Merchant Center account. - - Args: - query (ast.Update): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - params = {} - values = query.values[0] - # Get the event data from the values. - accepted_params = self.get_columns() - for col, val in zip(query.update_columns, values): - if col in accepted_params: - params[col] = val - else: - raise NotImplementedError - - params['updateMask'] = ','.join(params.keys()) - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - # Get the start and end times from the conditions. - - for op, arg1, arg2 in conditions: - if arg1 == 'productId': - if op == '=': - params[arg1] = arg2 - elif op == '>': - params['startId'] = arg2 - elif op == '<': - params['endId'] = arg2 - else: - raise NotImplementedError - else: - raise NotImplementedError - - # Update the products in the Google Merchant Center API. - self.handler.call_application_api(method_name='update_products', params=params) - - def delete(self, query: ast.Delete): - """ - Deletes products in your Merchant Center account. - - Args: - query (ast.Delete): SQL query to parse. - - Returns: - Response: Response object containing the results. - """ - - # Parse the query to get the conditions. - conditions = extract_comparison_conditions(query.where) - # Get the start and end times from the conditions. - params = {} - for op, arg1, arg2 in conditions: - if arg1 == 'productId': - if op == '=': - params[arg1] = arg2 - elif op == '>': - params['startId'] = arg2 - elif op == '<': - params['endId'] = arg2 - else: - raise NotImplementedError - elif arg1 == 'feedId': - if op != '=': - raise NotImplementedError - params[arg1] = arg2 - else: - raise NotImplementedError - - # Delete the products in the Google Merchant Center API. - self.handler.call_application_api(method_name='delete_products', params=params) - - def get_columns(self) -> list: - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - 'id', - 'offerId', - 'title', - 'description', - 'link', - 'imageLink', - 'contentLanguage', - 'targetCountry', - 'channel', - 'channelExclusivity', - 'price', - 'salePrice', - 'salePriceEffectiveDate', - 'gtin', - 'mpn', - 'brand', - 'condition', - 'adult', - 'multipack', - 'isBundle', - 'energyEfficiencyClass', - 'minEnergyEfficiencyClass', - 'maxEnergyEfficiencyClass', - 'ageGroup', - 'color', - 'expirationDate', - 'disclosureDate', - 'availability', - 'source' - ] diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/icon.svg b/mindsdb/integrations/handlers/google_content_shopping_handler/icon.svg deleted file mode 100644 index ae5eff44ccc..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/icon.svg +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt b/mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt deleted file mode 100644 index d13929813b0..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -google-api-python-client -google-auth \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/tests/__init__.py b/mindsdb/integrations/handlers/google_content_shopping_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/google_content_shopping_handler/tests/test_google_content_shopping_handler.py b/mindsdb/integrations/handlers/google_content_shopping_handler/tests/test_google_content_shopping_handler.py deleted file mode 100644 index e548e4af785..00000000000 --- a/mindsdb/integrations/handlers/google_content_shopping_handler/tests/test_google_content_shopping_handler.py +++ /dev/null @@ -1,64 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.google_content_shopping_handler.google_content_shopping_handler import \ - GoogleContentShoppingHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class GoogleSearchConsoleHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "merchant_id": "1234567890", - "credentials": "/path/to/credentials.json" - }, - "file_storage": "/path/to/file_storage" - } - cls.handler = GoogleContentShoppingHandler('test_google_content_shopping_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_2_select_accounts_query(self): - query = "SELECT * FROM accounts LIMIT 10" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_3_select_orders_query(self): - query = "SELECT kind FROM orders WHERE orderId > 100" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_select_products_query(self): - query = "SELECT price FROM products WHERE brand = 'Google' LIMIT 100" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_delete_accounts_query(self): - query = "DELETE FROM accounts WHERE accountId = '1234567890'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_6_delete_orders_query(self): - query = "DELETE FROM orders WHERE orderId < '1234567890'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_7_delete_products_query(self): - query = "DELETE FROM products WHERE brand = 'Google'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_8_update_products_query(self): - query = "UPDATE products SET price = 100 WHERE productId > 120 AND updateMask = 'price'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/google_fit_handler/README.md b/mindsdb/integrations/handlers/google_fit_handler/README.md deleted file mode 100644 index 50993e26ae9..00000000000 --- a/mindsdb/integrations/handlers/google_fit_handler/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Google Fit API Handler - -To start connecting to your Google Fit app, visit [Google Fit Authorization](https://developers.google.com/fit/rest/v1/get-started) to obtain an authorization from Google and use the credentials.json file for the following steps. - -To create a database connected to Google Fit, you can either specify a path to the credentials file or manually input the credentials fields - -To connect using a path to the credentials file, run: -``` -CREATE DATABASE my_google_fit -With - ENGINE = 'google_fit', - PARAMETERS = { - "service_account_file": "Absolute path to the credentials file" - }; -``` -To connect using manually typed credentials, run: -``` -CREATE DATABASE my_google_fit -With - ENGINE = 'google_fit', - PARAMETERS = { - "service_account_json": { - "client_id": "cient id from the credentials file", - "project_id": "project id from the credentials file", - "auth_uri": "auth_uri from the credentials file", - "token_uri": "token uri from the credentials file", - "auth_provider_x509_cert_url": "auth_provider_x509_cert_url from the credentials file", - "client_secret": "client secret from the credentials file" - } - }; -``` - -This creates a database called my_google_fit. This database contains a table called aggregated_data that we can use to obtain our Google Fit data such as the step count. - - -## Searching for step count in SQL - -To search your step count data based on a time - -``` -SELECT * -FROM my_google_fit -WHERE - date > 'year-month-day' -LIMIT 20; -``` - -Note that in the WHERE clause, '>' means that the date is the start date and the end date is the current date, '<' means that the date is the end date and the start date is about one month ago. -If WHERE clause is not supplied, the data is of the last month by default. - -Once you have the data, you can utilize MindsDB's AI features to manipulate and extract information from it. - -## Special Notice -This is still a draft handler, and we will keep on adding features such as the options to search for other types of data based on the query. diff --git a/mindsdb/integrations/handlers/google_fit_handler/__about__.py b/mindsdb/integrations/handlers/google_fit_handler/__about__.py deleted file mode 100644 index 4e9ebab76bb..00000000000 --- a/mindsdb/integrations/handlers/google_fit_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Google Fit handler' -__package_name__ = 'mindsdb_google_fit_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the Google Fit APIs" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/google_fit_handler/__init__.py b/mindsdb/integrations/handlers/google_fit_handler/__init__.py deleted file mode 100644 index 4fcf115e47a..00000000000 --- a/mindsdb/integrations/handlers/google_fit_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .google_fit_handler import GoogleFitHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Google Fit' -name = 'google_fit' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py b/mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py deleted file mode 100644 index 1d47391bd7c..00000000000 --- a/mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +++ /dev/null @@ -1,169 +0,0 @@ -import os.path -import json -import pandas as pd -import pytz -from tzlocal import get_localzone -from datetime import datetime - -from google.auth.transport.requests import Request -from google.oauth2.credentials import Credentials -from google_auth_oauthlib.flow import InstalledAppFlow -from googleapiclient.discovery import Resource -from googleapiclient.discovery import build -from googleapiclient.errors import HttpError -from mindsdb_sql_parser import parse_sql - -from mindsdb.utilities import log -from mindsdb.integrations.handlers.google_fit_handler.google_fit_tables import GoogleFitTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) - -SCOPES = ["https://www.googleapis.com/auth/fitness.activity.read"] -DATE_FORMAT = "%Y-%m-%d" - -logger = log.getLogger(__name__) - - -class GoogleFitHandler(APIHandler): - def __init__(self, name: str = None, **kwargs): - super().__init__(name) - args = kwargs.get("connection_data", {}) - self.connection_args = {} - self.credentials_path = None - if "service_account_file" in args: - if os.path.isfile(args["service_account_file"]) is False: - raise Exception("service_account_file must be a path to the credentials.json file") - self.credentials_path = args["service_account_file"] - elif "service_account_json" in args: - self.connection_args = args["service_account_json"] - if ( - not isinstance(self.connection_args, dict) - or (("redirect_uris" not in self.connection_args.keys()) and len(self.connection_args) != 6) - or ("redirect_uris" in self.connection_args.keys()) - and len(self.connection_args) != 7 - ): - raise Exception("service_account_json has to be a dictionary with all 6 required fields") - self.connection_args["redirect_uris"] = ["http://localhost"] - self.credentials_path = "mindsdb/integrations/handlers/google_fit_handler/credentials.json" - else: - raise Exception("Connection args have to content ether service_account_file or service_account_json") - - self.api = None - self.is_connected = False - - aggregated_data = GoogleFitTable(self) - self._register_table("aggregated_data", aggregated_data) - - def connect(self) -> Resource: - if self.is_connected is True and self.api: - return self.api - if self.connection_args: - credentialDict = {"installed": self.connection_args} - f = open(self.credentials_path, "w") - f.write(json.dumps(credentialDict).replace(" ", "")) - f.close() - - creds = None - - if os.path.isfile("mindsdb/integrations/handlers/google_fit_handler/token.json"): - creds = Credentials.from_authorized_user_file( - "mindsdb/integrations/handlers/google_fit_handler/token.json", SCOPES - ) - if not creds or not creds.valid: - if creds and creds.expired and creds.refresh_token: - creds.refresh(Request()) - else: - flow = InstalledAppFlow.from_client_secrets_file(self.credentials_path, SCOPES) - creds = flow.run_local_server(port=0) - with open("mindsdb/integrations/handlers/google_fit_handler/token.json", "w") as token: - token.write(creds.to_json()) - self.api = build("fitness", "v1", credentials=creds) - - self.is_connected = True - return self.api - - def check_connection(self) -> StatusResponse: - response = StatusResponse(False) - - try: - self.connect() - response.success = True - - except Exception as e: - logger.error(f"Error connecting to Google Fit API: {e}!") - response.error_message = e - - self.is_connected = response.success - return response - - def retrieve_data(self, service, startTimeMillis, endTimeMillis, dataSourceId) -> dict: - try: - return ( - service.users() - .dataset() - .aggregate( - userId="me", - body={ - "aggregateBy": [{"dataTypeName": "com.google.step_count.delta", "dataSourceId": dataSourceId}], - "bucketByTime": {"durationMillis": 86400000}, - "startTimeMillis": startTimeMillis, - "endTimeMillis": endTimeMillis, - }, - ) - .execute() - ) - except HttpError: - raise HttpError - - def native_query(self, query: str = None) -> Response: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - api's json etc) - Returns: - HandlerResponse - """ - ast = parse_sql(query) - return self.query(ast) - - def get_steps(self, start_time_millis, end_time_millis) -> pd.DataFrame: - steps = {} - steps_data = self.retrieve_data( - self.api, - start_time_millis, - end_time_millis, - "derived:com.google.step_count.delta:com.google.android.gms:estimated_steps", - ) - for daily_step_data in steps_data["bucket"]: - local_date = datetime.fromtimestamp( - int(daily_step_data["startTimeMillis"]) / 1000, tz=pytz.timezone(str(get_localzone())) - ) - local_date_str = local_date.strftime(DATE_FORMAT) - - data_point = daily_step_data["dataset"][0]["point"] - if data_point: - count = data_point[0]["value"][0]["intVal"] - data_source_id = data_point[0]["originDataSourceId"] - steps[local_date_str] = {"steps": count, "originDataSourceId": data_source_id} - ret = pd.DataFrame.from_dict(steps) - ret = ret.T - ret = ret.drop("originDataSourceId", axis=1) - ret = ret.reset_index(drop=False) - return ret - - def call_google_fit_api(self, method_name: str = None, params: dict = None) -> pd.DataFrame: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INSERT, DELETE, etc - Returns: - DataFrame - """ - self.connect() - if method_name == "get_steps": - val = self.get_steps(params["start_time"], params["end_time"]) - return val - raise NotImplementedError("Method name {} not supported by Google Fit Handler".format(method_name)) diff --git a/mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py b/mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py deleted file mode 100644 index 769e5c82187..00000000000 --- a/mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py +++ /dev/null @@ -1,67 +0,0 @@ -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb_sql_parser import ast -import datetime -import pytz -import time -from tzlocal import get_localzone - - -class GoogleFitTable(APITable): - - def time_parser(self, args) -> int: - """ - Receive raw date string and return the calculated milliseconds based on the time string. - Args: - args: time string in the format of YYYY-MM-DD - Returns: - the input time string in the format of milliseconds - """ - ymd = args.split('-') - epoch0 = datetime.datetime(1970, 1, 1, tzinfo=pytz.utc) - time = pytz.timezone(str(get_localzone())).localize(datetime.datetime(int(ymd[0].rstrip()), int(ymd[1].rstrip()), int(ymd[2].rstrip()))) - return int((time - epoch0).total_seconds() * 1000) - - def select(self, query: ast.Select) -> Response: - - conditions = extract_comparison_conditions(query.where) - - params = {} - # get the local time - now = int(round(time.time() * 1000)) - - # hard coded for now as user default query time period - one_month = 2629746000 - for op, arg1, arg2 in conditions: - if op == 'or': - raise NotImplementedError('OR is not supported') - if arg1 == 'date': - date = self.time_parser(arg2) - if op == '>': - params['start_time'] = date - params['end_time'] = now - - # hard coded as a month - elif op == '<': - params['start_time'] = date - one_month - params['end_time'] = date - else: - raise NotImplementedError - else: - raise NotImplementedError('This query is not supported') - # if time is not provided in the query, the time range is one month ago to now - if not params: - params['start_time'] = now - one_month - params['end_time'] = now - result = self.handler.call_google_fit_api( - method_name='get_steps', - params=params - ) - return result - - def get_columns(self): - return [ - 'dates', - 'steps' - ] diff --git a/mindsdb/integrations/handlers/google_fit_handler/icon.svg b/mindsdb/integrations/handlers/google_fit_handler/icon.svg deleted file mode 100644 index cbae266795a..00000000000 --- a/mindsdb/integrations/handlers/google_fit_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_fit_handler/requirements.txt b/mindsdb/integrations/handlers/google_fit_handler/requirements.txt deleted file mode 100644 index 16dccc7bae5..00000000000 --- a/mindsdb/integrations/handlers/google_fit_handler/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -tzlocal -google -google-api-python-client -tzlocal -google-auth -google-auth-oauthlib \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_gemini_handler/README.md b/mindsdb/integrations/handlers/google_gemini_handler/README.md deleted file mode 100644 index 7c3a46f1fab..00000000000 --- a/mindsdb/integrations/handlers/google_gemini_handler/README.md +++ /dev/null @@ -1,182 +0,0 @@ ---- -title: Google Gemini -sidebarTitle: Google Gemini ---- - -This documentation describes the integration of MindsDB with [Google Gemini](), a generative artificial intelligence model developed by Google. The integration allows for the deployment of Google Gemini models within MindsDB, providing the models with access to data from various data sources. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Google Gemini within MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Obtain the Google Gemini API key required to deploy and use Google Gemini models within MindsDB. Follow the [instructions for obtaining the API key](https://ai.google.dev/gemini-api/docs/api-key). - -## Setup - -Create an AI engine from the [Google Gemini handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/google_gemini_handler). - -```sql -CREATE ML_ENGINE google_gemini_engine -FROM google_gemini -USING - api_key = 'api-key-value'; -``` - -## Example Usage - -### Create Gemini Pro Model (Prompt-Template) - -```sql -CREATE MODEL gem_p -PREDICT answer -USING - engine = 'google_gemini_engine', - prompt_template = 'Product Description: {{description}}. Question: {{question}}. Answer:', - model_name = 'gemini-pro'; -``` - -```sql -SELECT answer -FROM gem_p -WHERE description = " -What is Rabbit R1? -The Rabbit R1 is a pocket-sized AI device that promises a simpler and more intuitive way to interact with technology. Instead of being app-driven, the device relies on an AI model called LAMB (large action model) to understand your instructions and complete tasks autonomously. -The device has a bright orange body, and is small and lightweight with a touchscreen, scroll wheel, and a talk button. There is also a rotating camera that functions as eyes of the device. - -The Rabbit R1 runs on its own operating system, called the Rabbit OS, that eliminates the need for app stores and downloads, requiring only natural language voice input to navigate. The initial version supports integration with the likes of Uber, Spotify, and Amazon, with the AI able to train and learn using other apps in the future. -" -AND question = 'What are some key feature bullet points of this product?'; -``` - -### Create Gemini Contextual Model (Column-based) - -```sql -CREATE MODEL gemini_c -PREDICT answer -USING - engine = 'google_gemini_engine', - question_column = 'question', - context_column = 'context', - model_name = "gemini-pro"; -``` - -```sql -SELECT answer -FROM gem_qc -WHERE context = "Ashoka the Great was an Indian emperor of the Maurya Dynasty who ruled from 268 to 232 BCE. He is regarded as one of India's greatest emperors, known for his extensive empire, his efforts to spread Buddhism, and his commitment to non-violence and peaceful coexistence." -AND question = 'Ashoka was from which dynasty?'; -``` - -### Vision Mode Query - -```sql -CREATE MODEL gem_v -PREDICT answer -USING - engine = 'google_gemini_engine', - mode = 'vision', - img_url = 'url', - ctx_column = 'context'; -``` - -```sql -SELECT * -FROM gem_v -WHERE url = 'https://images.unsplash.com/photo-1589762738975-a6773160c7d7?q=80&w=1374&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D' -AND context = 'Is this man a superhuman?'; -``` - -### Embedding Mode - -```sql -CREATE MODEL gem_e -PREDICT answer -USING - engine = 'google_gemini_engine', - mode = 'embedding', - model_name = 'models/embedding-001', - question_column = 'question', - context_column = 'context', - title_column = 'title'; -- OPTIONAL -``` - -```sql -SELECT question, answer -FROM gem_e -WHERE question = 'How many moons are there in the solar system?' -USING - type = 'document'; -``` - -### JSON-Struct Mode - -```sql -CREATE MODEL product_extract_json -PREDICT json -USING - engine = 'google_gemini_engine', - json_struct = { - 'product_name': 'name', - 'product_category': 'category', - 'product_price': 'price' - }, - input_text = 'description'; -``` - -```sql -SELECT json -FROM product_extract_json -WHERE description = " -What is Rabbit R1? -The Rabbit R1 is a pocket-sized AI device that promises a simpler and more intuitive way to interact with technology. Instead of being app-driven, the device relies on an AI model called LAMB (large action model) to understand your instructions and complete tasks autonomously. -The device has a bright orange body, and is small and lightweight with a touchscreen, scroll wheel, and a talk button. There is also a rotating camera that functions as eyes of the device. IT provides all of this just for 300 dollars. - -The Rabbit R1 runs on its own operating system, called the Rabbit OS, that eliminates the need for app stores and downloads, requiring only natural language voice input to navigate. The initial version supports integration with the likes of Uber, Spotify, and Amazon, with the AI able to train and learn using other apps in the future. -"; -``` - -**Output** - -![image](https://github.com/mindsdb/mindsdb/assets/75653580/aad51d3f-4458-4bcc-b4a3-07983496d2fe) - - -### Create a model to generate text completions with the Gemini Pro model for your existing text data. - -```sql -CREATE MODEL google_gemini_model -PREDICT answer -USING - engine = 'google_gemini_engine', - column = 'question', - model = 'gemini-pro'; -``` - -### Query the model to get predictions. - -```sql -SELECT question, answer -FROM google_gemini_model -WHERE question = 'How are you?'; -``` - -### Query for batch predictions - -```sql -SELECT t.question, m.answer -FROM google_gemini_model AS m -JOIN data_table AS t; -``` - -### Describe Gemini Pro Model Metadata - -```sql -DESCRIBE MODEL `MODEL_NAME`.metadata; -``` - - -**Next Steps** - -Go to the [Use Cases](/use-cases/overview) section to see more examples. - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_gemini_handler/__about__.py b/mindsdb/integrations/handlers/google_gemini_handler/__about__.py deleted file mode 100644 index b6149264191..00000000000 --- a/mindsdb/integrations/handlers/google_gemini_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Google Gemini handler" -__package_name__ = "mindsdb_google_gemini_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for the Google Gemini (Bard) API" -__author__ = "someshfengde" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/google_gemini_handler/__init__.py b/mindsdb/integrations/handlers/google_gemini_handler/__init__.py deleted file mode 100644 index da9c07809c7..00000000000 --- a/mindsdb/integrations/handlers/google_gemini_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE -from .__about__ import __version__ as version, __description__ as description -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - -try: - from .google_gemini_handler import GoogleGeminiHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Google Gemini" -name = "google_gemini" -type = HANDLER_TYPE.ML -icon_path = "icon.svg" -permanent = False -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/google_gemini_handler/google_gemini_handler.py b/mindsdb/integrations/handlers/google_gemini_handler/google_gemini_handler.py deleted file mode 100644 index 4e8e6b7fc19..00000000000 --- a/mindsdb/integrations/handlers/google_gemini_handler/google_gemini_handler.py +++ /dev/null @@ -1,370 +0,0 @@ -import os -from typing import Dict, Optional - -from PIL import Image -import requests -import numpy as np -from io import BytesIO -import json -import textwrap -import google.generativeai as genai -import pandas as pd -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.utilities import log -from mindsdb.utilities.config import Config -from mindsdb.integrations.libs.llm.utils import get_completed_prompts -import concurrent.futures - -logger = log.getLogger(__name__) - - -class GoogleGeminiHandler(BaseMLEngine): - """ - Integration with the Google generative AI Python Library - """ - - name = "google_gemini" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.default_model = "gemini-pro" - self.default_embedding_model = "models/embedding-001" - self.generative = True - self.mode = "default" - - # Similiar to openai handler - @staticmethod - def create_validation(target, args=None, **kwargs): - if "using" not in args: - raise Exception( - "Gemini engine requires a USING clause! Refer to its documentation for more details." - ) - else: - args = args["using"] - - if ( - len( - set(args.keys()) - & { - "img_url", - "input_text", - "question_column", - "prompt_template", - "json_struct", - "prompt", - } - ) - == 0 - ): - raise Exception( - "One of `question_column`, `prompt_template` or `json_struct` is required for this engine." - ) - - keys_collection = [ - ["prompt_template"], - ["question_column", "context_column"], - ["prompt", "user_column", "assistant_column"], - ["json_struct", "input_text"], - ["img_url", "ctx_column"], - ] - for keys in keys_collection: - if keys[0] in args and any( - x[0] in args for x in keys_collection if x != keys - ): - raise Exception( - textwrap.dedent( - """\ - Please provide one of - 1) a `prompt_template` - 2) a `question_column` and an optional `context_column` - 3) a `json_struct` - 4) a `prompt' and 'user_column' and 'assistant_column` - 5) a `img_url` and optional `ctx_column` for mode=`vision` - """ - ) - ) - - # for all args that are not expected, raise an error - known_args = set() - # flatten of keys_collection - for keys in keys_collection: - known_args = known_args.union(set(keys)) - - # TODO: need a systematic way to maintain a list of known args - known_args = known_args.union( - { - "target", - "model_name", - "mode", - "title_column", - "predict_params", - "type", - "max_tokens", - "temperature", - "api_key", - } - ) - - unknown_args = set(args.keys()) - known_args - if unknown_args: - # return a list of unknown args as a string - raise Exception( - f"Unknown arguments: {', '.join(unknown_args)}.\n Known arguments are: {', '.join(known_args)}" - ) - - def create(self, target, args=None, **kwargs): - args = args["using"] - args["target"] = target - self.model_storage.json_set("args", args) - - def predict( - self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None - ) -> pd.DataFrame: - pred_args = args["predict_params"] if args else {} - args = self.model_storage.json_get("args") - df = df.reset_index(drop=True) - - # same as opeani handler for getting prompt template and mode - if pred_args.get("prompt_template", False): - base_template = pred_args[ - "prompt_template" - ] # override with predict-time template if available - elif args.get("prompt_template", False): - base_template = args["prompt_template"] - else: - base_template = None - - # Embedding Mode - if args.get("mode") == "embedding": - args["type"] = pred_args.get("type", "query") - return self.embedding_worker(args, df) - - elif args.get("mode") == "vision": - return self.vision_worker(args, df) - - elif args.get("mode") == "conversational": - # Enable chat mode using - # https://ai.google.dev/tutorials/python_quickstart#chat_conversations - # OR - # https://github.com/google/generative-ai-python?tab=readme-ov-file#developers-who-use-the-palm-api - pass - - else: - if args.get("prompt_template", False): - prompts, empty_prompt_ids = get_completed_prompts(base_template, df) - - # Disclaimer: The following code has been adapted from the OpenAI handler. - elif args.get("context_column", False): - empty_prompt_ids = np.where( - df[[args["context_column"], args["question_column"]]] - .isna() - .all(axis=1) - .values - )[0] - contexts = list(df[args["context_column"]].apply(lambda x: str(x))) - questions = list(df[args["question_column"]].apply(lambda x: str(x))) - prompts = [ - f"Give only answer for: \nContext: {c}\nQuestion: {q}\nAnswer: " - for c, q in zip(contexts, questions) - ] - - # Disclaimer: The following code has been adapted from the OpenAI handler. - elif args.get("json_struct", False): - empty_prompt_ids = np.where( - df[[args["input_text"]]].isna().all(axis=1).values - )[0] - prompts = [] - for i in df.index: - if "json_struct" in df.columns: - if isinstance(df["json_struct"][i], str): - df["json_struct"][i] = json.loads(df["json_struct"][i]) - json_struct = "" - for ind, val in enumerate(df["json_struct"][i].values()): - json_struct = json_struct + f"{ind}. {val}\n" - else: - json_struct = "" - for ind, val in enumerate(args["json_struct"].values()): - json_struct = json_struct + f"{ind + 1}. {val}\n" - - p = textwrap.dedent( - f"""\ - Using text starting after 'The text is:', give exactly {len(args['json_struct'])} answers to the questions: - {{{{json_struct}}}} - - Answers should be in the same order as the questions. - Answer should be in form of one JSON Object eg. {"{'key':'value',..}"} where key=question and value=answer. - If there is no answer to the question in the text, put a -. - Answers should be as short as possible, ideally 1-2 words (unless otherwise specified). - - The text is: - {{{{{args['input_text']}}}}} - """ - ) - p = p.replace("{{json_struct}}", json_struct) - for column in df.columns: - if column == "json_struct": - continue - p = p.replace(f"{{{{{column}}}}}", str(df[column][i])) - prompts.append(p) - elif "prompt" in args: - empty_prompt_ids = [] - prompts = list(df[args["user_column"]]) - else: - empty_prompt_ids = np.where( - df[[args["question_column"]]].isna().all(axis=1).values - )[0] - prompts = list(df[args["question_column"]].apply(lambda x: str(x))) - - # remove prompts without signal from completion queue - prompts = [j for i, j in enumerate(prompts) if i not in empty_prompt_ids] - - api_key = self._get_google_gemini_api_key(args) - genai.configure(api_key=api_key) - - # called gemini model withinputs - model = genai.GenerativeModel(args.get("model_name", self.default_model)) - results = [] - for m in prompts: - results.append(model.generate_content(m).text) - - pred_df = pd.DataFrame(results, columns=[args["target"]]) - return pred_df - - def _get_google_gemini_api_key(self, args, strict=True): - """ - API_KEY preference order: - 1. provided at model creation - 2. provided at engine creation - 3. GOOGLE_GENAI_API_KEY env variable - 4. google_gemini.api_key setting in config.json - """ - - if "api_key" in args: - return args["api_key"] - # 2 - connection_args = self.engine_storage.get_connection_args() - if "api_key" in connection_args: - return connection_args["api_key"] - # 3 - api_key = os.getenv("GOOGLE_GENAI_API_KEY") - if api_key is not None: - return api_key - # 4 - config = Config() - google_gemini_config = config.get("google_gemini", {}) - if "api_key" in google_gemini_config: - return google_gemini_config["api_key"] - - if strict: - raise Exception( - 'Missing API key "api_key". Either re-create this ML_ENGINE specifying the `api_key` parameter,\ - or re-create this model and pass the API key with `USING` syntax.' - ) - - def embedding_worker(self, args: Dict, df: pd.DataFrame): - if args.get("question_column"): - prompts = list(df[args["question_column"]].apply(lambda x: str(x))) - if args.get("title_column", None): - titles = list(df[args["title_column"]].apply(lambda x: str(x))) - else: - titles = None - - api_key = self._get_google_gemini_api_key(args) - genai.configure(api_key=api_key) - model_name = args.get("model_name", self.default_embedding_model) - task_type = args.get("type") - task_type = f"retrieval_{task_type}" - - if task_type == "retrieval_query": - results = [ - str( - genai.embed_content( - model=model_name, content=query, task_type=task_type - )["embedding"] - ) - for query in prompts - ] - elif titles: - results = [ - str( - genai.embed_content( - model=model_name, - content=doc, - task_type=task_type, - title=title, - )["embedding"] - ) - for title, doc in zip(titles, prompts) - ] - else: - results = [ - str( - genai.embed_content( - model=model_name, content=doc, task_type=task_type - )["embedding"] - ) - for doc in prompts - ] - - pred_df = pd.DataFrame(results, columns=[args["target"]]) - return pred_df - else: - raise Exception("Embedding mode needs a question_column") - - def vision_worker(self, args: Dict, df: pd.DataFrame): - def get_img(url): - # URL Validation - response = requests.get(url) - if response.status_code == 200 and response.headers.get( - "content-type", "" - ).startswith("image/"): - return Image.open(BytesIO(response.content)) - else: - raise Exception(f"{url} is not vaild image URL..") - - if args.get("img_url"): - urls = list(df[args["img_url"]].apply(lambda x: str(x))) - - else: - raise Exception("Vision mode needs a img_url") - - prompts = None - if args.get("ctx_column"): - prompts = list(df[args["ctx_column"]].apply(lambda x: str(x))) - - api_key = self._get_google_gemini_api_key(args) - genai.configure(api_key=api_key) - model = genai.GenerativeModel("gemini-pro-vision") - with concurrent.futures.ThreadPoolExecutor() as executor: - # Download images concurrently using ThreadPoolExecutor - imgs = list(executor.map(get_img, urls)) - # imgs = [Image.open(BytesIO(requests.get(url).content)) for url in urls] - if prompts: - results = [ - model.generate_content([img, text]).text - for img, text in zip(imgs, prompts) - ] - else: - results = [model.generate_content(img).text for img in imgs] - - pred_df = pd.DataFrame(results, columns=[args["target"]]) - - return pred_df - - # Disclaimer: The following code has been adapted from the OpenAI handler. - def describe(self, attribute: Optional[str] = None) -> pd.DataFrame: - - args = self.model_storage.json_get("args") - - if attribute == "args": - return pd.DataFrame(args.items(), columns=["key", "value"]) - elif attribute == "metadata": - api_key = self._get_google_gemini_api_key(args) - genai.configure(api_key=api_key) - model_name = args.get("model_name", self.default_model) - - meta = genai.get_model(f"models/{model_name}").__dict__ - return pd.DataFrame(meta.items(), columns=["key", "value"]) - else: - tables = ["args", "metadata"] - return pd.DataFrame(tables, columns=["tables"]) diff --git a/mindsdb/integrations/handlers/google_gemini_handler/icon.svg b/mindsdb/integrations/handlers/google_gemini_handler/icon.svg deleted file mode 100644 index 6eeb0572ccb..00000000000 --- a/mindsdb/integrations/handlers/google_gemini_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/google_gemini_handler/requirements.txt b/mindsdb/integrations/handlers/google_gemini_handler/requirements.txt deleted file mode 100644 index c0ae207c122..00000000000 --- a/mindsdb/integrations/handlers/google_gemini_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -google-generativeai==0.3.2 -pillow diff --git a/mindsdb/integrations/handlers/greptimedb_handler/README.md b/mindsdb/integrations/handlers/greptimedb_handler/README.md deleted file mode 100644 index 90c2dea2b68..00000000000 --- a/mindsdb/integrations/handlers/greptimedb_handler/README.md +++ /dev/null @@ -1,42 +0,0 @@ -## Implementation - -This handler is implemented by extending the MySQLHandler. - -Connect GreptimeDB to MindsDB by providing the following parameters: - -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. -* `user` is the database user. -* `password` is the database password. - -There are several optional parameters that can be used as well. - -* `ssl` is the `ssl` parameter value that indicates whether SSL is enabled (`True`) or disabled (`False`). -* `ssl_ca` is the SSL Certificate Authority. -* `ssl_cert` stores SSL certificates. -* `ssl_key` stores SSL keys. - -## Usage - -In order to make use of this handler and connect to the GreptimeDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE greptimedb_datasource -WITH - engine = 'greptimedb', - parameters = { - "host": "127.0.0.1", - "port": 4002, - "database": "public", - "user": "username", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows. - -```sql -SELECT * -FROM greptimedb_datasource.example_table; -``` diff --git a/mindsdb/integrations/handlers/greptimedb_handler/__about__.py b/mindsdb/integrations/handlers/greptimedb_handler/__about__.py deleted file mode 100644 index 1730b33e720..00000000000 --- a/mindsdb/integrations/handlers/greptimedb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB GreptimeDB handler' -__package_name__ = 'mindsdb_greptimedb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for GreptimeDB" -__author__ = 'Ning Sun' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/greptimedb_handler/__init__.py b/mindsdb/integrations/handlers/greptimedb_handler/__init__.py deleted file mode 100644 index 79ce45d25d6..00000000000 --- a/mindsdb/integrations/handlers/greptimedb_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .greptimedb_handler import GreptimeDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'GreptimeDB' -name = 'greptimedb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/greptimedb_handler/greptimedb_handler.py b/mindsdb/integrations/handlers/greptimedb_handler/greptimedb_handler.py deleted file mode 100644 index 2650537424f..00000000000 --- a/mindsdb/integrations/handlers/greptimedb_handler/greptimedb_handler.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -This is the GreptimeDB integration handler for mindsdb. It provides the routines -which provide for interacting with the database. - -Because GreptimeDB has built-in MySQL wire protocol support, this handler is simply - a subclass of mindsdb's MySQL handler -""" - -from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler - - -class GreptimeDBHandler(MySQLHandler): - """ - This handler handles connection and execution of GreptimeDB statements. - It's a subclass of default mysql handler - """ - - name = 'greptimedb' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/greptimedb_handler/icon.svg b/mindsdb/integrations/handlers/greptimedb_handler/icon.svg deleted file mode 100755 index 1a01a413072..00000000000 --- a/mindsdb/integrations/handlers/greptimedb_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/mindsdb/integrations/handlers/greptimedb_handler/requirements.txt b/mindsdb/integrations/handlers/greptimedb_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/greptimedb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/hackernews_handler/README.md b/mindsdb/integrations/handlers/hackernews_handler/README.md deleted file mode 100644 index 7c9f3c47780..00000000000 --- a/mindsdb/integrations/handlers/hackernews_handler/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# HackerNews Handler - -HackerNews handler for MindsDB provides interfaces to connect to HackerNews via APIs and pull data into MindsDB. - ---- - -## Table of Contents - -- [About HackerNews](#about-hackernews) - - [HackerNews Handler Implementation](#hackernews-handler-implementation) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) ---- -## About HackerNews - -HackerNews is a social news website that provides a platform for users to submit links, create content, and have discussions about various topics. It was created by the startup incubator Y Combinator. - -## HackerNews Handler Implementation - -This handler was implemented using the official HackerNews API. It provides a simple and easy-to-use interface to access the HackerNews API. - - -## Implemented Features - -- Fetch submissions from a subreddit based on sorting type and limit. -- (Add other implemented features here) - -## TODO - -- (List any pending features or improvements here) - -## Example Usage -``` -CREATE DATABASE my_hackernews -WITH -ENGINE = 'hackernews' -``` - -After setting up the HackerNews Handler, you can use SQL queries to fetch data from HackerNews: - -```sql -SELECT * -FROM my_hackernews.stories -LIMIT 2; -``` - -OR - -```sql -SELECT * -FROM my_hackernews.hnstories -LIMIT 5; -``` - -OR - -```SQL -SELECT * -FROM my_hackernews.jobstories -LIMIT 7; -``` - -OR - -```sql -SELECT * -FROM my_hackernews.showstories -LIMIT 5; -``` - -Each Post has a unique ID. You can use this ID to fetch comments for a particular post. - -```sql -SELECT * -FROM my_hackernews.comments -WHERE item_id=35662571 -LIMIT 1; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/hackernews_handler/__about__.py b/mindsdb/integrations/handlers/hackernews_handler/__about__.py deleted file mode 100644 index 5d05e85a01e..00000000000 --- a/mindsdb/integrations/handlers/hackernews_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB HackerNews handler' -__package_name__ = 'mindsdb_hn_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for HackerNews" -__author__ = 'Maro Akpobi' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/hackernews_handler/__init__.py b/mindsdb/integrations/handlers/hackernews_handler/__init__.py deleted file mode 100644 index 35a24b2431a..00000000000 --- a/mindsdb/integrations/handlers/hackernews_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .hn_handler import ( - HackerNewsHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Hacker News' -name = 'hackernews' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/hackernews_handler/hn_handler.py b/mindsdb/integrations/handlers/hackernews_handler/hn_handler.py deleted file mode 100644 index 8ba6a06dee0..00000000000 --- a/mindsdb/integrations/handlers/hackernews_handler/hn_handler.py +++ /dev/null @@ -1,100 +0,0 @@ -import requests -import pandas as pd -from mindsdb.utilities import log -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, HandlerResponse as Response, RESPONSE_TYPE -from .hn_table import StoriesTable, CommentsTable, HNStoriesTable, JobStoriesTable, ShowStoriesTable - -logger = log.getLogger(__name__) - - -class HackerNewsHandler(APIHandler): - """ - A class for handling connections and interactions with the Hacker News API. - """ - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - self.base_url = 'https://hacker-news.firebaseio.com/v0' - - stories = StoriesTable(self) - self._register_table('stories', stories) - - hnstories = HNStoriesTable(self) - self._register_table('hnstories', hnstories) - - jobstories = JobStoriesTable(self) - self._register_table('jobstories', jobstories) - - showstories = ShowStoriesTable(self) - self._register_table('showstories', showstories) - - comments = CommentsTable(self) - self._register_table('comments', comments) - - def connect(self): - return - - def check_connection(self) -> StatusResponse: - try: - response = requests.get(f'{self.base_url}/maxitem.json') - response.raise_for_status() - return StatusResponse(True) - except Exception as e: - logger.error(f'Error checking connection: {e}') - return StatusResponse(False, str(e)) - - def native_query(self, query_string: str = None): - method_name, params = self.parse_native_query(query_string) - - df = self.call_hackernews_api(method_name, params) - - return Response( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - - def get_df_from_class(self, table: StoriesTable = None, limit: int = None): - url = f'{self.base_url}/{table.json_endpoint}' - response = requests.get(url) - data = response.json() - stories_data = [] - if limit is None: - limit = len(data) - for story_id in data[:limit]: - url = f'{self.base_url}/item/{story_id}.json' - response = requests.get(url) - story_data = response.json() - stories_data.append(story_data) - return pd.DataFrame(stories_data, columns=table.columns) - - def call_hackernews_api(self, method_name: str = None, params: dict = None): - story_method_handlers = { - 'get_top_stories': StoriesTable, - 'ask_hn_stories': HNStoriesTable, - 'get_job_stories': JobStoriesTable, - 'show_hn_stories': ShowStoriesTable, - } - if method_name in story_method_handlers: - table = story_method_handlers[method_name] - df = self.get_df_from_class(table) - elif method_name == 'get_comments': - item_id = params.get('item_id') - url = f'{self.base_url}/item/{item_id}.json' - response = requests.get(url) - item_data = response.json() - if 'kids' in item_data: - comments_data = [] - for comment_id in item_data['kids']: - url = f'{self.base_url}/item/{comment_id}.json' - response = requests.get(url) - comment_data = response.json() - comments_data.append(comment_data) - df = pd.DataFrame(comments_data) - else: - df = pd.DataFrame() - else: - raise ValueError(f'Unknown method_name: {method_name}') - - return df diff --git a/mindsdb/integrations/handlers/hackernews_handler/hn_table.py b/mindsdb/integrations/handlers/hackernews_handler/hn_table.py deleted file mode 100644 index 11d88c788d2..00000000000 --- a/mindsdb/integrations/handlers/hackernews_handler/hn_table.py +++ /dev/null @@ -1,151 +0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from typing import List - - -class StoriesTable(APITable): - json_endpoint = "topstories.json" - columns = ['id', 'time', 'title', 'url', 'score', 'descendants'] - - def select(self, query: ast.Select) -> pd.DataFrame: - """Select data from the stories table and return it as a pandas DataFrame. - Args: - query (ast.Select): The SQL query to be executed. - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - hn_handler = self.handler - - # Extract the limit value from the SQL query, if it exists - limit = None - if query.limit is not None: - limit = query.limit.value - - df = hn_handler.get_df_from_class(self, limit) - - # Apply any WHERE clauses in the SQL query to the DataFrame - conditions = extract_comparison_conditions(query.where) - for condition in conditions: - if condition[0] == '=' and condition[1] == 'id': - df = df[df['id'] == int(condition[2])] - elif condition[0] == '>' and condition[1] == 'time': - timestamp = int(condition[2]) - df = df[df['time'] > timestamp] - - # Filter the columns in the DataFrame according to the SQL query - self.filter_columns(df, query) - - return df - - def get_columns(self): - """Get the list of column names for the stories table. - Returns: - list: A list of column names for the stories table. - """ - return self.columns - - def filter_columns(self, df, query): - """Filter the columns in the DataFrame according to the SQL query. - Args: - df (pandas.DataFrame): The DataFrame to filter. - query (ast.Select): The SQL query to apply to the DataFrame. - """ - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.value) - df = df[columns] - return df - - -class HNStoriesTable(StoriesTable): - json_endpoint = "askstories.json" - columns = ['id', 'time', 'title', 'text', 'score', 'descendants'] - - -class JobStoriesTable(StoriesTable): - json_endpoint = "jobstories.json" - columns = ['id', 'time', 'title', 'url', 'score', 'type'] - - -class ShowStoriesTable(StoriesTable): - json_endpoint = "showstories.json" - columns = ['id', 'time', 'title', 'text', 'score', 'descendants'] - - -class CommentsTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - """Select data from the comments table and return it as a pandas DataFrame. - Args: - query (ast.Select): The SQL query to be executed. - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - hn_handler = self.handler - - # Get the limit value from the SQL query, if it exists - limit = None - if query.limit is not None: - limit = query.limit.value - - # Get the item ID from the SQL query - item_id = None - conditions = extract_comparison_conditions(query.where) - for condition in conditions: - if condition[0] == '=' and condition[1] == 'item_id': - item_id = condition[2] - - if item_id is None: - raise ValueError('Item ID is missing in the SQL query') - - # Call the Hacker News API to get the comments for the specified item - comments_df = hn_handler.call_hackernews_api('get_comments', params={'item_id': item_id}) - - # Fill NaN values with 'deleted' - comments_df = comments_df.fillna('deleted') - # Filter the columns to those specified in the SQL query - self.filter_columns(comments_df, query) - - # Limit the number of results if necessary - if limit is not None: - comments_df = comments_df.head(limit) - - return comments_df - - def get_columns(self) -> List[str]: - """Get the list of column names for the comments table. - Returns: - list: A list of column names for the comments table. - """ - return [ - 'id', - 'by', - 'parent', - 'text', - 'time', - 'type', - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None) -> None: - """Filter the columns of a DataFrame to those specified in an SQL query. - Args: - result (pandas.DataFrame): The DataFrame to filter. - query (ast.Select): The SQL query containing the column names to filter on. - """ - if query is None: - return - - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - return - elif isinstance(target, ast.Identifier): - columns.append(target.value) - - if len(columns) > 0: - result = result[columns] diff --git a/mindsdb/integrations/handlers/hackernews_handler/icon.svg b/mindsdb/integrations/handlers/hackernews_handler/icon.svg deleted file mode 100644 index 8df62c58f89..00000000000 --- a/mindsdb/integrations/handlers/hackernews_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/hana_handler/README.md b/mindsdb/integrations/handlers/hana_handler/README.md deleted file mode 100644 index cea583db9a2..00000000000 --- a/mindsdb/integrations/handlers/hana_handler/README.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: SAP HANA -sidebarTitle: SAP HANA ---- - -This documentation describes the integration of MindsDB with [SAP HANA](https://www.sap.com/products/technology-platform/hana/what-is-sap-hana.html), a multi-model database with a column-oriented in-memory design that stores data in its memory instead of keeping it on a disk. -The integration allows MindsDB to access data from SAP HANA and enhance SAP HANA with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect SAP HANA to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to SAP HANA from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/hana_handler) as an engine. - -```sql -CREATE DATABASE sap_hana_datasource -WITH - ENGINE = 'hana', - PARAMETERS = { - "address": "123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com", - "port": "443", - "user": "demo_user", - "password": "demo_password", - "encrypt": true - }; -``` - -Required connection parameters include the following: - -* `address`: The hostname, IP address, or URL of the SAP HANA database. -* `port`: The port number for connecting to the SAP HANA database. -* `user`: The username for the SAP HANA database. -* `password`: The password for the SAP HANA database. - -Optional connection parameters include the following: - -* 'database': The name of the database to connect to. This parameter is not used for SAP HANA Cloud. -* `schema`: The database schema to use. Defaults to the user's default schema. -* `encrypt`: The setting to enable or disable encryption. Defaults to `True' - -## Usage - -Retrieve data from a specified table by providing the integration, schema and table names: - -```sql -SELECT * -FROM sap_hana_datasource.schema_name.table_name -LIMIT 10; -``` - -Run Teradata SQL queries directly on the connected Teradata database: - -```sql -SELECT * FROM sap_hana_datasource ( - - --Native Query Goes Here - SELECT customer, year, SUM(sales) - FROM t1 - GROUP BY ROLLUP(customer, year); - - SELECT customer, year, SUM(sales) - FROM t1 - GROUP BY GROUPING SETS - ( - (customer, year), - (customer) - ) - UNION ALL - SELECT NULL, NULL, SUM(sales) - FROM t1; - -); -``` - - -The above examples utilize `sap_hana_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the SAP HANA database. -* **Checklist**: - 1. Make sure the SAP HANA database is active. - 2. Confirm that address, port, user and password are correct. Try a direct connection using a client like DBeaver. - 3. Ensure a stable network between MindsDB and SAP HANA. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/hana_handler/__about__.py b/mindsdb/integrations/handlers/hana_handler/__about__.py deleted file mode 100644 index 5ec64b8a7dd..00000000000 --- a/mindsdb/integrations/handlers/hana_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB SAP HANA handler' -__package_name__ = 'mindsdb_hana_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for SAP HANA" -__author__ = 'Sudipto Ghosh' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022 - mindsdb' diff --git a/mindsdb/integrations/handlers/hana_handler/__init__.py b/mindsdb/integrations/handlers/hana_handler/__init__.py deleted file mode 100644 index a517e5b687d..00000000000 --- a/mindsdb/integrations/handlers/hana_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .hana_handler import HanaHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'SAP HANA' -name = 'hana' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/hana_handler/connection_args.py b/mindsdb/integrations/handlers/hana_handler/connection_args.py deleted file mode 100644 index 1e389bf2b86..00000000000 --- a/mindsdb/integrations/handlers/hana_handler/connection_args.py +++ /dev/null @@ -1,57 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - address={ - 'type': ARG_TYPE.STR, - 'description': 'The hostname, IP address, or URL of the SAP HANA database.', - 'required': True, - 'label': 'Address' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The port number for connecting to the SAP HANA database.', - 'required': True, - 'label': 'Port' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The username for the SAP HANA database.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password for the SAP HANA database.', - 'secret': True, - 'required': True, - 'label': 'Password' - }, - schema={ - 'type': ARG_TYPE.STR, - 'description': "The database schema to use. Defaults to the user's default schema.", - 'required': False, - 'label': 'Schema' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The name of the database to connect to. This parameter is not used for SAP HANA Cloud.', - 'required': False, - 'label': 'Database' - }, - encrypt={ - 'type': ARG_TYPE.BOOL, - 'description': 'The setting to enable or disable encryption. Default is `True`.', - 'required': False, - 'label': 'Encrypt' - } -) - -connection_args_example = OrderedDict( - host='123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com', - port=30013, - user='DBADMIN', - password='password' -) diff --git a/mindsdb/integrations/handlers/hana_handler/hana_handler.py b/mindsdb/integrations/handlers/hana_handler/hana_handler.py deleted file mode 100644 index eb04fd68338..00000000000 --- a/mindsdb/integrations/handlers/hana_handler/hana_handler.py +++ /dev/null @@ -1,259 +0,0 @@ -from typing import Any, Dict, Text - -from hdbcli import dbapi -from hdbcli.dbapi import Error, ProgrammingError -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from pandas import DataFrame -import sqlalchemy_hana.dialect as hana_dialect - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class HanaHandler(DatabaseHandler): - """ - This handler handles the connection and execution of SQL statements on SAP HANA. - """ - - name = 'hana' - - def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the SAP HANA database. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - """ - Closes the connection when the handler instance is deleted. - """ - if self.is_connected is True: - self.disconnect() - - def connect(self) -> dbapi.Connection: - """ - Establishes a connection to the SAP HANA database. - - Raises: - ValueError: If the expected connection parameters are not provided. - hdbcli.dbapi.Error: If an error occurs while connecting to the SAP HANA database. - - Returns: - hdbcli.dbapi.Connection: A connection object to the SAP HANA database. - """ - if self.is_connected is True: - return self.connection - - # Mandatory connection parameters. - if not all(key in self.connection_data for key in ['address', 'port', 'user', 'password']): - raise ValueError('Required parameters (address, port, user, password) must be provided.') - - config = { - 'address': self.connection_data['address'], - 'port': self.connection_data['port'], - 'user': self.connection_data['user'], - 'password': self.connection_data['password'], - } - - # Optional connection parameters. - if 'database' in self.connection_data: - config['databaseName'] = self.connection_data['database'] - - if 'schema' in self.connection_data: - config['currentSchema'] = self.connection_data['schema'] - - if 'encrypt' in self.connection_data: - config['encrypt'] = self.connection_data['encrypt'] - - try: - self.connection = dbapi.connect( - **config - ) - self.is_connected = True - return self.connection - except Error as known_error: - logger.error(f'Error connecting to SAP HANA, {known_error}!') - raise - except Exception as unknown_error: - logger.error(f'Unknown error connecting to Teradata, {unknown_error}!') - raise - - def disconnect(self) -> None: - """ - Closes the connection to the SAP HANA database if it's currently open. - """ - if self.is_connected is True: - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the SAP HANA database. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - with connection.cursor() as cur: - cur.execute('SELECT 1 FROM SYS.DUMMY') - response.success = True - except (Error, ProgrammingError, ValueError) as known_error: - logger.error(f'Connection check to SAP HANA failed, {known_error}!') - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f'Connection check to SAP HANA failed due to an unknown error, {unknown_error}!') - response.error_message = str(unknown_error) - - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: Text) -> Response: - """ - Executes a native SQL query on the SAP HANA database and returns the result. - - Args: - query (Text): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cur: - try: - cur.execute(query) - if not cur.description: - response = Response(RESPONSE_TYPE.OK) - else: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - connection.commit() - except ProgrammingError as programming_error: - logger.error(f'Error running query: {query} on {self.address}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(programming_error) - ) - connection.rollback() - except Exception as unknown_error: - logger.error(f'Unknown error running query: {query} on {self.address}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(unknown_error) - ) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode on the SAP HANA database and retrieves the data (if any). - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - renderer = SqlalchemyRender(hana_dialect.HANAHDBCLIDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables in the SAP HANA database. - - Returns: - Response: A response object containing a list of tables in the SAP HANA database. - """ - query = """ - SELECT SCHEMA_NAME, - TABLE_NAME, - 'BASE TABLE' AS TABLE_TYPE - FROM - SYS.TABLES - WHERE IS_SYSTEM_TABLE = 'FALSE' - AND IS_USER_DEFINED_TYPE = 'FALSE' - AND IS_TEMPORARY = 'FALSE' - - UNION - - SELECT SCHEMA_NAME, - VIEW_NAME AS TABLE_NAME, - 'VIEW' AS TABLE_TYPE - FROM - SYS.VIEWS - WHERE SCHEMA_NAME <> 'SYS' - AND SCHEMA_NAME NOT LIKE '_SYS%' - """ - return self.native_query(query) - - def get_columns(self, table_name: Text) -> Response: - """ - Retrieves column details for a specified table in the SAP HANA database. - - Args: - table_name (Text): The name of the table for which to retrieve column information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - query = f""" - SELECT COLUMN_NAME AS Field, - DATA_TYPE_NAME AS Type - FROM SYS.TABLE_COLUMNS - WHERE TABLE_NAME = '{table_name}' - - UNION ALL - - SELECT COLUMN_NAME AS Field, - DATA_TYPE_NAME AS Type - FROM SYS.VIEW_COLUMNS - WHERE VIEW_NAME = '{table_name}' - """ - return self.native_query(query) diff --git a/mindsdb/integrations/handlers/hana_handler/icon.svg b/mindsdb/integrations/handlers/hana_handler/icon.svg deleted file mode 100644 index 8b712a8ecda..00000000000 --- a/mindsdb/integrations/handlers/hana_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/hana_handler/requirements.txt b/mindsdb/integrations/handlers/hana_handler/requirements.txt deleted file mode 100644 index b63b667b94e..00000000000 --- a/mindsdb/integrations/handlers/hana_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sqlalchemy-hana -hdbcli diff --git a/mindsdb/integrations/handlers/hana_handler/tests/test_hana_handler.py b/mindsdb/integrations/handlers/hana_handler/tests/test_hana_handler.py deleted file mode 100644 index d66a0be3030..00000000000 --- a/mindsdb/integrations/handlers/hana_handler/tests/test_hana_handler.py +++ /dev/null @@ -1,65 +0,0 @@ -import os -import unittest - -from mindsdb.integrations.handlers.hana_handler.hana_handler import HanaHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -""" -create schema MINDSDB; - -create table MINDSDB.TEST -( - ID INTEGER not null, - NAME NVARCHAR(1), - DESCRIPTION NVARCHAR(1) -); - -create unique index MINDSDB.TEST_ID_INDEX - on MINDSDB.TEST (ID); - -alter table MINDSDB.TEST - add constraint TEST_PK - primary key (ID); - -insert into MINDSDB.TEST -values (1, 'h', 'w'); -""" - - -class HanaHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "address": os.environ.get('HANA_ADDRESS', 'localhost'), - "port": os.environ.get('HANA_PORT', 30015), - "user": "DBADMIN", - "password": os.environ.get('HANA_PASSWORD'), - "schema": "MINDSDB", - "encrypt": True - } - cls.handler = HanaHandler('test_hana_handler', cls.kwargs) - - def test_0_connect(self): - assert self.handler.connect() - - def test_1_check_connection(self): - assert self.handler.check_connection().success is True - - def test_2_get_columns(self): - assert self.handler.get_columns('TEST').resp_type is not RESPONSE_TYPE.ERROR - - def test_3_get_tables(self): - assert self.handler.get_tables().resp_type is not RESPONSE_TYPE.ERROR - - def test_4_select_query(self): - query = 'SELECT * FROM MINDSDB.TEST WHERE ID=2' - assert self.handler.query(query).resp_type is RESPONSE_TYPE.TABLE - - def test_5_update_query(self): - query = 'UPDATE MINDSDB.TEST SET NAME=\'s\' WHERE ID=1' - assert self.handler.query(query).resp_type is RESPONSE_TYPE.OK - - -if __name__ == "__main__": - unittest.main(failfast=True) diff --git a/mindsdb/integrations/handlers/hive_handler/README.md b/mindsdb/integrations/handlers/hive_handler/README.md deleted file mode 100644 index cbac1914208..00000000000 --- a/mindsdb/integrations/handlers/hive_handler/README.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Apache Hive -sidebarTitle: Apache Hive ---- - -This documentation describes the integration of MindsDB with [Apache Hive](https://hive.apache.org/), a data warehouse software project built on top of Apache Hadoop for providing data query and analysis. Hive gives an SQL-like interface to query data stored in various databases and file systems that integrate with Hadoop. -The integration allows MindsDB to access data from Apache Hive and enhance Apache Hive with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Apache Hive to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Apache Hive from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/hive_handler) as an engine. - -```sql -CREATE DATABASE hive_datasource -WITH - engine = 'hive', - parameters = { - "username": "demo_user", - "password": "demo_password", - "host": "127.0.0.1", - "database": "default" - }; -``` - -Required connection parameters include the following: - -* `host`: The hostname, IP address, or URL of the Apache Hive server. -* `database`: The name of the Apache Hive database to connect to. - -Optional connection parameters include the following: - -* `username`: The username for the Apache Hive database. -* `password`: The password for the Apache Hive database. -* `port`: The port number for connecting to the Apache Hive server. Default is `10000`. -* `auth`: The authentication mechanism to use. Default is `CUSTOM`. Other options are `NONE`, `NOSASL`, `KERBEROS` and `LDAP`. - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM hive_datasource.table_name -LIMIT 10; -``` - -Run HiveQL queries directly on the connected Apache Hive database: - -```sql -SELECT * FROM hive_datasource ( - - --Native Query Goes Here - FROM (FROM (FROM src - SELECT TRANSFORM(value) - USING 'mapper' - AS value, count) mapped - SELECT cast(value as double) AS value, cast(count as int) AS count - SORT BY value, count) sorted - SELECT TRANSFORM(value, count) - USING 'reducer' - AS whatever - -); -``` - - -The above examples utilize `hive_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Apache Hive database. -* **Checklist**: - 1. Ensure that the Apache Hive server is running and accessible - 2. Confirm that host, port, user, and password are correct. Try a direct Apache Hive connection using a client like DBeaver. - 3. Test the network connection between the MindsDB host and the Apache Hive server. - diff --git a/mindsdb/integrations/handlers/hive_handler/__about__.py b/mindsdb/integrations/handlers/hive_handler/__about__.py deleted file mode 100644 index f4e48582b48..00000000000 --- a/mindsdb/integrations/handlers/hive_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Hive handler' -__package_name__ = 'mindsdb_hive_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Hive2" -__author__ = 'Biswadip Paul' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/hive_handler/__init__.py b/mindsdb/integrations/handlers/hive_handler/__init__.py deleted file mode 100644 index 1d83d0cbecd..00000000000 --- a/mindsdb/integrations/handlers/hive_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .hive_handler import HiveHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Hive' -name = 'hive' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/hive_handler/connection_args.py b/mindsdb/integrations/handlers/hive_handler/connection_args.py deleted file mode 100644 index ffd552ba378..00000000000 --- a/mindsdb/integrations/handlers/hive_handler/connection_args.py +++ /dev/null @@ -1,53 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - username={ - 'type': ARG_TYPE.STR, - 'description': 'The username for the Apache Hive database.', - 'required': False, - 'label': 'Username' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password for the Apache Hive database.', - 'secret': True, - 'required': False, - 'label': 'Password' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The name of the Apache Hive database to connect to.', - 'required': True, - 'label': 'Database' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The hostname, IP address, or URL of the Apache Hive server.. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The port number for connecting to the Apache Hive server. Default is `10000`.', - 'required': False, - 'label': 'Port' - }, - auth={ - 'type': ARG_TYPE.STR, - 'description': 'The authentication mechanism to use. Default is `CUSTOM`. Other options are `NONE`, `NOSASL`, `KERBEROS` and `LDAP`.', - 'required': False, - 'label': 'Authentication' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port='10000', - auth='CUSTOM', - user='root', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/hive_handler/hive_handler.py b/mindsdb/integrations/handlers/hive_handler/hive_handler.py deleted file mode 100644 index 41ab1fee8eb..00000000000 --- a/mindsdb/integrations/handlers/hive_handler/hive_handler.py +++ /dev/null @@ -1,224 +0,0 @@ -from typing import Text, Dict, Optional - -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode -import pandas as pd -from pyhive import (hive, sqlalchemy_hive) -from pyhive.exc import OperationalError -from thrift.transport.TTransport import TTransportException - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class HiveHandler(DatabaseHandler): - """ - This handler handles the connection and execution of SQL statements on Apache Hive. - """ - - name = 'hive' - - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the Apache Hive server. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self) -> None: - """ - Closes the connection when the handler instance is deleted. - """ - if self.is_connected: - self.disconnect() - - def connect(self) -> hive.Connection: - """ - Establishes a connection to the Apache Hive server. - - Raises: - ValueError: If the expected connection parameters are not provided. - - Returns: - hive.Connection: A connection object to the Apache Hive server. - """ - if self.is_connected: - return self.connection - - # Mandatory connection parameters. - if not all(key in self.connection_data for key in ['host', 'database']): - raise ValueError('Required parameters (account, database) must be provided.') - - config = { - 'host': self.connection_data.get('host'), - 'database': self.connection_data.get('database') - } - - # Optional connection parameters. - optional_parameters = ['port', 'username', 'password'] - for param in optional_parameters: - if param in self.connection_data: - config[param] = self.connection_data[param] - - config['auth'] = self.connection_data.get('auth', 'CUSTOM').upper() - - try: - self.connection = hive.Connection(**config) - self.is_connected = True - return self.connection - except (OperationalError, TTransportException, ValueError) as known_error: - logger.error(f'Error connecting to Hive {config["database"]}, {known_error}!') - raise - except Exception as unknown_error: - logger.error(f'Unknown error connecting to Hive {config["database"]}, {unknown_error}!') - raise - - def disconnect(self) -> None: - """ - Closes the connection to the Apache Hive server if it's currently open. - """ - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Apache Hive server. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except (OperationalError, TTransportException, ValueError) as known_error: - logger.error(f'Connection check to Hive failed, {known_error}!') - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f'Connection check to Hive failed due to an unknown error, {unknown_error}!') - response.error_message = str(unknown_error) - - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: Text) -> Response: - """ - Executes a native SQL query on the Apache Hive server and returns the result. - - Args: - query (Text): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cur: - try: - cur.execute(query) - result = cur.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - pd.DataFrame( - result, - columns=[x[0].split('.')[-1] for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except OperationalError as operational_error: - logger.error(f'Error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(operational_error) - ) - connection.rollback() - except Exception as unknown_error: - logger.error(f'Unknown error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(unknown_error) - ) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode on the Apache Hive server and retrieves the data (if any). - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - renderer = SqlalchemyRender(sqlalchemy_hive.HiveDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables in the Apache Hive server. - - Returns: - Response: A response object containing a list of tables in the Apache Hive server. - """ - q = "SHOW TABLES" - result = self.native_query(q) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name: Text) -> Response: - """ - Retrieves column details for a specified table in the Apache Hive server. - - Args: - table_name (Text): The name of the table for which to retrieve column information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - q = f"DESCRIBE {table_name}" - result = self.native_query(q) - return result diff --git a/mindsdb/integrations/handlers/hive_handler/icon.svg b/mindsdb/integrations/handlers/hive_handler/icon.svg deleted file mode 100644 index a9995bbdaf1..00000000000 --- a/mindsdb/integrations/handlers/hive_handler/icon.svg +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/hive_handler/requirements.txt b/mindsdb/integrations/handlers/hive_handler/requirements.txt deleted file mode 100644 index 1be42692a42..00000000000 --- a/mindsdb/integrations/handlers/hive_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -thrift -thrift-sasl -pyhive diff --git a/mindsdb/integrations/handlers/hive_handler/tests/__init__.py b/mindsdb/integrations/handlers/hive_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py b/mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py deleted file mode 100644 index fd2eb5772c0..00000000000 --- a/mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py +++ /dev/null @@ -1,50 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.hive_handler.hive_handler import HiveHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class HiveHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "localhost", - "port": "10000", - "user": "admin", - "password": "password", - "database": "default", - "auth": "CUSTOM" - } - cls.handler = HiveHandler('test_hive_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.check_connection() - - def test_1_native_query_show_dbs(self): - dbs = self.handler.native_query("SHOW DATABASES;") - assert dbs['type'] is not RESPONSE_TYPE.ERROR - - def test_2_get_tables(self): - tbls = self.handler.get_tables() - assert tbls['type'] is not RESPONSE_TYPE.ERROR - - def test_5_drop_table(self): - res = self.handler.native_query("DROP TABLE IF EXISTS test_hdb") - assert res['type'] is not RESPONSE_TYPE.ERROR - - def test_4_create_table(self): - res = self.handler.native_query("CREATE TABLE IF NOT EXISTS test_hdb (test_col INT)") - assert res['type'] is not RESPONSE_TYPE.ERROR - - def test_6_describe_table(self): - described = self.handler.get_columns("test_hdb") - assert described['type'] is RESPONSE_TYPE.TABLE - - def test_7_select_query(self): - query = "SELECT * FROM test_mdb WHERE foo=238" - result = self.handler.query(query) - assert result['type'] is RESPONSE_TYPE.TABLE - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/hsqldb_handler/README.md b/mindsdb/integrations/handlers/hsqldb_handler/README.md deleted file mode 100644 index e50fbcc2e2f..00000000000 --- a/mindsdb/integrations/handlers/hsqldb_handler/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# HSQLDB handler - -This is the implementation of the HSQLDB handler for MindsDB. -To know more about how was this implemented [follow this link](http://hsqldb.org/doc/2.0/guide/guide.html#odbc-chapt). There is also a python code sample for you to [check](http://hsqldb.org/doc/2.0/verbatim/sample/sample.py) - -## HyperSQLDB - -HSQLDB (HyperSQL DataBase) is the leading SQL relational database system written in Java. It offers a small, fast multithreaded and transactional database engine with in-memory and disk-based tables and supports embedded and server modes. It includes a powerful command line SQL tool and simple GUI query tools. - -HSQLDB supports the widest range of SQL Standard features seen in any open source database engine: SQL:2016 core language features and an extensive list of SQL:2016 optional features. It supports full Advanced ANSI-92 SQL with only two exceptions. Many extensions to the Standard, including syntax compatibility modes and features of other popular database engines, are also supported. - -HyperSQL is fully multithreaded and supports high performance 2PL and MVCC (multiversion concurrency control) transaction control models. - -https://hsqldb.org/ - -## Implementation - -This handler was implemented using [pyodbc](https://pypi.org/project/pyodbc/), the Python ODBC bridge and the [Postgres ODBC Driver](https://www.postgresql.org/ftp/odbc/versions/). - -## Usage - -In order to make use of this handler and connect to a HyperSQL database in MindsDB, you must install [unixODBC](https://www.unixodbc.org/) along with [Postgres ODBC Driver](https://www.postgresql.org/ftp/odbc/versions/). There are [several guides](https://www.ibm.com/docs/en/db2/11.1?topic=managers-installing-unixodbc-driver-manager) for you to follow. The postgres odbc driver must be added in your unixODBC `odbcinst.ini` file as it follows: - -``` -[PostgreSQL Unicode] -Description = PostgreSQL ODBC driver (Unicode version) -Driver = psqlodbcw.so -Debug = 0 -CommLog = 1 -UsageCount = 1 -``` - -then, in mindsDB, the following syntax can be used to access your database, - -```sql -CREATE DATABASE exampledb -WITH -engine='hsqldb', -parameters={ - "server_name": "samples.mindsdb.com", - "port": "5432", - "database_name": "demo", - "username": "demo_user", - "password": "demo_password" -}; -``` - -Now, you can make queries to your database as follows, - -```sql -SELECT * FROM [your table]; -``` diff --git a/mindsdb/integrations/handlers/hsqldb_handler/__about__.py b/mindsdb/integrations/handlers/hsqldb_handler/__about__.py deleted file mode 100644 index 44dbe371634..00000000000 --- a/mindsdb/integrations/handlers/hsqldb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB HSQLDB handler' -__package_name__ = 'mindsdb_hsqldb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for HSQLDB" -__author__ = 'Enrique Atayde' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/hsqldb_handler/__init__.py b/mindsdb/integrations/handlers/hsqldb_handler/__init__.py deleted file mode 100644 index 0c0ec7801b3..00000000000 --- a/mindsdb/integrations/handlers/hsqldb_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .hsqldb_handler import HSQLDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'HyperSQLDB' -name = 'hsqldb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.png' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/hsqldb_handler/connection_args.py b/mindsdb/integrations/handlers/hsqldb_handler/connection_args.py deleted file mode 100644 index b955418da26..00000000000 --- a/mindsdb/integrations/handlers/hsqldb_handler/connection_args.py +++ /dev/null @@ -1,39 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - server_name={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the database' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Specify port to connect.' - }, - database_name={ - 'type': ARG_TYPE.STR, - 'description': ''' - The database name to use when connecting. - ''' - }, - username={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate.', - 'secret': True - }, - -) - -connection_args_example = OrderedDict( - server_name='localhost', - port=9001, - database_name='xdb', - username='SA', - password='password' -) diff --git a/mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py b/mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py deleted file mode 100644 index 3e66bd6c290..00000000000 --- a/mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py +++ /dev/null @@ -1,198 +0,0 @@ -import pandas as pd -import pyodbc - -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -logger = log.getLogger(__name__) - - -class HSQLDBHandler(DatabaseHandler): - """ - This handler handles connection and execution of the HyperSQL statements. - """ - - name = 'hsqldb' - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'hsqldb' - self.connection_args = kwargs.get('connection_data') - self.server_name = self.connection_args.get('server_name', 'localhost') - self.port = self.connection_args.get('port') - self.database_name = self.connection_args.get('database_name') - self.username = self.connection_args.get('username') - self.password = self.connection_args.get('password') - self.conn_str = f"DRIVER={{PostgreSQL Unicode}};SERVER={self.server_name};PORT={self.port};DATABASE={self.database_name};UID={self.username};PWD={self.password};Trusted_Connection=True" - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - self.connection = pyodbc.connect(self.conn_str, timeout=10) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to SQLite, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cursor: - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame.from_records( - result, - columns=[x[0] for x in cursor.description] - ) - ) - - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - renderer = SqlalchemyRender('postgres') - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - connection = self.connect() - cursor = connection.cursor() - cursor.execute("SELECT * FROM information_schema.tables WHERE table_schema NOT IN ('information_schema', 'pg_catalog') AND table_type='BASE TABLE'") - results = cursor.fetchall() - df = pd.DataFrame([x[2] for x in results], columns=['table_name']) # Workaround since cursor.tables() wont work with postgres driver - response = Response( - RESPONSE_TYPE.TABLE, - df - ) - - return response - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - connection = self.connect() - cursor = connection.cursor() - query = f'SELECT * FROM information_schema.columns WHERE table_name ={table_name}' # Workaround since cursor.columns() wont work with postgres driver - cursor.execute(query) - results = cursor.fetchall() - df = pd.DataFrame( - [(x[3], x[7]) for x in results], - columns=['column_name', 'data_type'] - ) - - response = Response( - RESPONSE_TYPE.TABLE, - df - ) - - return response diff --git a/mindsdb/integrations/handlers/hsqldb_handler/icon.png b/mindsdb/integrations/handlers/hsqldb_handler/icon.png deleted file mode 100644 index 55d034d5765..00000000000 Binary files a/mindsdb/integrations/handlers/hsqldb_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/hsqldb_handler/requirements.txt b/mindsdb/integrations/handlers/hsqldb_handler/requirements.txt deleted file mode 100644 index eef1d7371a1..00000000000 --- a/mindsdb/integrations/handlers/hsqldb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pyodbc==4.0.34 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/hubspot_handler/__init__.py b/mindsdb/integrations/handlers/hubspot_handler/__init__.py index e49bf704468..d10f473186c 100644 --- a/mindsdb/integrations/handlers/hubspot_handler/__init__.py +++ b/mindsdb/integrations/handlers/hubspot_handler/__init__.py @@ -19,7 +19,7 @@ name = "hubspot" type = HANDLER_TYPE.DATA icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ "Handler", diff --git a/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_tables.py b/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_tables.py index c81fd48fed2..4017789cd20 100644 --- a/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_tables.py +++ b/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_tables.py @@ -12,14 +12,14 @@ from typing import Any import pandas as pd -from mindsdb.integrations.libs.api_handler import APIResource +from mindsdb.integrations.libs.api_handler import MetaAPIResource from mindsdb.integrations.utilities.sql_utils import FilterCondition, SortColumn from mindsdb.utilities import log logger = log.getLogger(__name__) -class HubSpotAssociationTable(APIResource): +class HubSpotAssociationTable(MetaAPIResource): """ Base class for HubSpot association tables. @@ -37,12 +37,6 @@ def get_columns(self) -> list[str]: """Return column names for the association table.""" return [self.FROM_ID_COLUMN, self.TO_ID_COLUMN, "association_type", "association_label"] - def select(self, query) -> pd.DataFrame: - """Execute SELECT query on association table.""" - result_limit = query.limit.value if query.limit else None - - return self.list(limit=result_limit) - def list( self, conditions: list[FilterCondition] | None = None, @@ -51,19 +45,114 @@ def list( targets: list[str] | None = None, **kwargs, ) -> pd.DataFrame: - """Fetch associations between objects.""" - associations = self._fetch_associations(limit=limit) + """Fetch associations between objects. + + When a condition on FROM_ID_COLUMN is present (eq or IN), the HubSpot + batch associations API is used so the query is O(filtered IDs) rather + than O(all objects). This makes JOIN queries like + FROM companies co + JOIN company_contacts cc ON cc.company_id = co.id + JOIN contacts c ON c.id = cc.contact_id + efficient. + """ + from_id_values = self._extract_from_id_conditions(conditions) + + if from_id_values: + associations = self._fetch_associations_by_ids(from_id_values, limit=limit) + else: + associations = self._fetch_associations(limit=limit) if not associations: return pd.DataFrame(columns=self.get_columns()) df = pd.DataFrame(associations) - if conditions: - df = self._apply_conditions(df, conditions) + # Apply any remaining (non-FROM_ID_COLUMN) conditions + remaining = [ + c for c in (conditions or []) if (c.column if hasattr(c, "column") else c[1]) != self.FROM_ID_COLUMN + ] + if remaining: + df = self._apply_conditions(df, remaining) return df + def _extract_from_id_conditions(self, conditions: list[FilterCondition] | None) -> list[str] | None: + """Return FROM_ID_COLUMN values from eq/IN conditions and mark them applied.""" + if not conditions: + return None + for cond in conditions: + column = cond.column if hasattr(cond, "column") else cond[1] + if column != self.FROM_ID_COLUMN: + continue + op = str(cond.op.value if hasattr(cond, "op") and hasattr(cond.op, "value") else cond[0]).lower() + value = cond.value if hasattr(cond, "value") else cond[2] + if op in ("=", "==", "eq") and value is not None: + if hasattr(cond, "applied"): + cond.applied = True + return [str(value)] + if op == "in": + vals = list(value) if isinstance(value, (list, tuple, set)) else [value] + valid = [str(v) for v in vals if v is not None] + if valid: + if hasattr(cond, "applied"): + cond.applied = True + return valid + return None + + def _fetch_associations_by_ids(self, from_ids: list[str], limit: int | None = None) -> list[dict[str, Any]]: + """Use HubSpot batch associations API for specific from-object IDs.""" + from hubspot.crm.associations.models import ( + BatchInputPublicObjectId, + PublicObjectId, + ) + + hubspot = self.handler.connect() + BATCH = 100 + results: list[dict[str, Any]] = [] + + for i in range(0, len(from_ids), BATCH): + chunk = from_ids[i : i + BATCH] + try: + resp = hubspot.crm.associations.batch_api.read( + self.FROM_OBJECT_TYPE, + self.TO_OBJECT_TYPE, + BatchInputPublicObjectId(inputs=[PublicObjectId(id=fid) for fid in chunk]), + ) + for multi in resp.results or []: + from_id = str( + (multi._from or {}).get("id", "") + if isinstance(multi._from, dict) + else getattr(multi._from, "id", "") + ) + for assoc in multi.to or []: + to_id = str(assoc.get("id", "") if isinstance(assoc, dict) else getattr(assoc, "id", "")) + if not to_id: + continue + results.append( + { + self.FROM_ID_COLUMN: from_id, + self.TO_ID_COLUMN: to_id, + "association_type": None, + "association_label": None, + } + ) + if limit and len(results) >= limit: + logger.info( + f"Retrieved {len(results)} {self.FROM_OBJECT_TYPE}" + f"->{self.TO_OBJECT_TYPE} associations via batch API" + ) + return results + except Exception as e: + logger.warning( + f"Failed to batch fetch {self.FROM_OBJECT_TYPE}->{self.TO_OBJECT_TYPE} " + f"associations for chunk {chunk}: {e}" + ) + + logger.info( + f"Retrieved {len(results)} {self.FROM_OBJECT_TYPE}->{self.TO_OBJECT_TYPE} associations via batch API" + ) + return results + def _fetch_associations(self, limit: int | None = None) -> list[dict[str, Any]]: """ Fetch associations by getting source objects with their associations. @@ -262,6 +351,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "company_contacts", + "COLUMN_NAME": "company_id", + "REFERENCED_TABLE_NAME": "companies", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "company_contacts", + "COLUMN_NAME": "contact_id", + "REFERENCED_TABLE_NAME": "contacts", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class CompanyDealsTable(HubSpotAssociationTable): """Association table for company-deal relationships.""" @@ -307,6 +412,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "company_deals", + "COLUMN_NAME": "company_id", + "REFERENCED_TABLE_NAME": "companies", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "company_deals", + "COLUMN_NAME": "deal_id", + "REFERENCED_TABLE_NAME": "deals", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class CompanyTicketsTable(HubSpotAssociationTable): """Association table for company-ticket relationships.""" @@ -352,6 +473,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "company_tickets", + "COLUMN_NAME": "company_id", + "REFERENCED_TABLE_NAME": "companies", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "company_tickets", + "COLUMN_NAME": "ticket_id", + "REFERENCED_TABLE_NAME": "tickets", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class ContactCompaniesTable(HubSpotAssociationTable): """Association table for contact-company relationships.""" @@ -397,6 +534,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "contact_companies", + "COLUMN_NAME": "contact_id", + "REFERENCED_TABLE_NAME": "contacts", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "contact_companies", + "COLUMN_NAME": "company_id", + "REFERENCED_TABLE_NAME": "companies", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class ContactDealsTable(HubSpotAssociationTable): """Association table for contact-deal relationships.""" @@ -442,6 +595,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "contact_deals", + "COLUMN_NAME": "contact_id", + "REFERENCED_TABLE_NAME": "contacts", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "contact_deals", + "COLUMN_NAME": "deal_id", + "REFERENCED_TABLE_NAME": "deals", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class ContactTicketsTable(HubSpotAssociationTable): """Association table for contact-ticket relationships.""" @@ -487,6 +656,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "contact_tickets", + "COLUMN_NAME": "contact_id", + "REFERENCED_TABLE_NAME": "contacts", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "contact_tickets", + "COLUMN_NAME": "ticket_id", + "REFERENCED_TABLE_NAME": "tickets", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class DealCompaniesTable(HubSpotAssociationTable): """Association table for deal-company relationships.""" @@ -532,6 +717,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "deal_companies", + "COLUMN_NAME": "deal_id", + "REFERENCED_TABLE_NAME": "deals", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "deal_companies", + "COLUMN_NAME": "company_id", + "REFERENCED_TABLE_NAME": "companies", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class DealContactsTable(HubSpotAssociationTable): """Association table for deal-contact relationships.""" @@ -577,6 +778,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "deal_contacts", + "COLUMN_NAME": "deal_id", + "REFERENCED_TABLE_NAME": "deals", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "deal_contacts", + "COLUMN_NAME": "contact_id", + "REFERENCED_TABLE_NAME": "contacts", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class TicketCompaniesTable(HubSpotAssociationTable): """Association table for ticket-company relationships.""" @@ -622,6 +839,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "ticket_companies", + "COLUMN_NAME": "ticket_id", + "REFERENCED_TABLE_NAME": "tickets", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "ticket_companies", + "COLUMN_NAME": "company_id", + "REFERENCED_TABLE_NAME": "companies", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class TicketContactsTable(HubSpotAssociationTable): """Association table for ticket-contact relationships.""" @@ -667,6 +900,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "ticket_contacts", + "COLUMN_NAME": "ticket_id", + "REFERENCED_TABLE_NAME": "tickets", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "ticket_contacts", + "COLUMN_NAME": "contact_id", + "REFERENCED_TABLE_NAME": "contacts", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + class TicketDealsTable(HubSpotAssociationTable): """Association table for ticket-deal relationships.""" @@ -712,6 +961,22 @@ def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: }, ] + def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: + return [ + { + "TABLE_NAME": "ticket_deals", + "COLUMN_NAME": "ticket_id", + "REFERENCED_TABLE_NAME": "tickets", + "REFERENCED_COLUMN_NAME": "id", + }, + { + "TABLE_NAME": "ticket_deals", + "COLUMN_NAME": "deal_id", + "REFERENCED_TABLE_NAME": "deals", + "REFERENCED_COLUMN_NAME": "id", + }, + ] + # Export all association table classes ASSOCIATION_TABLE_CLASSES = { diff --git a/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_utils.py b/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_utils.py index d8b7de1de24..c79ddf51df9 100644 --- a/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_utils.py +++ b/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_utils.py @@ -42,6 +42,10 @@ ("companies", "primary_company_id"), ("deals", "primary_deal_id"), ], + "leads": [ + ("contacts", "primary_contact_id"), + ("companies", "primary_company_id"), + ], } @@ -54,8 +58,10 @@ def extract_primary_association(obj: Any, to_object_type: str) -> Optional[str]: to_objects = associations.get(to_object_type, {}) if isinstance(to_objects, dict): results = to_objects.get("results", []) + elif isinstance(to_objects, list): + results = to_objects else: - results = to_objects if isinstance(to_objects, list) else [] + results = getattr(to_objects, "results", []) or [] else: to_objects = getattr(associations, to_object_type, None) if to_objects is None: diff --git a/mindsdb/integrations/handlers/hubspot_handler/hubspot_oauth.py b/mindsdb/integrations/handlers/hubspot_handler/hubspot_oauth.py new file mode 100644 index 00000000000..9608055edad --- /dev/null +++ b/mindsdb/integrations/handlers/hubspot_handler/hubspot_oauth.py @@ -0,0 +1,151 @@ +import time +import urllib.parse +from typing import Optional + +from flask import request +from hubspot import HubSpot +from hubspot.utils.oauth import get_auth_url + +from mindsdb.utilities import log +from mindsdb.integrations.utilities.handlers.auth_utilities.exceptions import AuthException + +logger = log.getLogger(__name__) + +_STORAGE_KEY = "hubspot_oauth_tokens" +_DEFAULT_REDIRECT_PATH = "/verify-auth" +_TOKEN_EXPIRY_BUFFER = 0.95 + + +class HubSpotOAuth2Manager: + """ + Manages HubSpot OAuth2 authorization_code flow for MindsDB. + """ + + def __init__( + self, + handler_storage, + client_id: str, + client_secret: str, + scopes: Optional[str] = None, + optional_scopes: Optional[str] = None, + redirect_uri: Optional[str] = None, + code: Optional[str] = None, + datasource_name: Optional[str] = None, + ) -> None: + self.handler_storage = handler_storage + self.client_id = client_id + self.client_secret = client_secret + self.scopes = tuple(scopes.split()) if scopes else ("oauth",) + self.optional_scopes = tuple(optional_scopes.split()) if optional_scopes else None + self.redirect_uri = redirect_uri + self.code = code + self.datasource_name = datasource_name + + def get_access_token(self) -> str: + """ + Return a valid HubSpot access token. + Raises: + AuthException: User authorization required; auth_url is attached. + """ + stored = self.handler_storage.encrypted_json_get(_STORAGE_KEY) + logger.debug(f"Retrieved stored token data: {stored}") + if stored: + if time.time() < stored.get("expires_at", 0): + return stored["access_token"] + + if stored.get("refresh_token"): + try: + return self._refresh_token(stored["refresh_token"]) + except Exception as e: + logger.warning("HubSpot token refresh failed, reauthorization required: %s", e) + + runtime_code = self._get_runtime_code() + if runtime_code: + try: + return self._exchange_code(runtime_code) + except Exception as e: + # OAuth codes are single-use and expire quickly. + # If the exchange fails (BAD_AUTH_CODE), don't retry β€” prompt re-authorization. + logger.warning("HubSpot code exchange failed (code may be expired/used): %s", e) + + redirect_uri = self._get_redirect_uri() + auth_url = get_auth_url( + scope=self.scopes, + optional_scope=self.optional_scopes, + client_id=self.client_id, + redirect_uri=redirect_uri, + ) + # Fix for HubSpot's strict URL parsing. Python's URL encode translates spaces to `+`, but + # HubSpot's optional_scopes requires `%20` or `,`. + auth_url = auth_url.replace("+", "%20") + + # Append state with datasource info so the frontend can complete the connection + # even when localStorage context is missing (e.g. script-initiated flows). + if self.datasource_name: + state_data = urllib.parse.urlencode( + { + "datasource_name": self.datasource_name, + "integrations_name": "hubspot", + "client_id": self.client_id, + "client_secret": self.client_secret, + "redirect_uri": redirect_uri, + "scope": " ".join(self.scopes) if self.scopes else "oauth", + "optional_scope": " ".join(self.optional_scopes) if self.optional_scopes else "", + } + ) + auth_url += f"&state={urllib.parse.quote(state_data)}" + + raise AuthException( + f"HubSpot authorization required. Please visit: {auth_url}", + auth_url=auth_url, + ) + + def _get_runtime_code(self) -> Optional[str]: + """Return the OAuth authorization code from explicit value or active request context.""" + if self.code: + return self.code + try: + return request.args.get("code") + except RuntimeError: + return None + + def _exchange_code(self, code: str) -> str: + """Exchange an authorization code for access and refresh tokens.""" + response = HubSpot().oauth.tokens_api.create( + grant_type="authorization_code", + code=code, + redirect_uri=self._get_redirect_uri(), + client_id=self.client_id, + client_secret=self.client_secret, + ) + return self._persist_tokens(response) + + def _refresh_token(self, refresh_token: str) -> str: + """Obtain a new access token using the stored refresh token.""" + response = HubSpot().oauth.tokens_api.create( + grant_type="refresh_token", + refresh_token=refresh_token, + redirect_uri=self._get_redirect_uri(), + client_id=self.client_id, + client_secret=self.client_secret, + ) + return self._persist_tokens(response) + + def _persist_tokens(self, token_response) -> str: + """Save token data to encrypted handler storage and return the access token.""" + tokens = { + "access_token": token_response.access_token, + "refresh_token": token_response.refresh_token, + "expires_at": time.time() + token_response.expires_in * _TOKEN_EXPIRY_BUFFER, + } + self.handler_storage.encrypted_json_set(_STORAGE_KEY, tokens) + return tokens["access_token"] + + def _get_redirect_uri(self) -> str: + if self.redirect_uri: + return self.redirect_uri + try: + origin = request.headers.get("ORIGIN", "http://localhost:47334") + except RuntimeError: + origin = "http://localhost:47334" + return origin + _DEFAULT_REDIRECT_PATH diff --git a/tests/unused/unit/handler_tests/test_hubspot_handler.py b/mindsdb/integrations/handlers/hubspot_handler/tests/test_hubspot_handler.py similarity index 84% rename from tests/unused/unit/handler_tests/test_hubspot_handler.py rename to mindsdb/integrations/handlers/hubspot_handler/tests/test_hubspot_handler.py index 6c78f9a047c..d6f12d110e7 100644 --- a/tests/unused/unit/handler_tests/test_hubspot_handler.py +++ b/mindsdb/integrations/handlers/hubspot_handler/tests/test_hubspot_handler.py @@ -6,15 +6,10 @@ class HubSpotHandlerTest(unittest.TestCase): - @classmethod def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "access_token": os.environ.get('ACCESS_TOKEN') - } - } - cls.handler = HubspotHandler('test_hubspot_handler', **cls.kwargs) + cls.kwargs = {"connection_data": {"access_token": os.environ.get("ACCESS_TOKEN")}} + cls.handler = HubspotHandler("test_hubspot_handler", **cls.kwargs) def test_0_check_connection(self): assert self.handler.check_connection() diff --git a/tests/unused/unit/ml_handlers/test_huggingface_api.py b/mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api.py similarity index 96% rename from tests/unused/unit/ml_handlers/test_huggingface_api.py rename to mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api.py index 4ee84429123..aa357b3a65d 100644 --- a/tests/unused/unit/ml_handlers/test_huggingface_api.py +++ b/mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api.py @@ -18,7 +18,7 @@ def test_text_classification(self, mock_handler): self.run_sql("CREATE DATABASE proj") texts = ["I like you. I love you", "I don't like you. I hate you"] - df = pd.DataFrame(texts, columns=['texts']) + df = pd.DataFrame(texts, columns=["texts"]) self.set_handler(mock_handler, name="pg", tables={"df": df}) diff --git a/mindsdb/integrations/handlers/huggingface_handler/requirements.txt b/mindsdb/integrations/handlers/huggingface_handler/requirements.txt index b70a302214c..eae77291d1f 100644 --- a/mindsdb/integrations/handlers/huggingface_handler/requirements.txt +++ b/mindsdb/integrations/handlers/huggingface_handler/requirements.txt @@ -1,7 +1,7 @@ # NOTE: Any changes made here need to be made to requirements_cpu.txt as well datasets==2.16.1 evaluate==0.4.3 -nltk==3.9.1 -huggingface-hub==0.29.3 +nltk==3.9.3 +huggingface-hub==1.9.1 torch==2.8.0 -transformers >= 4.42.4 +transformers==5.5.0 diff --git a/mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt b/mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt index 7a4e0de6084..b509a2942f4 100644 --- a/mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +++ b/mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt @@ -1,7 +1,7 @@ # Needs to be installed with `pip install --extra-index-url https://download.pytorch.org/whl/ .[huggingface_cpu]` datasets==2.16.1 evaluate==0.4.3 -nltk==3.9.1 -huggingface-hub==0.29.3 +nltk==3.9.3 +huggingface-hub==1.9.1 torch==2.8.0+cpu -transformers >= 4.42.4 \ No newline at end of file +transformers==5.5.0 diff --git a/tests/unused/unit/ml_handlers/test_huggingface.py b/mindsdb/integrations/handlers/huggingface_handler/tests/test_huggingface.py similarity index 93% rename from tests/unused/unit/ml_handlers/test_huggingface.py rename to mindsdb/integrations/handlers/huggingface_handler/tests/test_huggingface.py index f83d62de50b..63e0e91cedd 100644 --- a/tests/unused/unit/ml_handlers/test_huggingface.py +++ b/mindsdb/integrations/handlers/huggingface_handler/tests/test_huggingface.py @@ -40,9 +40,7 @@ def hf_test_run(self, mock_handler, model_name, create_sql, predict_sql): # wait done = False for attempt in range(900): - ret = self.run_sql( - f"select status from mindsdb.models where name='{model_name}'" - ) + ret = self.run_sql(f"select status from mindsdb.models where name='{model_name}'") data = ret.data.to_lists() if len(data) > 0: if data[0][0] == "complete": @@ -55,9 +53,7 @@ def hf_test_run(self, mock_handler, model_name, create_sql, predict_sql): raise RuntimeError("predictor not created") # use predictor - ret = self.command_executor.execute_command( - parse_sql(predict_sql) - ) + ret = self.command_executor.execute_command(parse_sql(predict_sql)) assert ret.error_code is None @patch("mindsdb.integrations.handlers.postgres_handler.Handler") @@ -90,9 +86,7 @@ def test_hf_classification_bin(self, mock_handler): where text_spammy= 'It is the best time to launch the Robot to get more money. https:\\/\\/Gof.bode-roesch.de\\/Gof' """ # use predictor - ret = self.command_executor.execute_command( - parse_sql(predict_sql) - ) + ret = self.command_executor.execute_command(parse_sql(predict_sql)) assert ret.error_code is None @patch("mindsdb.integrations.handlers.postgres_handler.Handler") @@ -244,9 +238,7 @@ def test_hf_text_classification_finetune(self, mock_handler): where text_spammy= 'It is the best time to launch the Robot to get more money. https:\\/\\/Gof.bode-roesch.de\\/Gof' """ # use predictor - ret = self.command_executor.execute_command( - parse_sql(predict_sql) - ) + ret = self.command_executor.execute_command(parse_sql(predict_sql)) assert ret.error_code is None # fine tune @@ -260,9 +252,7 @@ def test_hf_text_classification_finetune(self, mock_handler): tokenizer_from = 'bert-base-uncased'; """ - ret = self.command_executor.execute_command( - parse_sql(fine_tune_sql) - ) + ret = self.command_executor.execute_command(parse_sql(fine_tune_sql)) assert ret.error_code is None @@ -297,9 +287,7 @@ def test_hf_zero_shot_classification_finetune(self, mock_handler): FROM pg (SELECT label, hypothesis FROM df); """ - ret = self.command_executor.execute_command( - parse_sql(fine_tune_sql) - ) + ret = self.command_executor.execute_command(parse_sql(fine_tune_sql)) assert ret.error_code is None @@ -335,9 +323,7 @@ def test_hf_translation_finetune(self, mock_handler): FROM pg (SELECT text_long, transl FROM df); """ - ret = self.command_executor.execute_command( - parse_sql(fine_tune_sql) - ) + ret = self.command_executor.execute_command(parse_sql(fine_tune_sql)) assert ret.error_code is None @@ -374,8 +360,6 @@ def test_hf_summarization_finetune(self, mock_handler): ); """ - ret = self.command_executor.execute_command( - parse_sql(fine_tune_sql) - ) + ret = self.command_executor.execute_command(parse_sql(fine_tune_sql)) assert ret.error_code is None diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/README.md b/mindsdb/integrations/handlers/ibm_cos_handler/README.md deleted file mode 100644 index ab330c4b34d..00000000000 --- a/mindsdb/integrations/handlers/ibm_cos_handler/README.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -title: IBM COS -sidebarTitle: IBM Cloud Object Storage ---- - -This documentation describes the integration of MindsDB with [IBM COS](https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-getting-started-cloud-object-storage), an object storage service that offers industry-leading scalability, data availability, security, and performance. - -## Prerequisites - -Before proceeding, ensure that MindsDB is installed locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to your IBM COS buckets from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE ibm_datasource -WITH ENGINE = 'ibm_cos', - PARAMETERS = { - 'cos_hmac_access_key_id': 'your-access-key-id', - 'cos_hmac_secret_access_key': 'your-secret-access-key', - 'cos_endpoint_url': 'https://s3.eu-gb.cloud-object-storage.appdomain.cloud', - 'bucket': 'your-bucket-name' -- Not required - }; -``` - - -Note that sample parameter values are provided here for reference, and you should replace them with your connection parameters. - - -Required connection parameters include the following: - -- `cos_hmac_access_key_id`: The IBM COS access key that identifies the user or IAM role. -- `cos_hmac_secret_access_key`: The IBM COS secret access key that identifies the user or IAM role. -- `cos_endpoint_url`: The IBM COS resource ID for your cloud Object Storage. - -Optional connection parameters include the following: - -- `bucket`: The name of the IBM COS bucket. If not provided, all available buckets can be queried, however, this can affect performance, especially when listing all of the available objects. - -## Usage - -Retrieve data from a specified object (file) in a IBM COS bucket by providing the integration name and the object key: - -```sql -SELECT * -FROM ibm_datasource.`my-file.csv`; -LIMIT 10; -``` - - -If a bucket name is provided in the `CREATE DATABASE` command, querying will be limited to that bucket and the bucket name can be ommitted from the object key as shown in the example above. However, if the bucket name is not provided, the object key must include the bucket name, such as `ibm_datasource.`my-bucket/my-folder/my-file.csv`. - -Wrap the object key in backticks (\`) to avoid any issues parsing the SQL statements provided. This is especially important when the object key contains spaces, special characters or prefixes, such as `my-folder/my-file.csv`. - -At the moment, the supported file formats are CSV, TSV, JSON, and Parquet. - - - -The above examples utilize `ibm_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -The special `files` table can be used to list all objects available in the specified bucket or all buckets if the bucket name is not provided: - -```sql -SELECT * -FROM ibm_datasource.files LIMIT 10 -``` - -The content of files can also be retrieved by explicitly requesting the `content` column. This column is empty by default to avoid unnecessary data transfer: - -```sql -SELECT path, content -FROM ibm_datasource.files LIMIT 10 -``` - - -This table will return all objects regardless of the file format, however, only the supported file formats mentioned above can be queried. - - -## Troubleshooting Guide - - -`Database Connection Error` - -- **Symptoms**: Failure to connect MindsDB with the Amazon S3 bucket. -- **Checklist**: 1. Make sure the IBM COS bucket exists. 2. Confirm that provided IBM COS credentials are correct. Try making a direct connection to the IBM COS bucket using the IBM CLI. 3. Ensure a stable network between MindsDB and IBM COS. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -- **Symptoms**: SQL queries failing or not recognizing object names containing spaces, special characters or prefixes. -- **Checklist**: 1. Ensure object names with spaces, special characters or prefixes are enclosed in backticks. 2. Examples: - _ Incorrect: SELECT _ FROM integration.travel/travel_data.csv - - Incorrect: SELECT _ FROM integration.'travel/travel_data.csv' - _ Correct: SELECT \_ FROM integration.\`travel/travel_data.csv\` - diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/__about__.py b/mindsdb/integrations/handlers/ibm_cos_handler/__about__.py deleted file mode 100644 index c0197eaec7e..00000000000 --- a/mindsdb/integrations/handlers/ibm_cos_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB IBM COS Handler" -__package_name__ = "mindsdb_ibm_cos_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for IBM Cloud Object Storage" -__author__ = "Ton Hoang Nguyen (Bill)" -__github__ = "'https://github.com/mindsdb/mindsdb'" -__pypi__ = "'https://github.com/mindsdb/mindsdb'" -__license__ = "MIT" -__copyright__ = "Copyright 2024 - mindsdb" diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/__init__.py b/mindsdb/integrations/handlers/ibm_cos_handler/__init__.py deleted file mode 100644 index d3dac2794bd..00000000000 --- a/mindsdb/integrations/handlers/ibm_cos_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .ibm_cos_handler import IBMCloudObjectStorageHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "IBM Cloud Object Storage" -name = "ibm_cos" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/connection_args.py b/mindsdb/integrations/handlers/ibm_cos_handler/connection_args.py deleted file mode 100644 index 9fa875fc015..00000000000 --- a/mindsdb/integrations/handlers/ibm_cos_handler/connection_args.py +++ /dev/null @@ -1,38 +0,0 @@ -from collections import OrderedDict -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - -connection_args = OrderedDict( - cos_hmac_access_key_id={ - "type": ARG_TYPE.PWD, - "description": "IBM COS HMAC Access Key ID.", - "required": True, - "label": "HMAC Access Key ID", - "secret": True, - }, - cos_hmac_secret_access_key={ - "type": ARG_TYPE.PWD, - "description": "IBM COS HMAC Secret Access Key.", - "required": True, - "label": "HMAC Secret Access Key", - "secret": True, - }, - cos_endpoint_url={ - "type": ARG_TYPE.STR, - "description": "IBM COS Endpoint URL (e.g., https://s3.eu-gb.cloud-object-storage.appdomain.cloud).", - "required": True, - "label": "Endpoint URL", - }, - bucket={ - "type": ARG_TYPE.STR, - "description": "IBM COS Bucket Name (Optional).", - "required": False, - "label": "Bucket Name", - }, -) - -connection_args_example = OrderedDict( - cos_hmac_access_key_id="YOUR_HMAC_ACCESS_KEY_ID", - cos_hmac_secret_access_key="YOUR_HMAC_SECRET_ACCESS_KEY", - cos_endpoint_url="https://s3.eu-gb.cloud-object-storage.appdomain.cloud", - bucket="YOUR_BUCKET_NAME", -) diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/ibm_cos_handler.py b/mindsdb/integrations/handlers/ibm_cos_handler/ibm_cos_handler.py deleted file mode 100644 index 98bd363e09c..00000000000 --- a/mindsdb/integrations/handlers/ibm_cos_handler/ibm_cos_handler.py +++ /dev/null @@ -1,318 +0,0 @@ -from contextlib import contextmanager -from typing import Text, Dict, Optional, List - -import ibm_boto3 -from ibm_botocore.client import ClientError -import pandas as pd -import duckdb - -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import Select, Identifier, Insert, Star, Constant - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - -from mindsdb.integrations.libs.api_handler import APIResource, APIHandler -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator - -logger = log.getLogger(__name__) - - -class ListFilesTable(APIResource): - def list( - self, - targets: List[str] = None, - conditions: List[FilterCondition] = None, - limit: int = None, - *args, - **kwargs, - ) -> pd.DataFrame: - buckets = None - for condition in conditions: - if condition.column == "bucket": - if condition.op == FilterOperator.IN: - buckets = condition.value - elif condition.op == FilterOperator.EQUAL: - buckets = [condition.value] - condition.applied = True - - data = [] - for obj in self.handler.get_objects(limit=limit, buckets=buckets): - path = obj["Key"] - if obj["Filename"].split(".")[1] in self.handler.supported_file_formats: - item = { - "path": path, - "bucket": obj["Bucket"], - "name": path[path.rfind("/") + 1 :], - "extension": path[path.rfind(".") + 1 :], - } - - data.append(item) - - return pd.DataFrame(data=data, columns=self.get_columns()) - - def get_columns(self) -> List[str]: - return ["path", "name", "extension", "bucket", "content"] - - -class FileTable(APIResource): - def list(self, targets: List[str] = None, table_name=None, *args, **kwargs) -> pd.DataFrame: - return self.handler.read_as_table(table_name) - - def add(self, data, table_name=None): - df = pd.DataFrame(data) - return self.handler.add_data_to_table(table_name, df) - - -class IBMCloudObjectStorageHandler(APIHandler): - name = "ibm_cos" - supported_file_formats = ["csv", "tsv", "json", "parquet"] - - def __init__(self, name: Text, connection_data: Optional[Dict] = None, **kwargs): - super().__init__(name) - self.connection_data = connection_data or {} - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - self.cache_thread_safe = True - self._regions = {} - - self.bucket = self.connection_data.get("bucket") - self._files_table = ListFilesTable(self) - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - if self.is_connected is True: - return self.connection - - required_params = [ - "cos_hmac_access_key_id", - "cos_hmac_secret_access_key", - "cos_endpoint_url", - ] - if not all(key in self.connection_data for key in required_params): - raise ValueError( - "Required parameters (cos_hmac_access_key_id, cos_hmac_secret_access_key, cos_endpoint_url) must be provided." - ) - - self.connection = self._connect_ibm_boto3() - self.is_connected = True - - return self.connection - - def _connect_ibm_boto3(self) -> ibm_boto3.client: - config = { - "aws_access_key_id": self.connection_data["cos_hmac_access_key_id"], - "aws_secret_access_key": self.connection_data["cos_hmac_secret_access_key"], - "endpoint_url": self.connection_data["cos_endpoint_url"], - } - - client = ibm_boto3.client("s3", **config) - - if self.bucket is not None: - client.head_bucket(Bucket=self.bucket) - else: - client.list_buckets() - - return client - - def disconnect(self): - if not self.is_connected: - return - self.connection = None - self.is_connected = False - - def check_connection(self) -> StatusResponse: - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self._connect_ibm_boto3() - response.success = True - except (ClientError, ValueError) as e: - logger.error(f"Error connecting to IBM COS with the given credentials, {e}!") - response.error_message = str(e) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - @contextmanager - def _connect_duckdb(self): - duckdb_conn = duckdb.connect(":memory:") - duckdb_conn.execute("INSTALL httpfs") - duckdb_conn.execute("LOAD httpfs") - - duckdb_conn.execute(f"SET s3_access_key_id='{self.connection_data['cos_hmac_access_key_id']}'") - duckdb_conn.execute(f"SET s3_secret_access_key='{self.connection_data['cos_hmac_secret_access_key']}'") - - endpoint_url = self.connection_data["cos_endpoint_url"] - if endpoint_url.startswith("https://"): - endpoint_url = endpoint_url[len("https://") :] - elif endpoint_url.startswith("http://"): - endpoint_url = endpoint_url[len("http://") :] - - duckdb_conn.execute(f"SET s3_endpoint='{endpoint_url}'") - duckdb_conn.execute("SET s3_url_style='path'") - duckdb_conn.execute("SET s3_use_ssl=true") - - try: - yield duckdb_conn - finally: - duckdb_conn.close() - - def _get_bucket(self, key): - if self.bucket is not None: - return self.bucket, key - - ar = key.split("/") - return ar[0], "/".join(ar[1:]) - - def read_as_table(self, key) -> pd.DataFrame: - bucket, key = self._get_bucket(key) - - with self._connect_duckdb() as connection: - cursor = connection.execute(f"SELECT * FROM 's3://{bucket}/{key}'") - - return cursor.fetchdf() - - def _read_as_content(self, key) -> None: - bucket, key = self._get_bucket(key) - - client = self.connect() - - obj = client.get_object(Bucket=bucket, Key=key) - content = obj["Body"].read() - return content - - def add_data_to_table(self, key, df) -> None: - bucket, key = self._get_bucket(key) - - try: - client = self.connect() - client.head_object(Bucket=bucket, Key=key) - except ClientError as e: - logger.error(f"Error querying the file {key} in the bucket {bucket}, {e}!") - raise e - - with self._connect_duckdb() as connection: - connection.execute(f"CREATE TABLE tmp_table AS SELECT * FROM 's3://{bucket}/{key}'") - - connection.execute("INSERT INTO tmp_table BY NAME SELECT * FROM df") - - connection.execute(f"COPY tmp_table TO 's3://{bucket}/{key}'") - - def query(self, query: ASTNode) -> Response: - self.connect() - if isinstance(query, Select): - table_name = query.from_table.parts[-1] - - if table_name == "files": - table = self._files_table - df = table.select(query) - - has_content = False - for target in query.targets: - if isinstance(target, Identifier) and target.parts[-1].lower() == "content": - has_content = True - break - if has_content: - df["content"] = df["path"].apply(self._read_as_content) - else: - extension = table_name.split(".")[-1] - if extension not in self.supported_file_formats: - logger.error(f"The file format {extension} is not supported!") - raise ValueError(f"The file format {extension} is not supported!") - - table = FileTable(self, table_name=table_name) - df = table.select(query) - - response = Response(RESPONSE_TYPE.TABLE, data_frame=df) - elif isinstance(query, Insert): - table_name = query.table.parts[-1] - table = FileTable(self, table_name=table_name) - table.insert(query) - response = Response(RESPONSE_TYPE.OK) - else: - raise NotImplementedError - - return response - - def get_objects(self, limit=None, buckets=None) -> List[dict]: - client = self.connect() - if self.bucket is not None: - add_bucket_to_name = False - scan_buckets = [self.bucket] - else: - add_bucket_to_name = True - resp = client.list_buckets() - scan_buckets = [b["Name"] for b in resp["Buckets"]] - - objects = [] - for bucket in scan_buckets: - if buckets is not None and bucket not in buckets: - continue - - resp = client.list_objects_v2(Bucket=bucket) - if "Contents" not in resp: - continue - - for obj in resp["Contents"]: - obj["Bucket"] = bucket - obj["Filename"] = obj["Key"] - if add_bucket_to_name: - obj["Key"] = f"{bucket}/{obj['Key']}" - objects.append(obj) - if limit is not None and len(objects) >= limit: - break - - return objects - - def get_tables(self) -> Response: - supported_names = [ - f"{obj['Key']}" for obj in self.get_objects() if obj["Key"].split(".")[-1] in self.supported_file_formats - ] - - supported_names.insert(0, "files") - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame(supported_names, columns=["table_name"]), - ) - - return response - - def get_columns(self, table_name: str) -> Response: - query = Select( - targets=[Star()], - from_table=Identifier(parts=[table_name]), - limit=Constant(1), - ) - - result = self.query(query) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - { - "column_name": result.data_frame.columns, - "data_type": [ - str(dtype) if str(dtype) != "object" else "string" for dtype in result.data_frame.dtypes - ], - } - ), - ) - - return response diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/icon.svg b/mindsdb/integrations/handlers/ibm_cos_handler/icon.svg deleted file mode 100644 index f8574f62860..00000000000 --- a/mindsdb/integrations/handlers/ibm_cos_handler/icon.svg +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/requirements.txt b/mindsdb/integrations/handlers/ibm_cos_handler/requirements.txt deleted file mode 100644 index e64c5eb3812..00000000000 --- a/mindsdb/integrations/handlers/ibm_cos_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -ibm-cos-sdk \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/tests/__init__.py b/mindsdb/integrations/handlers/ibm_cos_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/ibm_cos_handler/tests/test_ibm_cos_handler.py b/mindsdb/integrations/handlers/ibm_cos_handler/tests/test_ibm_cos_handler.py deleted file mode 100644 index 294a698bdfe..00000000000 --- a/mindsdb/integrations/handlers/ibm_cos_handler/tests/test_ibm_cos_handler.py +++ /dev/null @@ -1,23 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.ibm_cos_handler.ibm_cos_handler import ( - IBMCloudObjectStorageHandler, -) - - -class IBMCloudObjectStorageHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "cos_hmac_access_key_id": "your-access-key-id", - "cos_hmac_secret_access_key": "your-secret-access-key", - "cos_endpoint_url": "https://s3.eu-gb.cloud-object-storage.appdomain.cloud", - "bucket": "your-bucket-name", - } - cls.handler = IBMCloudObjectStorageHandler("test_ibm_cos_handler", cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/ignite_handler/README.md b/mindsdb/integrations/handlers/ignite_handler/README.md deleted file mode 100644 index 8bfdb569a7d..00000000000 --- a/mindsdb/integrations/handlers/ignite_handler/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Apache Ignite Handler - -This is the implementation of the Apache Ignite handler for MindsDB. - -## Apache Ignite -Apache Ignite is a distributed database for high-performance computing with in-memory speed. -
-https://ignite.apache.org/docs/latest/ - -## Implementation -This handler was implemented using the `pyignite` library, the Apache Ignite thin (binary protocol) client for Python. - -The required arguments to establish a connection are, -* `host`: the host name or IP address of the Apache Ignite cluster's node. -* `port`: the TCP/IP port of the Apache Ignite cluster's node. Must be an integer. - -There are several optional arguments that can be used as well, -* `username`: the username used to authenticate with the Apache Ignite cluster. This parameter is optional. Default: None. -* `password`: the password to authenticate the user with the Apache Ignite cluster. This parameter is optional. Default: None. -* `schema`: the schema to use for the connection to the Apache Ignite cluster. This parameter is optional. Default: PUBLIC. - -## Usage -In order to make use of this handler and connect to an Apache Ignite cluster in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE ignite_datasource -WITH ENGINE = 'ignite', -PARAMETERS = { - "host": "127.0.0.1", - "port": 10800, - "username": "admin", - "password": "password", - "schema": "example_schema" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM ignite_datasource.example_tbl -~~~~ - -At the moment, a connection can be established to only a single node in the cluster. Steps will be taken in the future, to configure the client to automatically fail over to another node if the connection to the current node fails or times out by providing the hosts and ports for many nodes as explained here, -
-https://ignite.apache.org/docs/latest/thin-clients/python-thin-client diff --git a/mindsdb/integrations/handlers/ignite_handler/__about__.py b/mindsdb/integrations/handlers/ignite_handler/__about__.py deleted file mode 100644 index 589d5e063d6..00000000000 --- a/mindsdb/integrations/handlers/ignite_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Apache Ignite handler' -__package_name__ = 'mindsdb_ignite_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Apache Ignite" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/ignite_handler/__init__.py b/mindsdb/integrations/handlers/ignite_handler/__init__.py deleted file mode 100644 index 655bbbd1e26..00000000000 --- a/mindsdb/integrations/handlers/ignite_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .ignite_handler import IgniteHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Apache Ignite' -name = 'ignite' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/ignite_handler/connection_args.py b/mindsdb/integrations/handlers/ignite_handler/connection_args.py deleted file mode 100644 index b2a1d9d47f6..00000000000 --- a/mindsdb/integrations/handlers/ignite_handler/connection_args.py +++ /dev/null @@ -1,46 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': "The host name or IP address of the Apache Ignite cluster's node.", - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': "The TCP/IP port of the Apache Ignite cluster's node. Must be an integer.", - 'required': True, - 'label': 'Port' - }, - username={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Apache Ignite cluster. This parameter is optional. Default: None.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Apache Ignite cluster. This parameter is optional. Default: None.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - schema={ - 'type': ARG_TYPE.STR, - 'description': 'Schema to use for the connection to the Apache Ignite cluster. This parameter is optional. Default: PUBLIC.', - 'required': True, - 'label': 'Schema' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port='10800', - username='root', - password='password', - schema='schema' -) diff --git a/mindsdb/integrations/handlers/ignite_handler/icon.svg b/mindsdb/integrations/handlers/ignite_handler/icon.svg deleted file mode 100644 index 88946fe2a82..00000000000 --- a/mindsdb/integrations/handlers/ignite_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ignite_handler/ignite_handler.py b/mindsdb/integrations/handlers/ignite_handler/ignite_handler.py deleted file mode 100644 index ed5a758e094..00000000000 --- a/mindsdb/integrations/handlers/ignite_handler/ignite_handler.py +++ /dev/null @@ -1,208 +0,0 @@ -from typing import Optional - -from pyignite import Client -import pandas as pd - -from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.libs.base import DatabaseHandler - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class IgniteHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Apache Ignite statements. - """ - - name = 'ignite' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'ignite' - - optional_parameters = ['username', 'password', 'schema'] - for parameter in optional_parameters: - if parameter not in connection_data: - connection_data[parameter] = None - - self.connection_data = connection_data - self.kwargs = kwargs - - self.client = None - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - self.client = Client( - username=self.connection_data['username'], - password=self.connection_data['password'] - ) - - try: - port = int(self.connection_data['port']) - except ValueError: - raise ValueError("Invalid port number") - - nodes = [(self.connection_data['host'], port)] - self.connection = self.client.connect(nodes) - self.is_connected = True - - return self.client, self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.client.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error('Error connecting to Apache Ignite!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - client, connection = self.connect() - - try: - with connection: - with client.sql(query, include_field_names=True, schema=self.connection_data['schema']) as cursor: - result = list(cursor) - if result and result[0][0] != 'UPDATED': - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result[1:], - columns=result[0] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f'Error running query: {query} on Apache Ignite!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - if isinstance(query, ASTNode): - query_str = query.to_string() - else: - query_str = str(query) - - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = """ - SELECT TABLE_NAME FROM SYS.TABLES - """ - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - query = f""" - SELECT COLUMN_NAME, TYPE FROM SYS.TABLE_COLUMNS WHERE TABLE_NAME = '{table_name.upper()}' - """ - result = self.native_query(query) - df = result.data_frame - df['TYPE'] = df.apply(lambda row: row['TYPE'].split('.')[-1], axis=1) - df = df.iloc[2:] - result.data_frame = df.rename(columns={'COLUMN_NAME': 'column_name', 'TYPE': 'data_type'}) - return result diff --git a/mindsdb/integrations/handlers/ignite_handler/requirements.txt b/mindsdb/integrations/handlers/ignite_handler/requirements.txt deleted file mode 100644 index 9f6467fac01..00000000000 --- a/mindsdb/integrations/handlers/ignite_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pyignite \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ignite_handler/tests/__init__.py b/mindsdb/integrations/handlers/ignite_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/ignite_handler/tests/test_ignite_handler.py b/mindsdb/integrations/handlers/ignite_handler/tests/test_ignite_handler.py deleted file mode 100644 index e0ccaf8243f..00000000000 --- a/mindsdb/integrations/handlers/ignite_handler/tests/test_ignite_handler.py +++ /dev/null @@ -1,33 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.ignite_handler.ignite_handler import IgniteHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class IgniteHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "127.0.0.1", - "port": 10800 - } - cls.handler = IgniteHandler('test_ignite_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM City" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_4_get_columns(self): - columns = self.handler.get_columns('City') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/impala_handler/README.md b/mindsdb/integrations/handlers/impala_handler/README.md deleted file mode 100644 index 1eaf2b5b93f..00000000000 --- a/mindsdb/integrations/handlers/impala_handler/README.md +++ /dev/null @@ -1,37 +0,0 @@ -# Impala Handler - -This is the implementation of the Impala handler for MindsDB. - -## Impala -Apache Impala is a MPP (Massive Parallel Processing) SQL query engine for processing huge volumes of data that is stored in Hadoop cluster. It is an open source software which is written in C++ and Java. It provides high performance and low latency compared to other SQL engines for Hadoop. -In other words, Impala is the highest performing SQL engine (giving RDBMS-like experience) which provides the fastest way to access data that is stored in Hadoop Distributed File System. - -## Implementation -This handler was implemented using the `impyla`, a Python library that allows you to use Python code to run SQL commands on Impala. - -The required arguments to establish a connection are, -* `user`: username asscociated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which TCP/IP connection is to be made -* `database`: Database name to be connected - -## Usage -In order to make use of this handler and connect to Impala in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE impala_datasource -WITH -engine='impala', -parameters={ - "user":"root", - "password":"p@55w0rd", - "host":"127.0.0.1", - "port":21050, - "database":"Db_NamE" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM impala_datasource.TEST; -~~~~ diff --git a/mindsdb/integrations/handlers/impala_handler/__about__.py b/mindsdb/integrations/handlers/impala_handler/__about__.py deleted file mode 100644 index c5acddd77a5..00000000000 --- a/mindsdb/integrations/handlers/impala_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Impala handler' -__package_name__ = 'mindsdb_impala_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Impala" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/impala_handler/__init__.py b/mindsdb/integrations/handlers/impala_handler/__init__.py deleted file mode 100644 index 11c1b120ad7..00000000000 --- a/mindsdb/integrations/handlers/impala_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .impala_handler import ImpalaHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Apache Impala' -name = 'impala' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/impala_handler/connection_args.py b/mindsdb/integrations/handlers/impala_handler/connection_args.py deleted file mode 100644 index b47ce677550..00000000000 --- a/mindsdb/integrations/handlers/impala_handler/connection_args.py +++ /dev/null @@ -1,37 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Impala server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Impala server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Impala server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Impala server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Impala server. Must be an integer. Default is 21050' - } - -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=21050, - user='USERNAME', - password='P@55W0Rd', - database='D4t4bA5e' -) diff --git a/mindsdb/integrations/handlers/impala_handler/icon.svg b/mindsdb/integrations/handlers/impala_handler/icon.svg deleted file mode 100644 index d2170bea0d8..00000000000 --- a/mindsdb/integrations/handlers/impala_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/impala_handler/impala_handler.py b/mindsdb/integrations/handlers/impala_handler/impala_handler.py deleted file mode 100644 index f27c11e8446..00000000000 --- a/mindsdb/integrations/handlers/impala_handler/impala_handler.py +++ /dev/null @@ -1,153 +0,0 @@ -from typing import Optional - -import pandas as pd -from impala import dbapi as db, sqlalchemy as SA - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class ImpalaHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Impala statements. - """ - - name = 'impala' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - super().__init__(name) - - self.parser = parse_sql - self.dialect = 'impala' - self.kwargs = kwargs - self.connection_data = connection_data - - self.connection = None - self.is_connected = False - - def connect(self): - if self.is_connected is True: - return self.connection - - config = { - 'host': self.connection_data.get('host'), - 'port': self.connection_data.get('port', 21050), - 'user': self.connection_data.get('user'), - 'password': self.connection_data.get('password'), - 'database': self.connection_data.get('database'), - - - } - - connection = db.connect(**config) - self.is_connected = True - self.connection = connection - return self.connection - - def disconnect(self): - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - - result = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - result.success = connection is not None - except Exception as e: - logger.error(f'x x x Error connecting to Impala {self.connection_data["database"]}, {e}!') - result.error_message = str(e) - - if result.success is True and need_to_close: - self.disconnect() - if result.success is False and self.is_connected is True: - self.is_connected = False - - return result - - def native_query(self, query: str) -> Response: - """ - Receive SQL query and runs it - :param query: The SQL query to run in Impala - :return: returns the records from the current recordset - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cur: - try: - cur.execute(query) - result = cur.fetchall() - if cur.has_result_set: - - response = Response( - RESPONSE_TYPE.TABLE, - pd.DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - # connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement. - """ - renderer = SqlalchemyRender(SA.ImpalaDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Get a list with all of the tabels in Impala - """ - q = "SHOW TABLES;" - result = self.native_query(q) - df = result.data_frame.rename(columns={'name': 'TABLE_NAME'}) - result.data_frame = df - - return result - - def get_columns(self, table_name: str) -> Response: - """ - Show details about the table - """ - q = f"DESCRIBE {table_name};" - - result = self.native_query(q) - df = result.data_frame.iloc[:, 0:2].rename(columns={'name': 'COLUMN_NAME', 'type': 'Data_Type'}) - result.data_frame = df - - return result diff --git a/mindsdb/integrations/handlers/impala_handler/requirements.txt b/mindsdb/integrations/handlers/impala_handler/requirements.txt deleted file mode 100644 index ce652f7b860..00000000000 --- a/mindsdb/integrations/handlers/impala_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -impyla \ No newline at end of file diff --git a/mindsdb/integrations/handlers/impala_handler/tests/__init__.py b/mindsdb/integrations/handlers/impala_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py b/mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py deleted file mode 100644 index 98ac11b3426..00000000000 --- a/mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py +++ /dev/null @@ -1,50 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.impala_handler.impala_handler import ImpalaHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class ImpalaHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - 'user': '', - 'password': '', - 'host': '127.0.0.1', - 'port': 21050, - 'database': 'temp' - - } - cls.handler = ImpalaHandler('test_impala_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_create_table(self): - query = "CREATE Table Car(Name Varchar, Price Integer);" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_3_insert(self): - query = "INSERT INTO Car ('Tata SUV', 860000)" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_4_native_query_select(self): - query = "SELECT * FROM Car;" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('Car') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/influxdb_handler/README.md b/mindsdb/integrations/handlers/influxdb_handler/README.md deleted file mode 100644 index e8723561320..00000000000 --- a/mindsdb/integrations/handlers/influxdb_handler/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# InfluxDB Handler -This is the implementation of the InfluxDB handler for MindsDB. - -## InfluxDB -In short, InfluxDB is a time series database that can be used to collect data & monitor the system & devices, especially Edge devices. -In this handler, influxdb 1.x api is used and more information about this api can be found (here)[https://docs.influxdata.com/influxdb/v1.7/query_language/schema_exploration/] - -Please follow this (link)[https://docs.influxdata.com/influxdb/cloud/security/tokens/create-token/#create-a-token-in-the-influxdb-ui] to generate token for accessing InfluxDB API - - -## Implementation -This handler was implemented as per the (Application Handler framework)[https://docs.mindsdb.com/contribute/app-handlers] - -The required arguments to establish a connection are, -* `influxdb_url` : Hosted url of InfluxDB Cloud -* `influxdb_token`: Authentication token for the hosted influxdb cloud instance -* `influxdb_db_name`: Database name of the influxdb cloud instance -* `influxdb_table_name`: Table name of the influxdb cloud instance -* `org` : Organisation of the influxdb cloud instance - - -## Install Dependencies - -``` -pip install influxdb3-python - -``` -## Usage -In order to make use of this handler and connect to an Jira in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE influxdb_source -WITH -engine='influxdb', -parameters={ - "influxdb_url": "", - "influxdb_token": "", - "influxdb_table_name": "", - "org": "Organisation" -}; -~~~~ - -For querying different tables, you need to create another database with `influxdb` handler as engine & mention the appropriate database & table name in the following parameters `influxdb_db_name` & `influxdb_table_name` - -Now, you can use this established connection to query your table as follows, -~~~~sql -SELECT * FROM influxdb_source.tables -~~~~ - -Advanced queries for the InfluxDB Handler -~~~~sql -SELECT name,time,sensor_id,temperature -FROM influxdb_source5.tables -ORDER BY temperature DESC -LIMIT 65; -~~~~ diff --git a/mindsdb/integrations/handlers/influxdb_handler/__about__.py b/mindsdb/integrations/handlers/influxdb_handler/__about__.py deleted file mode 100644 index 45948f6374f..00000000000 --- a/mindsdb/integrations/handlers/influxdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB InfluxDB handler' -__package_name__ = 'mindsdb_influxdb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for InfluxDB" -__author__ = 'Balaji Seetharaman' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/influxdb_handler/__init__.py b/mindsdb/integrations/handlers/influxdb_handler/__init__.py deleted file mode 100644 index 6a2c8af8ffd..00000000000 --- a/mindsdb/integrations/handlers/influxdb_handler/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .influxdb_handler import InfluxDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'InfluxDB' -name = 'influxdb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/influxdb_handler/icon.svg b/mindsdb/integrations/handlers/influxdb_handler/icon.svg deleted file mode 100644 index 986e9b29e07..00000000000 --- a/mindsdb/integrations/handlers/influxdb_handler/icon.svg +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py b/mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py deleted file mode 100644 index c7678f248fd..00000000000 --- a/mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py +++ /dev/null @@ -1,102 +0,0 @@ -from influxdb_client_3 import InfluxDBClient3 -from mindsdb.integrations.handlers.influxdb_handler.influxdb_tables import InfluxDBTables -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - - -logger = log.getLogger(__name__) - - -class InfluxDBHandler(APIHandler): - """InfluxDB handler implementation""" - - def __init__(self, name=None, **kwargs): - """Initialize the InfluxDB handler. - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - - self.parser = parse_sql - self.dialect = 'influxdb' - self.connection_data = connection_data - self.kwargs = kwargs - self.connection = None - self.is_connected = False - - influxdb_tables_data = InfluxDBTables(self) - self._register_table("tables", influxdb_tables_data) - - def connect(self): - """Set up the connection required by the handler. - Returns - ------- - None - - Raises Expection if ping check fails - """ - - if self.is_connected is True: - return self.connection - - self.connection = InfluxDBClient3(host=self.connection_data['influxdb_url'], token=self.connection_data['influxdb_token'], org=self.connection_data.get('org')) - - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - Returns - ------- - StatusResponse - Status confirmation - """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to InfluxDB API: {e}!") - response.error_message = e - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) - - def call_influxdb_tables(self, query): - """Pulls all the records from the given InfluxDB table and returns it select() - - Returns - ------- - pd.DataFrame of all the records of the particular InfluxDB - """ - influx_connection = self.connect() - if query is None: - query = 'SELECT * FROM ' + f"{self.connection_data['influxdb_table_name']}" - - table = influx_connection.query(query=query, database=self.connection_data['influxdb_db_name'], language='sql') - return table.to_pandas() diff --git a/mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py b/mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py deleted file mode 100644 index 5033cacd20e..00000000000 --- a/mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py +++ /dev/null @@ -1,81 +0,0 @@ -import pandas as pd - -from typing import List - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.utilities import log - -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.handlers.query_utilities.select_query_utilities import SELECTQueryParser - - -logger = log.getLogger(__name__) - - -class InfluxDBTables(APITable): - """InfluxDB Tables implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the InfluxDB "query" API endpoint - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - Returns - ------- - pd.DataFrame of particular InfluxDB table matching the query - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - table_name = self.handler.connection_data['influxdb_table_name'] - select_statement_parser = SELECTQueryParser( - query, - "tables", - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, _ = select_statement_parser.parse_query() - - try: - selected_columns.remove("name") - selected_columns.remove("tags") - except Exception as e: - logger.warn(e) - - formatted_query = self.get_select_query(table_name, selected_columns, where_conditions, order_by_conditions, query.limit) - influxdb_tables_df = self.handler.call_influxdb_tables(formatted_query) - - return influxdb_tables_df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - - dataframe = self.handler.call_influxdb_tables(f"SELECT * FROM {self.handler.connection_data['influxdb_table_name']} LIMIT 1") - - return list(dataframe.columns) - - def get_select_query(self, table_name, selected_columns, where_conditions, order_by_conditions, result_limit): - """Gets Well formed Query - Returns - ------- - str - """ - columns = ", ".join([f'"{column}"' for column in selected_columns]) - query = f'SELECT {columns} FROM "{table_name}"' - if (where_conditions is not None and len(where_conditions) > 0): - query += " WHERE " - query += " AND ".join([f"{i[1]} {i[0]} {i[2]}" for i in where_conditions]) - if (order_by_conditions != {} and order_by_conditions['columns'] is not None and len(order_by_conditions['columns']) > 0): - query += " ORDER BY " - query += ", ".join([f'{column_name} {"ASC"if asc else "DESC"}' for column_name, asc in zip(order_by_conditions['columns'], order_by_conditions['ascending'])]) - if (result_limit is not None): - query += f" LIMIT {result_limit}" - query += ";" - return query diff --git a/mindsdb/integrations/handlers/influxdb_handler/requirements.txt b/mindsdb/integrations/handlers/influxdb_handler/requirements.txt deleted file mode 100644 index 37e5f0a2136..00000000000 --- a/mindsdb/integrations/handlers/influxdb_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -influxdb3-python -urllib3>=2.6.3 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/informix_handler/README.md b/mindsdb/integrations/handlers/informix_handler/README.md deleted file mode 100644 index e4182045d6f..00000000000 --- a/mindsdb/integrations/handlers/informix_handler/README.md +++ /dev/null @@ -1,149 +0,0 @@ -# IBM Informix Handler - -This is the implementation of the IBM Informix handler for MindsDB. The `IfxPy` library is not currently being published to PyPI. As a workaround, you need to install it directly from the GitHub repository: - -``` -pip install git+https://github.com/OpenInformix/IfxPy#subdirectory=IfxPy -``` - -## IBM Informix - -IBM Informix is a product family within IBM's Information Management division that is centered on several relational database management system (RDBMS) offerings.The Informix server supports the object–relational model and supports (through extensions) data types that are not a part of the SQL standard. The most widely used of these are the JSON, BSON, time series and spatial extensions, which provide both data type support and language extensions that permit high performance domain specific queries and efficient storage for data sets based on semi-structured, time series, and spatial data. - -## Implementation - -This handler was implemented using the `IfxPy/IfxPyDbi`, a Python library that allows you to use Python code to run SQL commands on DB2 Database. - -The required arguments to establish a connection are, -* `user`: username asscociated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `database`: Database name to be connected -* `schema_name`: schema name to get tables -* `server`: Name of server you want connect -* `loging_enabled`: Is loging is enabled or not. Default is True - -## Usage -In order to make use of this handler and connect to Informix in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE informix_datasource -WITH -engine='informix', -parameters={ - "server": "server", - "host": "127.0.0.1", - "port": 9091, - "user": "informix", - "password": "in4mix", - "database": "stores_demo", - "schema_name": "love", - "loging_enabled": False -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM informix_datasource.items; -~~~~ - - -This integration uses IfxPy it is in develpment stage there it can be install using `pip install IfxPy`.But it doesn't work for higher version of python, therfore you have to build it from source. - -
- -# You can use below methods -
- - - -
- Check out For Linux - -Below code download and extracts onedb-ODBC Driver use to make connection - - -```bash - -cd $HOME -mkdir Informix -cd Informix -mkdir -p home/informix/cli -wget https://hcl-onedb.github.io/odbc/OneDB-Linux64-ODBC-Driver.tar -sudo tar xvf OneDB-Linux64-ODBC-Driver.tar -C ./home/informix/cli -rm OneDB-Linux64-ODBC-Driver.tar - -``` - -* After running above command you need to go in `.bashrc` file and add enviroment variable there - -```bash -export INFORMIXDIR=$HOME/Informix/home/informix/cli/onedb-odbc-driver -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}${INFORMIXDIR}/lib:${INFORMIXDIR}/lib/esql:${INFORMIXDIR}/lib/cli -``` -* Now you are done with setting Enviroment variable. -* Running below command clone IfxPy repo , build a wheel and install it . - -```bash - -pip install wheel -mkdir Temp -cd Temp -git clone https://github.com/OpenInformix/IfxPy.git -cd IfxPy/IfxPy -python setup.py bdist_wheel -pip install --find-links=./dist IfxPy -cd .. -cd .. -cd .. -rm -rf Temp - - - -``` - - -
- -
- Check out For Windows - -> Run Below Given Commands in CMD - -```cmd - cd $HOME -mkdir Informix -cd Informix -mkdir /home/informix/cli -wget https://hcl-onedb.github.io/odbc/OneDB-Win64-ODBC-Driver.zip -tar xvf OneDB-Win64-ODBC-Driver.zip -C ./home/informix/cli -del OneDB-Win64-ODBC-Driver.zip -``` - -* Above code will Download, Extract OneDB ODBC zip file. -* You need to add THis To ENViroment Variable -* `set INFORMIXDIR=$HOME/Informix/home/informix/cli/onedb-odbc-driver` -* Add **`%INFORMIXDIR%\bin to PATH`** - -* Below code will clone, build and install wheel -```cmd -pip install wheel -mkdir Temp -cd Temp -git clone https://github.com/OpenInformix/IfxPy.git -cd IfxPy/IfxPy -python setup.py bdist_wheel -pip install --find-links=./dist IfxPy -cd .. -cd .. -cd .. -rmdir Temp -``` -
- -
- -> For more Info checkout [here](https://github.com/OpenInformix/IfxPy) also it has some prerequisite. - -> There are many method for Build but wheel method easy and Recommended. - diff --git a/mindsdb/integrations/handlers/informix_handler/__about__.py b/mindsdb/integrations/handlers/informix_handler/__about__.py deleted file mode 100644 index 1e060424708..00000000000 --- a/mindsdb/integrations/handlers/informix_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB IBM Informix handler" -__package_name__ = "mindsdb_informix_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for IBM Informix" -__author__ = "Parthiv Makwana" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/informix_handler/__init__.py b/mindsdb/integrations/handlers/informix_handler/__init__.py deleted file mode 100644 index 5380c182fff..00000000000 --- a/mindsdb/integrations/handlers/informix_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .informix_handler import InformixHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "IBM Informix" -name = "informix" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/informix_handler/icon.svg b/mindsdb/integrations/handlers/informix_handler/icon.svg deleted file mode 100644 index a4151367222..00000000000 --- a/mindsdb/integrations/handlers/informix_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/informix_handler/informix_handler.py b/mindsdb/integrations/handlers/informix_handler/informix_handler.py deleted file mode 100644 index 4b4e62ca1db..00000000000 --- a/mindsdb/integrations/handlers/informix_handler/informix_handler.py +++ /dev/null @@ -1,271 +0,0 @@ -from collections import OrderedDict -from typing import Optional -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -import pandas as pd -import IfxPyDbi as Ifx -from sqlalchemy_informix.ibmdb import InformixDialect - -logger = log.getLogger(__name__) - - -class InformixHandler(DatabaseHandler): - name = "informix" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """Initialize the handler - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.kwargs = kwargs - self.parser = parse_sql - self.loging_enabled = connection_data["loging_enabled"] if "loging_enabled" in connection_data else True - self.server = connection_data["server"] - self.database = connection_data["database"] - self.user = connection_data["user"] - self.password = connection_data["password"] - self.schemaName = connection_data["schema_name"] - self.host = connection_data["host"] - self.port = connection_data["port"] - self.connString = ("SERVER={0};DATABASE={1};HOST={2};PORT={3};UID={4};PWD={5};").format( - self.server, self.database, self.host, self.port, self.user, self.password - ) - - self.connection = None - self.is_connected = False - - def connect(self): - """Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - Connection Object - """ - if self.is_connected is True: - return self.connection - - try: - self.connection = Ifx.connect(self.connString, "", "") - - self.is_connected = True - except Exception as e: - logger.error(f"Error while connecting to {self.database}, {e}") - - return self.connection - - def disconnect(self): - """Close any existing connections - Should switch self.is_connected. - """ - if self.is_connected is False: - return - try: - self.connection.close() - self.is_connected = False - except Exception as e: - logger.error(f"Error while disconnecting to {self.database}, {e}") - - return - - def check_connection(self) -> StatusResponse: - """Check connection to the handler - Returns: - HandlerStatusResponse - """ - responseCode = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - responseCode.success = True - except Exception as e: - logger.error(f"Error connecting to database {self.database}, {e}!") - responseCode.error_message = str(e) - finally: - if responseCode.success is True and need_to_close: - self.disconnect() - if responseCode.success is False and self.is_connected is True: - self.is_connected = False - - return responseCode - - def native_query(self, query: str) -> StatusResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - etc) - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - conn = self.connect() - cur = conn.cursor() - try: - cur.execute(query) - - if cur._result_set_produced: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame(result, columns=[x[0] for x in cur.description]), - ) - else: - response = Response(RESPONSE_TYPE.OK) - if self.loging_enabled: - self.connection.commit() - except Exception as e: - logger.error(f"Error running query: {query} on {self.database}") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - if self.loging_enabled: - self.connection.rollback() - - cur.close() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - renderer = SqlalchemyRender(InformixDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """Return list of entities - Return list of entities that will be accesible as tables. - Returns: - HandlerResponse: shoud have same columns as information_schema.tables - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html) - Column 'TABLE_NAME' is mandatory, other is optional. - """ - self.connect() - - result = self.connection.tables() - try: - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - [x["TABLE_NAME"] for x in result if x["TABLE_SCHEM"] == self.schemaName], - columns=["TABLE_NAME"], - ), - ) - else: - response = Response(RESPONSE_TYPE.OK) - - except Exception as e: - logger.error(f"Error running while getting table {e} on ") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - return response - - def get_columns(self, table_name: str) -> StatusResponse: - """Returns a list of entity columns - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse: shoud have same columns as information_schema.columns - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html) - Column 'COLUMN_NAME' is mandatory, other is optional. Hightly - recomended to define also 'DATA_TYPE': it should be one of - python data types (by default it str). - """ - - self.connect() - - result = self.connection.columns(table_name=table_name) - try: - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - [result[i]["COLUMN_NAME"] for i in range(len(result))], - columns=["COLUMN_NAME"], - ), - ) - else: - response = Response(RESPONSE_TYPE.OK) - - except Exception as e: - logger.error(f"Error running while getting table {e} on ") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - return response - - -connection_args = OrderedDict( - server={ - "type": ARG_TYPE.STR, - "description": """ - The server name you want to get connected. - """, - }, - database={ - "type": ARG_TYPE.STR, - "description": """ - The database name to use when connecting with the DB2 server. - """, - }, - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the DB2 server.", - }, - password={ - "type": ARG_TYPE.STR, - "description": "The password to authenticate the user with the DB2 server.", - }, - host={ - "type": ARG_TYPE.STR, - "description": "The host name or IP address of the DB2 server/database.", - }, - port={ - "type": ARG_TYPE.INT, - "description": "Specify port to connect DB2 through TCP/IP", - }, - schema_name={ - "type": ARG_TYPE.STR, - "description": "Specify the schema name for showing tables ", - }, - logging_enabled={ - "type": ARG_TYPE.BOOL, - "description": """ - Used for COMMIT and ROLLBACK as this command works only for logging enabled database. - Note: Its optional. - Default is TRUE - """, - }, -) - -connection_args_example = OrderedDict( - server="server", - database="stores_demo", - user="informix", - password="in4mix", - host="127.0.0.1", - port="9091", - schema_name="Love", -) diff --git a/mindsdb/integrations/handlers/informix_handler/requirements.txt b/mindsdb/integrations/handlers/informix_handler/requirements.txt deleted file mode 100644 index 26886617b94..00000000000 --- a/mindsdb/integrations/handlers/informix_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -sqlalchemy-informix diff --git a/mindsdb/integrations/handlers/informix_handler/tests/__init__.py b/mindsdb/integrations/handlers/informix_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py b/mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py deleted file mode 100644 index 354d808fb35..00000000000 --- a/mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.informix_handler.informix_handler import ( - InformixHandler, -) -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class InformixHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "server": "server", - "host": "127.0.0.1", - "port": 9093, - "user": "informix", - "password": "in4mix", - "database": "demo", - "schema_name": "love", - "loging_enabled": False, - } - } - cls.handler = InformixHandler("test_informix_handler", cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS LOVE;") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_create_table(self): - res = self.handler.query("CREATE TABLE IF NOT EXISTS LOVE (LOVER varchar(20));") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_insert(self): - res = self.handler.query("INSERT INTO LOVE VALUES('Hari');") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_4_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is RESPONSE_TYPE.TABLE - - def test_5_select_query(self): - query = "SELECT * FROM LOVE;" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_check_connection(self): - self.handler.check_connection() - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/ingres_handler/README.md b/mindsdb/integrations/handlers/ingres_handler/README.md deleted file mode 100644 index aa63da07de5..00000000000 --- a/mindsdb/integrations/handlers/ingres_handler/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# Ingres Handler - -This is the implementation of the Ingres handler for MindsDB. - -## Ingres - -Ingres is an open-source relational database management system (DBMS) designed for large-scale commercial and government -applications. Actian Corporation currently oversees the development of the database while providing certified binaries -and support. It is designed to run on a wide range of platforms, including Unix, Linux, Windows, and mainframe systems, -and is known for its scalability, reliability, and security. - -## Implementation - -This handler was implemented using [pyodbc](https://pypi.org/project/pyodbc/) -and [ingres_sa_dialect](https://pypi.org/project/ingres-sa-dialect/) for the implementation of the Ingres dialect from -SQLAlchemy. - -The required arguments to establish a connection are: - -* `user`: username associated with database -* `password`: password to authenticate your access -* `server`: Server to be connected -* `database`: Database name to be connected - -The optional arguments are: - -* `servertype`: Server type to be connected *(optional)* (default: `ingres`) - -## Usage - -Install the Ingres ODBC driver for your platform. You can find the appropriate driver on the Ingres website. - -Information about connecting to Ingres 11.2 using ODBC can be -found [here](https://docs.actian.com/ingres/11.2/index.html#page/QuickStart_Linux/Connecting_to_Ingres_Using_ODBC.htm#ww306952). - -**Important**: - -Before you run the Ingres Handler you first need to execute the following commands -in order to install the newest version Ingres dialect for SQLAlchemy: - -~~~~shell -python -m pip install pyodbc sqlalchemy -cd mindsdb/integrations/handlers/ingres_handler -git clone https://github.com/ActianCorp/ingres_sa_dialect.git -cd ingres_sa_dialect -python -m pip install -e . -~~~~ - -In order to make use of this handler and connect to Ingres in MindsDB, the following syntax can be used: - -~~~~sql -CREATE -DATABASE ingres_db -WITH engine='ingres', -parameters={ - "user": "admin", - "password": "password", - "server": "myserver.example.com", - "database": "test_db" -}; -~~~~ - -Now, you can use this established connection to query your database as follows: - -~~~~sql -SELECT * -FROM test_db.test; -~~~~ - diff --git a/mindsdb/integrations/handlers/ingres_handler/__about__.py b/mindsdb/integrations/handlers/ingres_handler/__about__.py deleted file mode 100644 index 35ee01aaaac..00000000000 --- a/mindsdb/integrations/handlers/ingres_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Ingres handler' -__package_name__ = 'mindsdb_ingres_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Ingres" -__author__ = 'Panagiotis-Alexios Spanakis and Theodoros Malikourtis' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/ingres_handler/__init__.py b/mindsdb/integrations/handlers/ingres_handler/__init__.py deleted file mode 100644 index 47518f88390..00000000000 --- a/mindsdb/integrations/handlers/ingres_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .ingres_handler import IngresHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Ingres' -name = 'ingres' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'connection_args', 'connection_args_example', - 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/ingres_handler/connection_args.py b/mindsdb/integrations/handlers/ingres_handler/connection_args.py deleted file mode 100644 index ef4077e5108..00000000000 --- a/mindsdb/integrations/handlers/ingres_handler/connection_args.py +++ /dev/null @@ -1,36 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Ingres server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Ingres server.', - 'secret': True - }, - server={ - 'type': ARG_TYPE.STR, - 'description': 'The server used to authenticate with the Ingres server.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'Specify database name to connect Ingres server' - }, - servertype={ - 'type': ARG_TYPE.STR, - 'description': 'Specify server type to connect Ingres server' - } -) - -connection_args_example = OrderedDict( - user='admin', - password='password', - server='(local)', - database='test_db', - servertype='ingres' -) diff --git a/mindsdb/integrations/handlers/ingres_handler/icon.svg b/mindsdb/integrations/handlers/ingres_handler/icon.svg deleted file mode 100644 index 71db13238a1..00000000000 --- a/mindsdb/integrations/handlers/ingres_handler/icon.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ingres_handler/ingres_handler.py b/mindsdb/integrations/handlers/ingres_handler/ingres_handler.py deleted file mode 100644 index 04aedd6b106..00000000000 --- a/mindsdb/integrations/handlers/ingres_handler/ingres_handler.py +++ /dev/null @@ -1,211 +0,0 @@ -import pyodbc - -import pandas as pd -from mindsdb_sql_parser import parse_sql -from ingres_sa_dialect.base import IngresDialect -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class IngresHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Ingres statements. - """ - - name = 'ingres' - - def __init__(self, name: str, **kwargs): - """ - Initializes a new instance of the Ingres handler. - - Args: - name (str): The name of the database. - **kwargs: parameters for connecting to the database - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'ingres' - self.connection_args = kwargs.get('connection_data') - self.database = self.connection_args.get('database') - self.server = self.connection_args.get('server') - self.user = self.connection_args.get('user') - self.password = self.connection_args.get('password') - self.servertype = self.connection_args.get('servertype', 'ingres') - self.connection = None - self.is_connected = False - - def __del__(self): - """ - Destructor for the Ingres class. - """ - if self.is_connected is True: - self.disconnect() - - def connect(self): - """ - Establishes a connection to the Ingres server. - Returns: - HandlerStatusResponse - """ - if self.is_connected: - return self.connection - - conn_str = f"Driver={{Ingres}};Server={self.server};Database={self.database};UID={self.user};" \ - f"PWD={self.password};ServerType={self.servertype}" - - self.connection = pyodbc.connect(conn_str) - self.is_connected = True - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Ingres, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def disconnect(self): - """ - Closes the connection to the Ingres server. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def native_query(self, query: str) -> Response: - """ - Receive raw query and act upon it somehow. - Args: - query (str): SQL query to execute. - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cursor: - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame.from_records( - result, - columns=[x[0] for x in cursor.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_args["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INSERT, DELETE, etc - Returns: - HandlerResponse - """ - - renderer = SqlalchemyRender(IngresDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Gets a list of table names in the database. - - Returns: - list: A list of table names in the database. - """ - connection = self.connect() - cursor = connection.cursor() - # Execute query to get all table names - cursor.execute( - "SELECT table_name FROM iitables WHERE table_type = 'T'") - - table_names = [x[0] for x in cursor.fetchall()] - - # Create dataframe with table names - df = pd.DataFrame(table_names, columns=['table_name', 'data_type']) - - # Create response object - response = Response( - RESPONSE_TYPE.TABLE, - df - ) - - return response - - def get_columns(self, table_name: str) -> Response: - """ - Gets a list of column names in the specified table. - - Args: - table_name (str): The name of the table to get column names from. - - Returns: - list: A list of column names in the specified table. - """ - conn = self.connect() - cursor = conn.cursor() - cursor.execute("SELECT column_name FROM iicolumns WHERE table_name = '{}'".format(table_name)) - results = cursor.fetchall() - - # construct a pandas dataframe from the query results - df = pd.DataFrame( - results, - columns=['column_name', 'data_type'] - ) - - response = Response( - RESPONSE_TYPE.TABLE, - df - ) - - return response diff --git a/mindsdb/integrations/handlers/ingres_handler/requirements.txt b/mindsdb/integrations/handlers/ingres_handler/requirements.txt deleted file mode 100644 index bc123cdf05c..00000000000 --- a/mindsdb/integrations/handlers/ingres_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pyodbc -sqlalchemy-ingres[all] diff --git a/mindsdb/integrations/handlers/ingres_handler/tests/__init__.py b/mindsdb/integrations/handlers/ingres_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/ingres_handler/tests/test_ingres_handler.py b/mindsdb/integrations/handlers/ingres_handler/tests/test_ingres_handler.py deleted file mode 100644 index 26d0a134d93..00000000000 --- a/mindsdb/integrations/handlers/ingres_handler/tests/test_ingres_handler.py +++ /dev/null @@ -1,53 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.ingres_handler.ingres_handler import IngresHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class IngresHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "user": "admin", - "password": "password", - "server": "(local)", - "database": "test_db" - } - } - cls.handler = IngresHandler('test_ingres_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_2_select_query(self): - query = "SELECT * FROM test_db.home_rentals" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_3_get_columns(self): - columns = self.handler.get_columns('test') - assert columns.type is not RESPONSE_TYPE.ERROR - - def test_4_drop_table(self): - res = self.handler.native_query("DROP TABLE IF EXISTS test_db.test") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_5_create_table(self): - res = self.handler.native_query("CREATE TABLE IF NOT EXISTS test_db.test (id INT, name VARCHAR(255))") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_6_insert(self): - res = self.handler.native_query("INSERT INTO test VALUES (100,'ONE HUNDRED')") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_7_disconnect(self): - assert self.handler.disconnect() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/instatus_handler/README.md b/mindsdb/integrations/handlers/instatus_handler/README.md deleted file mode 100644 index f6b2a098a77..00000000000 --- a/mindsdb/integrations/handlers/instatus_handler/README.md +++ /dev/null @@ -1,207 +0,0 @@ -# Instatus Handler - -Instatus handler for MindsDB provides interfaces to connect with Instatus via APIs and pull the status pages. - -## Instatus - -Instatus is a cloud-based status page software that allows users to communicate their status using incidents and maintenances. It's a SaaS platform that helps companies create status pages for their services. - -## Instatus Handler Initialization - -The Instatus handler is initialized with the following parameters: - -- `api_key`: Instatus API key to use for authentication - -Please follow this [link](https://dashboard.instatus.com/developer) to get the api key for accessing Instatus API. - -## Implemented Features - -- [x] Instatus status pages table - - [x] Support SELECT - - [x] Support INSERT - - [x] Support UPDATE -- [x] Instatus components table - - [x] Support SELECT - - [x] Support INSERT - - [x] Support UPDATE - -## TODO -- [ ] Instatus Incidents table - - [ ] Support SELECT - - [ ] Support INSERT - - [ ] Support UPDATE -- [ ] Instatus Incidents updates table - - [ ] Support SELECT - - [ ] Support INSERT - - [ ] Support UPDATE -- [ ] Instatus Maintenances table - - [ ] Support SELECT - - [ ] Support INSERT - - [ ] Support UPDATE -- [ ] Instatus Maintenance updates table - - [ ] Support SELECT - - [ ] Support INSERT - - [ ] Support UPDATE -- [ ] Instatus Templates table - - [ ] Support SELECT - - [ ] Support INSERT - - [ ] Support UPDATE -- [ ] Instatus Teammates table - - [ ] Support SELECT - - [ ] Support INSERT - - [ ] Support UPDATE -- [ ] Instatus Subscribers table - - [ ] Support SELECT - - [ ] Support INSERT - - [ ] Support UPDATE -- [ ] Instatus Metrics table - - [ ] Support SELECT - - [ ] Support INSERT - - [ ] Support UPDATE - -## Connection - -The first step is to create a database with the new `instatus` engine. - -```sql -CREATE DATABASE mindsdb_instatus --- Display name for the database. -WITH - ENGINE = 'instatus', --- Name of the MindsDB handler. - PARAMETERS = { - "api_key": "" --- Instatus API key to use for authentication. - }; -``` -## Usage (Status pages table) - -### SELECT - -```sql -SELECT * -FROM mindsdb_instatus.status_pages; -``` - -### WHERE - -```sql -SELECT * -FROM mindsdb_instatus.status_pages -WHERE id = ''; -``` - -### INSERT - -```sql -INSERT INTO mindsdb_instatus.status_pages (email, name, subdomain, components, logoUrl, faviconUrl, websiteUrl, language, useLargeHeader, brandColor, okColor, disruptedColor, degradedColor, downColor, noticeColor, unknownColor, googleAnalytics, subscribeBySms, smsService, twilioSid, twilioToken, twilioSender, nexmoKey, nexmoSecret, nexmoSender, htmlInMeta, htmlAboveHeader, htmlBelowHeader, htmlAboveFooter, htmlBelowFooter, htmlBelowSummary, cssGlobal, launchDate, dateFormat, dateFormatShort, timeFormat) -VALUES ('yourname@gmail.com', 'mindsdb', 'mindsdb-instatus', '["Website", "App", "API"]', 'https://instatus.com/sample.png', 'https://instatus.com/favicon-32x32.png', 'https://instatus.com', 'en', true, '#111', '#33B17E', '#FF8C03', '#ECC94B', '#DC123D', '#70808F', '#DFE0E1', 'UA-00000000-1', true, 'twilio', 'YOUR_TWILIO_SID', 'YOUR_TWILIO_TOKEN', 'YOUR_TWILIO_SENDER', null, null, null, null, null, null, null, null, null, null, null, 'MMMMMM d, yyyy', 'MMM yyyy', 'p'); -``` - -Note: - -- `email` is required field (Example: 'yourname@gmail.com') -- `name` is required field (Example: 'mindsdb') -- `subdomain` is required field (Example: 'mindsdb-docs') -- `components` is required field (Example: '["Website", "App", "API"]') -- other fields are optional - -### UPDATE - -```sql -UPDATE mindsdb_instatus.status_pages -SET name = 'mindsdb', - status = 'UP', - subdomain = 'mindsdb-slack', - logoUrl = 'https://instatus.com/sample.png', - faviconUrl = 'https://instatus.com/favicon-32x32.png', - websiteUrl = 'https://instatus.com', - language = 'en', - publicEmail = 'hello@nasa.gov', - useLargeHeader = true, - brandColor = '#111', - okColor = '#33B17E', - disruptedColor = '#FF8C03', - degradedColor = '#ECC94B', - downColor = '#DC123D', - noticeColor = '#70808F', - unknownColor = '#DFE0E1', - googleAnalytics = 'UA-00000000-1', - subscribeBySms = true, - smsService = 'twilio', - twilioSid = 'YOUR_TWILIO_SID', - twilioToken = 'YOUR_TWILIO_TOKEN', - twilioSender = 'YOUR_TWILIO_SENDER', - nexmoKey = null, - nexmoSecret = null, - nexmoSender = null, - htmlInMeta = null, - htmlAboveHeader = null, - htmlBelowHeader = null, - htmlAboveFooter = null, - htmlBelowFooter = null, - htmlBelowSummary = null, - cssGlobal = null, - launchDate = null, - dateFormat = 'MMMMMM d, yyyy', - dateFormatShort = 'MMM yyyy', - timeFormat = 'p', - private = false, - useAllowList = false, - translations = '{ - "name": { - "fr": "nasa" - } - }' -WHERE id = ''; -``` - -## Usage (Components table) - -### SELECT - -```sql -SELECT * -FROM mindsdb_instatus.components -WHERE page_id = ''; -``` - -### WHERE - -```sql -SELECT * -FROM mindsdb_instatus.components -WHERE page_id = '' -AND component_id = ''; -``` - -### CREATE - -```sql -INSERT INTO mindsdb_instatus.components (page_id, name, description, status, order, showUptime, grouped, translations_name_in_fr, translations_desc_in_fr) -VALUES ( - '', - 'Test component', - 'Testing', - 'OPERATIONAL', - 6, - true, - false, - "Composant de test", - "En test" -); -``` - -### UPDATE - -```sql -UPDATE mindsdb_instatus.components -SET - name = 'Test component 4', - description = 'Test test test', - status = 'OPERATIONAL', - order = 6, - showUptime = true, - grouped = false, - translations_name_in_fr = "Composant de test 4", - translations_desc_in_fr = "Test test test" -WHERE page_id = '' -AND component_id = ''; -``` diff --git a/mindsdb/integrations/handlers/instatus_handler/__about__.py b/mindsdb/integrations/handlers/instatus_handler/__about__.py deleted file mode 100644 index 959fc1cd66a..00000000000 --- a/mindsdb/integrations/handlers/instatus_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB instatus handler" -__package_name__ = "mindsdb_instatus_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Instatus" -__author__ = "Ritwick Raj Makkhal" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/instatus_handler/__init__.py b/mindsdb/integrations/handlers/instatus_handler/__init__.py deleted file mode 100644 index 20bf89f2cc7..00000000000 --- a/mindsdb/integrations/handlers/instatus_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .instatus_handler import InstatusHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Instatus" -name = "instatus" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/instatus_handler/icon.svg b/mindsdb/integrations/handlers/instatus_handler/icon.svg deleted file mode 100644 index 84351a2f961..00000000000 --- a/mindsdb/integrations/handlers/instatus_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/instatus_handler/instatus_handler.py b/mindsdb/integrations/handlers/instatus_handler/instatus_handler.py deleted file mode 100644 index a0117ce45b2..00000000000 --- a/mindsdb/integrations/handlers/instatus_handler/instatus_handler.py +++ /dev/null @@ -1,127 +0,0 @@ -from mindsdb.integrations.handlers.instatus_handler.instatus_tables import StatusPages, Components -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -import requests -import pandas as pd -from collections import OrderedDict -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - -logger = log.getLogger(__name__) - - -class InstatusHandler(APIHandler): - def __init__(self, name: str, **kwargs) -> None: - """initializer method - - Args: - name (str): handler name - """ - super().__init__(name) - self._base_url = "https://api.instatus.com" - self._api_key = None - - args = kwargs.get('connection_data', {}) - if 'api_key' in args: - self._api_key = args['api_key'] - - self.connection = None - self.is_connected = False - - _tables = [ - StatusPages, - Components - ] - - for Table in _tables: - self._register_table(Table.name, Table(self)) - - def check_connection(self) -> StatusResponse: - """checking the connection - - Returns: - StatusResponse: whether the connection is still up - """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Instatus API: {e}!') - response.error_message = e - - self.is_connected = response.success - return response - - def connect(self) -> StatusResponse: - # If already connected, return the existing connection - if self.is_connected and self.connection: - return self.connection - - if self._api_key: - try: - headers = {"Authorization": f"Bearer {self._api_key}"} - response = requests.get(f"{self._base_url}/v2/pages", headers=headers) - - if response.status_code == 200: - self.connection = response - self.is_connected = True - return StatusResponse(True) - else: - raise Exception(f"Error connecting to Instatus API: {response.status_code} - {response.text}") - except requests.RequestException as e: - raise Exception(f"Request to Instatus API failed: {str(e)}") - - raise Exception("API key is missing") - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) - - def call_instatus_api(self, endpoint: str, method: str = 'GET', params: dict = None, json_data: dict = {}) -> pd.DataFrame: - if not params: - params = {} - - headers = {"Authorization": f"Bearer {self._api_key}"} - url = f"{self._base_url}{endpoint}" - - if method.upper() in ('GET', 'POST', 'PUT', 'DELETE'): - headers['Content-Type'] = 'application/json' - - response = requests.request(method, url, headers=headers, params=params, json=json_data) - - if response.status_code == 200: - data = response.json() - return pd.DataFrame(data) if isinstance(data, list) else pd.DataFrame([data]) - else: - raise Exception(f"Error connecting to Instatus API: {response.status_code} - {response.text}") - - return pd.DataFrame() - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.PWD, - "description": "Instatus API key to use for authentication.", - "required": True, - "label": "Api key", - }, -) - -connection_args_example = OrderedDict( - api_key="d25509b171ad79395dc2c51b099ee6d0" -) diff --git a/mindsdb/integrations/handlers/instatus_handler/instatus_tables.py b/mindsdb/integrations/handlers/instatus_handler/instatus_tables.py deleted file mode 100644 index c55b547c004..00000000000 --- a/mindsdb/integrations/handlers/instatus_handler/instatus_tables.py +++ /dev/null @@ -1,393 +0,0 @@ -from typing import List -import pandas as pd -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb_sql_parser.ast.select.constant import Constant -import json -import re - -langCodes = ["ar", "cs", "da", "de", "en", "es", "et", "fi", "fr", "hu", "id", "it", "ja", "ko", - "nl", "no", "pl", "pt", "pt-BR", "ro", "rs", "ru", "sl", "sq", "sv", "tr", "uk", - "vi", "zh", "zh-TW"] - - -class StatusPages(APITable): - - # table name in the database - name = 'status_pages' - - def select(self, query: ast.Select) -> pd.DataFrame: - """Receive query as AST (abstract syntax tree) and act upon it. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Select - - Returns: - pd.DataFrame - """ - conditions = extract_comparison_conditions(query.where) - # Get page id from query - _id = None - for op, arg1, arg2 in conditions: - if arg1 == 'id' and op == '=': - _id = arg2 - else: - raise NotImplementedError - - # Get column names from query - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - # 'id' needs to selected when searching with 'id' - temp_selected_columns = selected_columns - if _id and 'id' not in selected_columns: - selected_columns = ['id'] + selected_columns - - # Get limit from query - limit = query.limit.value if query.limit else 20 - total_results = limit - - page_no = 1 # default page no - result_df = pd.DataFrame(columns=selected_columns) - - # call instatus api and get the response as pd.DataFrame - while True: - df = self.handler.call_instatus_api(endpoint='/v2/pages', params={'page': page_no, 'per_page': 100}) - if len(df) == 0 or limit <= 0: - break - else: - result_df = pd.concat([result_df, df[selected_columns]], ignore_index=True) - - page_no += 1 - limit -= len(df) - - # select columns from pandas data frame df - if result_df.empty: - result_df = pd.DataFrame(columns=selected_columns) - elif _id: - result_df = result_df[result_df['id'] == _id] - - # delete 'id' column if 'id' not present in temp_selected_columns - if 'id' not in temp_selected_columns and 'id' in selected_columns: - result_df = result_df.drop('id', axis=1) - - return result_df.head(n=total_results) - - def insert(self, query: ast.Insert) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Insert - - Returns: - None - """ - data = {} - for column, value in zip(query.columns, query.values[0]): - if isinstance(value, str): - try: - value = json.loads(value) - except json.JSONDecodeError: - if value == 'True': - value = True - elif value == 'False': - value = False - data[column.name] = value - self.handler.call_instatus_api(endpoint='/v1/pages', method='POST', json_data=data) - - def update(self, query: ast.Update) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Update - Returns: - None - """ - conditions = extract_comparison_conditions(query.where) - # Get page id from query - _id = None - for op, arg1, arg2 in conditions: - if arg1 == 'id' and op == '=': - _id = arg2 - else: - raise NotImplementedError - - data = {} - for key, value in query.update_columns.items(): - if isinstance(value, Constant): - if key == 'components': - data[key] = json.loads(value.value) - else: - data[key] = value.value - - if 'components' in data and isinstance(data['components'], str): - data['components'] = json.loads(data['components']) - - self.handler.call_instatus_api(endpoint=f'/v2/{_id}', method='PUT', json_data=data) - - def get_columns(self, ignore: List[str] = []) -> List[str]: - """columns - - Args: - ignore (List[str], optional): exclusion items. Defaults to []. - - Returns: - List[str]: available columns with `ignore` items removed from the list. - """ - return [ - "id", - "subdomain", - "name", - "workspaceId", - "logoUrl", - "faviconUrl", - "websiteUrl", - "customDomain", - "publicEmail", - "twitter", - "status", - "subscribeBySms", - "sendSmsNotifications", - "language", - "useLargeHeader", - "brandColor", - "okColor", - "disruptedColor", - "degradedColor", - "downColor", - "noticeColor", - "unknownColor", - "googleAnalytics", - "smsService", - "htmlInMeta", - "htmlAboveHeader", - "htmlBelowHeader", - "htmlAboveFooter", - "htmlBelowFooter", - "htmlBelowSummary", - "uptimeDaysDisplay", - "uptimeOutageDisplay", - "launchDate", - "cssGlobal", - "onboarded", - "createdAt", - "updatedAt" - ] - - -class Components(APITable): - - # table name in the database - name = 'components' - - def select(self, query: ast.Select) -> pd.DataFrame: - """Receive query as AST (abstract syntax tree) and act upon it. - - Args: - query (ASTNode): SQL query represented as AST. Usually it should be ast.Select - - Returns: - pd.DataFrame - """ - conditions = extract_comparison_conditions(query.where) - - if len(conditions) == 0: - raise Exception('WHERE clause is required') - - # Get page id and component id from query - pageId = None - componentId = None - for condition in conditions: - if condition[1] == 'page_id' and condition[0] == '=': - pageId = condition[2] - - if condition[1] == 'component_id' and condition[0] == '=': - componentId = condition[2] - - # Get column names from query - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - limit = query.limit.value if query.limit else None - if componentId: - # Call instatus API and get the response as pd.DataFrame - df = self.handler.call_instatus_api(endpoint=f'/v1/{pageId}/components/{componentId}') - for langCode in langCodes: - try: - df[f"translations_name_in_{langCode}"] = df["translations"].apply(lambda x: x.get("name", None)).apply(lambda x: x.get(langCode, None)) - df[f"translations_desc_in_{langCode}"] = df["translations"].apply(lambda x: x.get("description", None)).apply(lambda x: x.get(langCode, None)) - except AttributeError: - df[f"translations_name_in_{langCode}"] = None - df[f"translations_desc_in_{langCode}"] = None - df = df.drop(columns=["translations"]) - - result_df = df[selected_columns] - else: - # Call instatus API and get the response as pd.DataFrame - page_size = 100 - # Calculate the number of pages required - page_count = (limit + page_size - 1) // page_size if limit else 1 - result_df = pd.DataFrame(columns=selected_columns) - - # Call instatus API and get the response as pd.DataFrame for each page - for page in range(1, page_count + 1): - current_page_size = min(page_size, limit) if limit else page_size - - df = self.handler.call_instatus_api(endpoint=f'/v1/{pageId}/components', params={'page': page, 'per_page': current_page_size}) - # Break if no more data is available or limit is reached - if len(df) == 0 or (limit and limit <= 0) or limit == 0: - break - ''' Add translations_name_in_{langCode} and translations_desc_in_{langCode} columns to the dataframe''' - for i in range(len(df)): - for langCode in langCodes: - try: - df.at[i, f"translations_name_in_{langCode}"] = df.at[i, "translations"].get("name", {}).get(langCode, None) - df.at[i, f"translations_desc_in_{langCode}"] = df.at[i, "translations"].get("description", {}).get(langCode, None) - except AttributeError: - df.at[i, f"translations_name_in_{langCode}"] = None - df.at[i, f"translations_desc_in_{langCode}"] = None - - # Drop the 'translations' column - df = df.drop(columns=["translations"]) - # Concatenate the dataframes - result_df = pd.concat([result_df, df[selected_columns]], ignore_index=True) - - if limit: - limit -= len(df) - - return result_df - - def insert(self, query: ast.Insert) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Insert - - Returns: - None - """ - data = {'translations': { - "name": {}, - "description": {} - }} - - for column, value in zip(query.columns, query.values[0]): - if isinstance(value, Constant): - data[column.name] = json.loads(value.value) if column.name == 'translations' else value.value - elif isinstance(value, str): - try: - if re.match(r'^translations_name_in_[a-zA-Z\-]+$', column.name): - lang_code = column.name.split('_')[-1] - if lang_code not in langCodes: - raise Exception(f'Invalid language code {lang_code}') - data['translations']['name'][lang_code] = value - elif re.match(r'^translations_desc_in_[a-zA-Z\-]+$', column.name): - lang_code = column.name.split('_')[-1] - if lang_code not in langCodes: - raise Exception(f'Invalid language code {lang_code}') - data['translations']['description'][lang_code] = value - else: - data[column.name] = json.loads(value) - except json.JSONDecodeError: - data[column.name] = True if value == 'True' else (False if value == 'False' else value) - - page_id = data.pop('page_id', None) - - if page_id is not None: - self.handler.call_instatus_api(endpoint=f'/v1/{page_id}/components', method='POST', json_data=data) - - def update(self, query: ast.Update) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Update - Returns: - None - """ - conditions = extract_comparison_conditions(query.where) - # Get page id and component id from query - pageId = None - componentId = None - for condition in conditions: - if condition[1] == 'page_id' and condition[0] == '=': - pageId = condition[2] - elif condition[1] == 'component_id' and condition[0] == '=': - componentId = condition[2] - else: - raise Exception("page_id and component_id both are required") - - data = {'translations': { - "name": {}, - "description": {} - }} - for key, value in query.update_columns.items(): - if isinstance(value, Constant): - if re.match(r'^translations_name_in_[a-zA-Z\-]+$', key): - lang_code = key.split('_')[-1] - if lang_code not in langCodes: - raise Exception(f'Invalid language code {lang_code}') - data['translations']['name'][lang_code] = value.value - elif re.match(r'^translations_desc_in_[a-zA-Z\-]+$', key): - lang_code = key.split('_')[-1] - if lang_code not in langCodes: - raise Exception(f'Invalid language code {lang_code}') - data['translations']['description'][lang_code] = value.value - else: - data[key] = value.value - self.handler.call_instatus_api(endpoint=f'/v1/{pageId}/components/{componentId}', method='PUT', json_data=data) - - def get_columns(self, ignore: List[str] = []) -> List[str]: - """columns - - Args: - ignore (List[str], optional): exclusion items. Defaults to []. - - Returns: - List[str]: available columns with `ignore` items removed from the list. - """ - return [ - "id", - "name", - "nameTranslationId", - "description", - "descriptionTranslationId", - "status", - "order", - "showUptime", - "createdAt", - "updatedAt", - "archivedAt", - "siteId", - "uniqueEmail", - "oldGroup", - "groupId", - "isParent", - "isCollapsed", - "monitorId", - "nameHtml", - "nameHtmlTranslationId", - "descriptionHtml", - "descriptionHtmlTranslationId", - "isThirdParty", - "thirdPartyStatus", - "thirdPartyComponentId", - "thirdPartyComponentServiceId", - "importedFromStatuspage", - "startDate", - "group", - ] + [f'translations_name_in_{langCode}' for langCode in langCodes] + [f'translations_desc_in_{langCode}' for langCode in langCodes] diff --git a/mindsdb/integrations/handlers/intercom_handler/README.md b/mindsdb/integrations/handlers/intercom_handler/README.md deleted file mode 100644 index a5df3423d09..00000000000 --- a/mindsdb/integrations/handlers/intercom_handler/README.md +++ /dev/null @@ -1,95 +0,0 @@ -## Intercom API Handler Initialization - -The Intercom API handler can be initialized with the following parameters: - -- `access_token` - your Intercom access token - -## Implemented Features - -- [x] Articles - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support column selection - - [x] Support Insert - - [x] Support UPDATE - -## TODO: - -- [ ] Implement `ORDER BY`, `DELETE` for Articles -- [ ] Intercom Admins table (Follow: [Admins](https://developers.intercom.com/intercom-api-reference/reference#admins)) -- [ ] Intercom Companies table (Follow: [Companies](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Companies/)) -- [ ] Intercom Contacts table (Follow: [Contacts](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Contacts/)) -- [ ] Intercom Conversations table (Follow: [Conversations](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Conversations/)) -- [ ] Intercom Data Attributes table (Follow: [Data Attributes](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Data-Attributes/)) -- [ ] Intercom Data Events table (Follow: [Events](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Data-Events/)) -- [ ] Intercom Data Export table (Follow : [Data Export](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Data-Export/)) -- [ ] Intercom Help Center table (Follow: [Help Center](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Help-Center/)) -- [ ] Intercom Messages table (Follow: [Messages](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Messages/)) -- [ ] Intercom News table (Follow: [News](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/News/)) -- [ ] Intercom Notes table (Follow: [Notes](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Notes/)) -- [ ] Intercom Subscriptions table (Follow: [Subscriptions](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Subscription-Types/)) -- [ ] Intercom Tags table (Follow: [Tags](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Tags/)) -- [ ] Intercom Tickets table (Follow: [Tickets](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Tickets/)) -- [ ] Intercom Teams table (Follow: [Teams](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Teams/)) -- [ ] Intercom Visitors table (Follow: [Visitors](https://developers.intercom.com/docs/references/rest-api/api.intercom.io/Visitors/)) - -## Usage - -To create a database with the Intercom engine, you can use SQL-like syntax: - -```sql -CREATE DATABASE myintercom -WITH - ENGINE = 'intercom', - PARAMETERS = { - "access_token" : "" - }; -``` - -### SELECT - -You can retrieve data from Intercom using a `SELECT` statement: - -```sql -SELECT * -FROM myintercom.articles; -``` - -### WHERE - -You can filter data based on specific criteria using a `WHERE` clause: - -```sql -SELECT * -FROM myintercom.articles -WHERE id = ; -``` - -### INSERT - -You can create new article in Intercom using the `INSERT` statement: - -```sql -INSERT INTO myintercom.articles (title, description, body, author_id, state, parent_id, parent_type) -VALUES ( - 'Thanks for everything', - 'Description of the Article', - 'Body of the Article', - 6840572, - 'published', - 6801839, - 'collection' -); -``` - -### UPDATE - -You can update existing records in Intercom using the `UPDATE` statement: - -```sql -UPDATE myintercom.articles -SET title = 'Christmas is here!', - body = '

New gifts in store for the jolly season

' -WHERE id = ; -``` diff --git a/mindsdb/integrations/handlers/intercom_handler/__about__.py b/mindsdb/integrations/handlers/intercom_handler/__about__.py deleted file mode 100644 index f91902498b1..00000000000 --- a/mindsdb/integrations/handlers/intercom_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB intercom handler" -__package_name__ = "mindsdb_intercom_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Intercom" -__author__ = "Ritwick Raj Makhal" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/intercom_handler/__init__.py b/mindsdb/integrations/handlers/intercom_handler/__init__.py deleted file mode 100644 index 857ede039a3..00000000000 --- a/mindsdb/integrations/handlers/intercom_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .intercom_handler import IntercomHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Intercom" -name = "intercom" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/intercom_handler/icon.svg b/mindsdb/integrations/handlers/intercom_handler/icon.svg deleted file mode 100644 index 1d05c4c9fca..00000000000 --- a/mindsdb/integrations/handlers/intercom_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/intercom_handler/intercom_handler.py b/mindsdb/integrations/handlers/intercom_handler/intercom_handler.py deleted file mode 100644 index f50f5b53361..00000000000 --- a/mindsdb/integrations/handlers/intercom_handler/intercom_handler.py +++ /dev/null @@ -1,117 +0,0 @@ -from mindsdb.integrations.handlers.intercom_handler.intercom_tables import Articles -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb_sql_parser import parse_sql -import requests -import pandas as pd -from collections import OrderedDict -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE -import json -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class IntercomHandler(APIHandler): - def __init__(self, name: str, **kwargs) -> None: - """initializer method - - Args: - name (str): handler name - """ - super().__init__(name) - - self.connection = None - self.is_connected = False - self._baseUrl = 'https://api.intercom.io' - args = kwargs.get('connection_data', {}) - if 'access_token' in args: - access_token = args['access_token'] - self._headers = { - "Accept": "application/json", - "Authorization": f"Bearer {access_token}" - } - self._register_table(Articles.name, Articles(self)) - - def check_connection(self) -> StatusResponse: - """checking the connection - - Returns: - StatusResponse: whether the connection is still up - """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Intercom API: {e}!') - response.error_message = e - - self.is_connected = response.success - return response - - def connect(self) -> StatusResponse: - """making the connectino object - """ - if self.is_connected and self.connection: - return self.connection - - if self._headers: - try: - response = requests.get( - url=self._baseUrl, - headers=self._headers - ) - if response.status_code == 200: - self.connection = response - self.is_connected = True - return StatusResponse(True) - else: - raise Exception(f"Error connecting to Intercom API: {response.status_code} - {response.text}") - except requests.RequestException as e: - raise Exception(f"Request to Intercom API failed: {str(e)}") - - raise Exception("Access token is missing") - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) - - def call_intercom_api(self, endpoint: str, method: str = 'GET', params: dict = {}, data=None) -> pd.DataFrame: - url = f"{self._baseUrl}{endpoint}" - json_data = json.loads(data) if data else None - - response = requests.request(method.upper(), url, headers=self._headers, params=params, json=json_data) - - if response.status_code == 200: - data = response.json() - return pd.DataFrame([data]) - else: - raise requests.Response.raise_for_status(response) - - -connection_args = OrderedDict( - access_token={ - "type": ARG_TYPE.PWD, - "description": "Intercom access token to use for authentication.", - "required": True, - "label": "Access token", - }, -) - -connection_args_example = OrderedDict( - api_key="d25509b171ad79395dc2c51b099ee6d0" -) diff --git a/mindsdb/integrations/handlers/intercom_handler/intercom_tables.py b/mindsdb/integrations/handlers/intercom_handler/intercom_tables.py deleted file mode 100644 index f6ef5e0504a..00000000000 --- a/mindsdb/integrations/handlers/intercom_handler/intercom_tables.py +++ /dev/null @@ -1,152 +0,0 @@ -from typing import List -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb_sql_parser import ast -import pandas as pd -from mindsdb_sql_parser.ast.select.constant import Constant -import json - - -class Articles(APITable): - name: str = "articles" - - def __init__(self, handler: APIHandler): - super().__init__(handler) - - def select(self, query: ast.Select) -> pd.DataFrame: - """triggered at the SELECT query - - Args: - query (ast.Select): user's entered query - - Returns: - pd.DataFrame: the queried information - """ - _id = None - selected_columns = [] - - # Get id from where clause, if available - conditions = extract_comparison_conditions(query.where) - for op, arg1, arg2 in conditions: - if arg1 == 'id' and op == '=': - _id = arg2 - else: - raise ValueError("Unsupported condition in WHERE clause") - - # Get selected columns from query - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - # Initialize the result DataFrame - result_df = None - - if _id is not None: - # Fetch data using the provided endpoint for the specific id - df = self.handler.call_intercom_api(endpoint=f'/articles/{_id}') - - if len(df) > 0: - result_df = df[selected_columns] - else: - # Fetch data without specifying an id - page_size = 100 # The page size you want to use for API requests - limit = query.limit.value if query.limit else None - result_df = pd.DataFrame(columns=selected_columns) - - if limit: - # Calculate the number of pages required - page_count = (limit + page_size - 1) // page_size - else: - page_count = 1 - - for page in range(1, page_count + 1): - if limit == 0: - break - if limit: - # Calculate the page size for this request - current_page_size = min(page_size, limit) - else: - current_page_size = page_size - - df = pd.DataFrame(self.handler.call_intercom_api(endpoint='/articles', params={'page': page, 'per_page': current_page_size})['data'][0]) - if len(df) == 0: - break - result_df = pd.concat([result_df, df[selected_columns]], ignore_index=True) - if limit: - limit -= current_page_size - return result_df - - def insert(self, query: ast.Insert) -> None: - """insert - - Args: - query (ast.Insert): user's entered query - - Returns: - None - """ - data = {} - for column, value in zip(query.columns, query.values[0]): - if isinstance(value, Constant): - data[column.name] = value.value - else: - data[column.name] = value - self.handler.call_intercom_api(endpoint='/articles', method='POST', data=json.dumps(data)) - - def update(self, query: ast.Update) -> None: - """update - - Args: - query (ast.Update): user's entered query - - Returns: - None - """ - conditions = extract_comparison_conditions(query.where) - # Get page id from query - _id = None - for op, arg1, arg2 in conditions: - if arg1 == 'id' and op == '=': - _id = arg2 - else: - raise NotImplementedError - - data = {} - for key, value in query.update_columns.items(): - if isinstance(value, Constant): - data[key] = value.value - else: - data[key] = value - self.handler.call_intercom_api(endpoint=f'/articles/{_id}', method='PUT', data=json.dumps(data)) - - def get_columns(self, ignore: List[str] = []) -> List[str]: - """columns - - Args: - ignore (List[str], optional): exclusion items. Defaults to []. - - Returns: - List[str]: available columns with `ignore` items removed from the list. - """ - return [ - "type", - "id", - "workspace_id", - "title", - "description", - "body", - "author_id", - "state", - "created_at", - "updated_at", - "url", - "parent_id", - "parent_ids", - "parent_type", - "statistics" - ] diff --git a/mindsdb/integrations/handlers/jira_handler/README.md b/mindsdb/integrations/handlers/jira_handler/README.md deleted file mode 100644 index 99820fcb842..00000000000 --- a/mindsdb/integrations/handlers/jira_handler/README.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Jira -sidebarTitle: Jira ---- - -This documentation describes the integration of MindsDB with [Jira](https://www.atlassian.com/software/jira/guides/getting-started/introduction), the #1 agile project management tool used by teams to plan, track, release and support world-class software with confidence. -The integration allows MindsDB to access data from Jira and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Salesforce to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Jira from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/jira_handler) as an engine. - -```sql -CREATE DATABASE jira_datasource -WITH - ENGINE = 'jira', - PARAMETERS = { - "url": "https://example.atlassian.net", - "username": "john.doe@example.com", - "api_token": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" - }; -``` - -Required connection parameters include the following: - -* `url`: The base URL for your Jira instance/server. -* `username`: The email address associated with your Jira account. -* `api_token`: The API token generated for your Jira account. -* `cloud`: (Optional) Set to `true` for Jira Cloud or `false` for Jira Server. Defaults to `true`. - - -Refer this [guide](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/) for instructions on how to create API tokens for your account. - - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM jira_datasource.table_name -LIMIT 10; -``` - - -The above example utilize `jira_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/jira_handler/__about__.py b/mindsdb/integrations/handlers/jira_handler/__about__.py deleted file mode 100644 index acf2f75edae..00000000000 --- a/mindsdb/integrations/handlers/jira_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Jira handler' -__package_name__ = 'mindsdb_jira_handler' -__version__ = '0.0.2' -__description__ = "MindsDB handler for Jira" -__author__ = 'Balaji Seetharaman' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/jira_handler/__init__.py b/mindsdb/integrations/handlers/jira_handler/__init__.py deleted file mode 100644 index 3d6dde83dbd..00000000000 --- a/mindsdb/integrations/handlers/jira_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL -from .__about__ import __version__ as version, __description__ as description - -try: - from .jira_handler import JiraHandler as Handler - - import_error = None - -except Exception as e: - Handler = None - import_error = e - - -title = "Atlassian Jira" -name = "jira" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY - -__all__ = [ - "Handler", - "version", - "name", - "type", - "support_level", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/jira_handler/icon.svg b/mindsdb/integrations/handlers/jira_handler/icon.svg deleted file mode 100644 index cac6bcdb9f7..00000000000 --- a/mindsdb/integrations/handlers/jira_handler/icon.svg +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/jira_handler/jira_handler.py b/mindsdb/integrations/handlers/jira_handler/jira_handler.py deleted file mode 100644 index 4964cfd9a0f..00000000000 --- a/mindsdb/integrations/handlers/jira_handler/jira_handler.py +++ /dev/null @@ -1,142 +0,0 @@ -from typing import Any, Dict - -from atlassian import Jira -from requests.exceptions import HTTPError - -from mindsdb.integrations.handlers.jira_handler.jira_tables import ( - JiraProjectsTable, - JiraIssuesTable, - JiraUsersTable, - JiraGroupsTable, -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class JiraHandler(APIHandler): - """ - This handler handles the connection and execution of SQL statements on Jira. - """ - - def __init__(self, name: str, connection_data: Dict, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the Jira API. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - self._register_table("projects", JiraProjectsTable(self)) - self._register_table("issues", JiraIssuesTable(self)) - self._register_table("groups", JiraGroupsTable(self)) - self._register_table("users", JiraUsersTable(self)) - - def connect(self) -> Jira: - """ - Establishes a connection to the Jira API. - - Raises: - ValueError: If the required connection parameters are not provided. - AuthenticationError: If an authentication error occurs while connecting to the Salesforce API. - - Returns: - atlassian.jira.Jira: A connection object to the Jira API. - """ - if self.is_connected is True: - return self.connection - - is_cloud = self.connection_data.get("cloud", True) - - if is_cloud: - # Jira Cloud supports API token authentication. - if not all(key in self.connection_data for key in ["username", "api_token", "url"]): - raise ValueError("Required parameters (username, api_token, url) must be provided.") - - config = { - "username": self.connection_data["username"], - "password": self.connection_data["api_token"], - "url": self.connection_data["url"], - "cloud": is_cloud, - } - else: - # Jira Server supports personal access token authentication or open access. - if "url" not in self.connection_data: - raise ValueError("Required parameter 'url' must be provided.") - - config = {"url": self.connection_data["url"], "cloud": False} - - if "personal_access_token" in self.connection_data: - config["session"] = {"Authorization": f"Bearer {self.connection_data['personal_access_token']}"} - - try: - self.connection = Jira(**config) - self.is_connected = True - return self.connection - except Exception as unknown_error: - logger.error(f"Unknown error connecting to Jira, {unknown_error}!") - raise - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Salesforce API. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - - try: - connection = self.connect() - connection.myself() - response.success = True - except (HTTPError, ValueError) as known_error: - logger.error(f"Connection check to Jira failed, {known_error}!") - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f"Connection check to Jira failed due to an unknown error, {unknown_error}!") - response.error_message = str(unknown_error) - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> Response: - """ - Executes a native JQL query on Jira and returns the result. - - Args: - query (Text): The JQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - connection = self.connect() - - try: - results = connection.jql(query) - df = JiraIssuesTable(self).normalize(results["issues"]) - response = Response(RESPONSE_TYPE.TABLE, df) - except HTTPError as http_error: - logger.error(f"Error running query: {query} on Jira, {http_error}!") - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(http_error)) - except Exception as unknown_error: - logger.error(f"Error running query: {query} on Jira, {unknown_error}!") - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(unknown_error)) - - return response diff --git a/mindsdb/integrations/handlers/jira_handler/jira_tables.py b/mindsdb/integrations/handlers/jira_handler/jira_tables.py deleted file mode 100644 index cfff87ce38c..00000000000 --- a/mindsdb/integrations/handlers/jira_handler/jira_tables.py +++ /dev/null @@ -1,243 +0,0 @@ -from typing import List, Optional - -from atlassian import Jira -import pandas as pd - -from mindsdb.integrations.libs.api_handler import APIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, SortColumn, FilterOperator -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class JiraTableBase(APIResource): - """Base class for Jira tables""" - - def to_dataframe(self, records: Optional[List[dict]]) -> pd.DataFrame: - """ - Convert records to DataFrame with fixed columns, handling missing optional fields. - - Args: - records: List of record dictionaries from Jira API, or None/empty list - - Returns: - DataFrame with all expected columns, missing fields filled with None - """ - if records: - df = pd.DataFrame(records) - df = df.reindex(columns=self.get_columns(), fill_value=None) - else: - df = pd.DataFrame([], columns=self.get_columns()) - return df - - -class JiraProjectsTable(JiraTableBase): - def list( - self, - conditions: Optional[List[FilterCondition]] = None, - limit: Optional[int] = None, - sort: Optional[List[SortColumn]] = None, - targets: Optional[List[str]] = None, - **kwargs, - ) -> pd.DataFrame: - client: Jira = self.handler.connect() - - projects = [] - conditions = conditions or [] - for condition in conditions: - if condition.column in ("id", "key"): - if condition.op == FilterOperator.EQUAL: - projects = [client.get_project(condition.value)] - elif condition.op == FilterOperator.IN: - projects = [client.get_project(project_id) for project_id in condition.value] - else: - raise ValueError(f"Unsupported operator {condition.op} for column {condition.column}.") - condition.applied = True - - if not projects: - projects = client.get_all_projects() - - return self.to_dataframe(projects) - - def get_columns(self) -> List[str]: - return [ - "id", - "key", - "name", - "projectTypeKey", - "simplified", - "style", - "isPrivate", - "entityId", - "uuid", - ] - - -class JiraIssuesTable(JiraTableBase): - def list( - self, - conditions: Optional[List[FilterCondition]] = None, - limit: Optional[int] = None, - sort: Optional[List[SortColumn]] = None, - targets: Optional[List[str]] = None, - **kwargs, - ) -> pd.DataFrame: - client: Jira = self.handler.connect() - - issues = [] - conditions = conditions or [] - for condition in conditions: - if condition.column in ("id", "key"): - if condition.op == FilterOperator.EQUAL: - issues = [client.get_issue(condition.value)] - elif condition.op == FilterOperator.IN: - issues = [client.get_issue(issue_id) for issue_id in condition.value] - else: - raise ValueError(f"Unsupported operator {condition.op} for column {condition.column}.") - condition.applied = True - - elif condition.column in ("project_id", "project_key", "project_name"): - if condition.op == FilterOperator.EQUAL: - issues = client.get_all_project_issues(condition.value, limit=limit) - elif condition.op == FilterOperator.IN: - for project_id in condition.value: - issues.extend(client.get_all_project_issues(project_id, limit=limit)) - - condition.applied = True - - if not issues: - project_ids = [project["id"] for project in client.get_all_projects()] - for project_id in project_ids: - issues.extend( - self._get_project_issues_with_limit(client, project_id, limit=limit, current_issues=issues) - ) - - if issues: - return self.normalize(issues) - else: - return self.to_dataframe(issues) - - def _get_project_issues_with_limit( - self, - client: Jira, - project_id: str, - limit: Optional[int] = None, - current_issues: Optional[List] = None, - ): - """ - Helper to get issues from a project, respecting the limit. - """ - if current_issues is None: - current_issues = [] - if limit: - remaining = limit - len(current_issues) - if remaining <= 0: - return [] - return client.get_all_project_issues(project_id, limit=remaining) - else: - return client.get_all_project_issues(project_id) - - def normalize(self, issues: dict) -> pd.DataFrame: - issues_df = pd.json_normalize(issues) - # Use errors='ignore' to skip columns that don't exist in the data - issues_df.rename( - columns={ - "fields.project.id": "project_id", - "fields.project.key": "project_key", - "fields.project.name": "project_name", - "fields.summary": "summary", - "fields.priority.name": "priority", - "fields.creator.displayName": "creator", - "fields.assignee.displayName": "assignee", - "fields.status.name": "status", - }, - inplace=True, - errors="ignore", - ) - issues_df = issues_df.reindex(columns=self.get_columns(), fill_value=None) - - return issues_df - - def get_columns(self) -> List[str]: - return [ - "id", - "key", - "project_id", - "project_key", - "project_name", - "summary", - "priority", - "creator", - "assignee", - "status", - ] - - -class JiraGroupsTable(JiraTableBase): - def list( - self, - conditions: Optional[List[FilterCondition]] = None, - limit: Optional[int] = None, - sort: Optional[List[SortColumn]] = None, - targets: Optional[List[str]] = None, - **kwargs, - ) -> pd.DataFrame: - client: Jira = self.handler.connect() - - if limit: - groups = client.get_groups(limit=limit)["groups"] - else: - groups = client.get_groups()["groups"] - - return self.to_dataframe(groups) - - def get_columns(self) -> List[str]: - return [ - "groupId", - "name", - "html", - ] - - -class JiraUsersTable(JiraTableBase): - def list( - self, - conditions: Optional[List[FilterCondition]] = None, - limit: Optional[int] = None, - sort: Optional[List[SortColumn]] = None, - targets: Optional[List[str]] = None, - **kwargs, - ) -> pd.DataFrame: - client: Jira = self.handler.connect() - - users = [] - conditions = conditions or [] - for condition in conditions: - if condition.column == "accountId": - if condition.op == FilterOperator.EQUAL: - users = [client.user(account_id=condition.value)] - elif condition.op == FilterOperator.IN: - users = [client.user(account_id=accountId) for accountId in condition.value] - else: - raise ValueError(f"Unsupported operator {condition.op} for column {condition.column}.") - condition.applied = True - - if not users: - if limit: - users = client.users_get_all(limit=limit) - else: - users = client.users_get_all() - - return self.to_dataframe(users) - - def get_columns(self) -> List[str]: - return [ - "accountId", - "accountType", - "emailAddress", - "displayName", - "active", - "timeZone", - "locale", - ] diff --git a/mindsdb/integrations/handlers/jira_handler/requirements.txt b/mindsdb/integrations/handlers/jira_handler/requirements.txt deleted file mode 100644 index b24be946842..00000000000 --- a/mindsdb/integrations/handlers/jira_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -atlassian-python-api \ No newline at end of file diff --git a/mindsdb/integrations/handlers/kinetica_handler/README.md b/mindsdb/integrations/handlers/kinetica_handler/README.md deleted file mode 100644 index d919c7be62a..00000000000 --- a/mindsdb/integrations/handlers/kinetica_handler/README.md +++ /dev/null @@ -1,37 +0,0 @@ -## Implementation - -This handler is implemented by extending the PostgresHandler. - -The required arguments to establish a connection are as follows: - -- `user` is the database user. -- `password` is the database password. -- `host` is the host IP address or URL. -- `port` is the port used to make TCP/IP connection. -- `database` is the database name. - -There are several optional arguments that can be used as well. - -- `sslmode` ssl modes (`disable, allow, prefer, require, verify-ca, verify-full`). - -## Usage - -In order to make use of this handler and connect to the MariaDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE kinetica -WITH ENGINE = "kinetica", -PARAMETERS = { - "user": "xxxxxx", - "password": "xxxxx", - "host": "abc.abc.abc.com", - "port": 5432, - "database": "test" - }; -``` - -You can use this established connection to query your table as follows. - -```sql -SELECT * FROM kinetica.home_rentals_new; -``` diff --git a/mindsdb/integrations/handlers/kinetica_handler/__about__.py b/mindsdb/integrations/handlers/kinetica_handler/__about__.py deleted file mode 100644 index 6e3090f1701..00000000000 --- a/mindsdb/integrations/handlers/kinetica_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Kinetica handler' -__package_name__ = 'mindsdb_kinetica_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Kinetica" -__author__ = 'Biswadip Paul' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/kinetica_handler/__init__.py b/mindsdb/integrations/handlers/kinetica_handler/__init__.py deleted file mode 100644 index 38f08d512b9..00000000000 --- a/mindsdb/integrations/handlers/kinetica_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .kinetica_handler import KineticaHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'Kinetica' -name = 'kinetica' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/kinetica_handler/connection_args.py b/mindsdb/integrations/handlers/kinetica_handler/connection_args.py deleted file mode 100644 index 1e040bd173f..00000000000 --- a/mindsdb/integrations/handlers/kinetica_handler/connection_args.py +++ /dev/null @@ -1,54 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Kinetica server.', - 'required': True, - 'label': 'User', - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Kinetica server.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Kinetica server.', - 'required': True, - 'label': 'Database', - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Kinetica server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - 'required': True, - 'label': 'Host', - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Kinetica server. Must be an integer.', - 'required': True, - 'label': 'Port', - }, - schema={ - 'type': ARG_TYPE.STR, - 'description': 'The schema in which objects are searched first.', - 'required': False, - 'label': 'Schema', - }, - sslmode={ - 'type': ARG_TYPE.STR, - 'description': 'sslmode that will be used for connection.', - 'required': False, - 'label': 'sslmode', - }, -) - -connection_args_example = OrderedDict( - host='127.0.0.1', port=5432, user='root', password='password', database='database' -) diff --git a/mindsdb/integrations/handlers/kinetica_handler/icon.svg b/mindsdb/integrations/handlers/kinetica_handler/icon.svg deleted file mode 100644 index b8fa4ae9527..00000000000 --- a/mindsdb/integrations/handlers/kinetica_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/kinetica_handler/kinetica_handler.py b/mindsdb/integrations/handlers/kinetica_handler/kinetica_handler.py deleted file mode 100644 index e0d77a091e7..00000000000 --- a/mindsdb/integrations/handlers/kinetica_handler/kinetica_handler.py +++ /dev/null @@ -1,12 +0,0 @@ -from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler - - -class KineticaHandler(PostgresHandler): - """ - This handler handles connection and execution of the Kinetica statements. - """ - - name = 'kinetica' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/lancedb_handler/README.md b/mindsdb/integrations/handlers/lancedb_handler/README.md deleted file mode 100644 index bb86e31bf06..00000000000 --- a/mindsdb/integrations/handlers/lancedb_handler/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# LanceDB Handler - -This is the implementation of the LanceDB for MindsDB. - -## LanceDB - -LanceDB is an open-source database for vector-search built with persistent storage, which greatly simplifies retrieval, filtering and management of embeddings. LanceDB is the first and only vector database that supports full reproducibility natively. Taking advantage of Lance columnar format. Refer this notebook https://github.com/lancedb/lancedb/blob/main/docs/src/notebooks/reproducibility.ipynb - -## Implementation - -This handler uses `lancedb` python library connect to a LanceDB instance. - -The required arguments to establish a connection are: - -* `persist_directory`: The uri of the LanceDB database. Usually a local path. -* `api_key`: If presented, connect to LanceDB cloud. Otherwise, connect to a database on file system or cloud storage. -* `region`: The region to use for LanceDB Cloud. -* `host_override`: The override url for LanceDB Cloud. - -Refer https://lancedb.github.io/lancedb/python/python/ - -## Usage - -In order to make use of this handler and connect to a local LanceDB instance in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE lancedb -WITH ENGINE = "lancedb", -PARAMETERS = { - "persist_directory" : "~/lancedb" -}; -``` - -You can insert data into a new collection like so - -```sql -CREATE TABLE lancedb.test_data15 -(SELECT * FROM myexample_db.lance_test_data); -``` - -You can query a collection within your LanceDB as follows: - -```sql -select * from lancedb.test_data15; -``` - -filter - -```sql -select * from lancedb.test_data15 -where id = '1'; -``` - -search for similar embeddings - -```sql -select * from lancedb.test_data15 -where search_vector = '[1.5, 1.5]' -; -``` diff --git a/mindsdb/integrations/handlers/lancedb_handler/__about__.py b/mindsdb/integrations/handlers/lancedb_handler/__about__.py deleted file mode 100644 index 27a6715df44..00000000000 --- a/mindsdb/integrations/handlers/lancedb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB LandeDB handler" -__package_name__ = "mindsdb_lancedb_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for LanceDB" -__author__ = "Biswadip Paul" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/lancedb_handler/__init__.py b/mindsdb/integrations/handlers/lancedb_handler/__init__.py deleted file mode 100644 index bd58ff573b4..00000000000 --- a/mindsdb/integrations/handlers/lancedb_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .lancedb_handler import LanceDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "LanceDB" -name = "lancedb" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/lancedb_handler/connection_args.py b/mindsdb/integrations/handlers/lancedb_handler/connection_args.py deleted file mode 100644 index 092def7a0f9..00000000000 --- a/mindsdb/integrations/handlers/lancedb_handler/connection_args.py +++ /dev/null @@ -1,35 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - persist_directory={ - "type": ARG_TYPE.STR, - "description": "The uri of the database.", - "required": True, - }, - api_key={ - "type": ARG_TYPE.STR, - "description": "If presented, connect to LanceDB cloud. Otherwise, connect to a database on file system or cloud storage.", - "required": False, - "secret": True - }, - region={ - "type": ARG_TYPE.STR, - "description": "The region to use for LanceDB Cloud.", - "required": False, - }, - host_override={ - "type": ARG_TYPE.STR, - "description": "The override url for LanceDB Cloud.", - "required": False, - }, -) - -connection_args_example = OrderedDict( - persist_directory="~/lancedb", - api_key=None, - region="us-west-2", - host_override=None, -) diff --git a/mindsdb/integrations/handlers/lancedb_handler/icon.svg b/mindsdb/integrations/handlers/lancedb_handler/icon.svg deleted file mode 100644 index 4876c72e1e8..00000000000 --- a/mindsdb/integrations/handlers/lancedb_handler/icon.svg +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/lancedb_handler/lancedb_handler.py b/mindsdb/integrations/handlers/lancedb_handler/lancedb_handler.py deleted file mode 100644 index 9a5df45fb28..00000000000 --- a/mindsdb/integrations/handlers/lancedb_handler/lancedb_handler.py +++ /dev/null @@ -1,329 +0,0 @@ -from typing import List, Optional - -import lancedb -import pandas as pd -import pyarrow as pa -from lance.vector import vec_to_table -import duckdb -import json - -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, - TableField, - VectorStoreHandler, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class LanceDBHandler(VectorStoreHandler): - """This handler handles connection and execution of the LanceDB statements.""" - - name = "lancedb" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - self._connection_data = kwargs.get("connection_data") - - self._client_config = { - "uri": self._connection_data.get("persist_directory"), - "api_key": self._connection_data.get("api_key", None), - "region": self._connection_data.get("region"), - "host_override": self._connection_data.get("host_override"), - } - - # uri is required either for LanceDB Cloud or local - if not self._client_config["uri"]: - raise Exception( - "persist_directory is required for LanceDB connection!" - ) - # uri, api_key and region is required either for LanceDB Cloud - elif self._client_config["uri"] and self._client_config["api_key"] and not self._client_config["region"]: - raise Exception( - "region is required for LanceDB Cloud connection!" - ) - - self._client = None - self.is_connected = False - self.connect() - - def _get_client(self): - client_config = self._client_config - if client_config is None: - raise Exception("Client config is not set!") - return lancedb.connect(**client_config) - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """Connect to a LanceDB database.""" - if self.is_connected is True: - return - try: - self._client = self._get_client() - self.is_connected = True - except Exception as e: - logger.error(f"Error connecting to LanceDB client, {e}!") - self.is_connected = False - - def disconnect(self): - """Close the database connection.""" - if self.is_connected is False: - return - self._client = None - self.is_connected = False - - def check_connection(self): - """Check the connection to the LanceDB database.""" - response_code = StatusResponse(False) - need_to_close = self.is_connected is False - try: - self._client.table_names() - response_code.success = True - except Exception as e: - logger.error(f"Error connecting to LanceDB , {e}!") - response_code.error_message = str(e) - finally: - if response_code.success is True and need_to_close: - self.disconnect() - if response_code.success is False and self.is_connected is True: - self.is_connected = False - - return response_code - - def _get_lancedb_operator(self, operator: FilterOperator) -> str: - # The in values are not returned with () and only one element is returned. Bug - mapping = { - FilterOperator.EQUAL: "=", - FilterOperator.NOT_EQUAL: "!=", - FilterOperator.LESS_THAN: "<", - FilterOperator.LESS_THAN_OR_EQUAL: "<=", - FilterOperator.GREATER_THAN: ">", - FilterOperator.GREATER_THAN_OR_EQUAL: ">=", - FilterOperator.IN: "in", - FilterOperator.NOT_IN: "not in", - FilterOperator.LIKE: "like", - FilterOperator.NOT_LIKE: "not like", - FilterOperator.IS_NULL: "is null", - FilterOperator.IS_NOT_NULL: "is not null", - } - - if operator not in mapping: - raise Exception(f"Operator {operator} is not supported by LanceDB!") - - return mapping[operator] - - def _translate_condition( - self, conditions: List[FilterCondition] - ) -> Optional[dict]: - """ - Translate a list of FilterCondition objects to string that can be used by LanceDB. - E.g., - [ - FilterCondition( - column="content", - op=FilterOperator.NOT_EQUAL, - value="a", - ), - FilterCondition( - column="id", - op=FilterOperator.EQUAL, - value="6", - ) - ] - --> - "content != 'a' and id = '6'" - """ - # we ignore all non-metadata conditions - if not conditions: - return - filtered_conditions = [ - condition - for condition in conditions - if condition.column.startswith(TableField.ID.value) or condition.column.startswith(TableField.CONTENT.value) - ] - - if len(filtered_conditions) == 0: - return None - - # generate the LanceDB filter string - lancedb_conditions = [] - for condition in filtered_conditions: - if isinstance(condition.value, str): - condition.value = f"'{condition.value}'" - condition_key = condition.column.split(".")[-1] - - value = condition.value - if condition.op in (FilterOperator.IN, FilterOperator.NOT_IN): - if not isinstance(condition.value, list): - value = [value] - value = '({})'.format(', '.join([repr(i) for i in value])) - else: - value = str(value) - lancedb_conditions.append( - ' '.join([condition_key, self._get_lancedb_operator(condition.op), value]) - ) - # Combine all conditions into a single string and return - return " and ".join(lancedb_conditions) if lancedb_conditions else None - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> pd.DataFrame: - - collection = self._client.open_table(table_name) - - filters = self._translate_condition(conditions) - # check if embedding vector filter is present - vector_filter = ( - [] - if conditions is None - else [ - condition - for condition in conditions - if condition.column == TableField.SEARCH_VECTOR.value - ] - ) - - if len(vector_filter) > 0: - vector_filter = vector_filter[0] - else: - vector_filter = None - - if vector_filter is not None: - vec = json.loads(vector_filter.value) if isinstance(vector_filter.value, str) else vector_filter.value - result = collection.search(vec).select(columns).to_pandas() - result = result.rename(columns={"_distance": TableField.DISTANCE.value}) - else: - result = self._client.open_table(table_name).to_pandas() - - new_columns = columns + [TableField.DISTANCE.value] if TableField.DISTANCE.value in result.columns else columns - - col_str = ', '.join([col for col in new_columns if col in (TableField.ID.value, TableField.CONTENT.value, TableField.METADATA.value, TableField.EMBEDDINGS.value, TableField.DISTANCE.value)]) - - where_str = f'where {filters}' if filters else '' - # implementing limit and offset. Not supported natively in lancedb - if limit and offset: - sql = f"""select {col_str} from result {where_str} limit {limit} offset {offset}""" - elif limit and not offset: - sql = f"""select {col_str} from result {where_str} limit {limit}""" - elif offset and not limit: - sql = f"""select {col_str} from result {where_str} offset {offset}""" - else: - sql = f"""select {col_str} from result {where_str}""" - - data_df = duckdb.query(sql).to_df() - return data_df - - def insert( - self, table_name: str, data: pd.DataFrame, columns: List[str] = None - ): - """ - Insert data into the LanceDB database. - In case of create table statements the there is a mismatch between the column types of the `data` pandas dataframe filled with data - and the empty base table column types which raises a pa.lib.ArrowNotImplementedError, in that case the base table is deleted (doesn't matter as it is empty) - and recreated with the right datatypes - """ - - if TableField.METADATA.value not in data.columns: - data[TableField.METADATA.value] = None - - df = data[ - [TableField.ID.value, TableField.CONTENT.value, TableField.METADATA.value, TableField.EMBEDDINGS.value] - ] - - try: - collection = self._client.open_table(table_name) - pa_data = pa.Table.from_pandas(df, preserve_index=False) - vec_data = vec_to_table(df[TableField.EMBEDDINGS.value].values.tolist()) - new_pa_data = pa_data.append_column("vector", vec_data["vector"]) - collection.add(new_pa_data) - except pa.lib.ArrowNotImplementedError: - collection_df = collection.to_pandas() - column_dtypes = collection_df.dtypes - df = df.astype(column_dtypes) - new_df = pd.concat([collection_df, df]) - new_df['id'] = new_df['id'].apply(str) - pa_data = pa.Table.from_pandas(new_df, preserve_index=False) - vec_data = vec_to_table(df[TableField.EMBEDDINGS.value].values.tolist()) - new_pa_data = pa_data.append_column("vector", vec_data["vector"]) - self.drop_table(table_name) - self._client.create_table(table_name, new_pa_data) - - def update( - self, table_name: str, data: pd.DataFrame, columns: List[str] = None - ): - """ - Update data in the LanceDB database. - TODO: not implemented yet - """ - return super().update(table_name, data, columns) - - def delete( - self, table_name: str, conditions: List[FilterCondition] = None - ): - filters = self._translate_condition(conditions) - if filters is None: - raise Exception("Delete query must have at least one condition!") - collection = self._client.open_table(table_name) - collection.delete(filters) - - def create_table(self, table_name: str, if_not_exists=True): - """ - Create a collection with the given name in the LanceDB database. - """ - - data = { - TableField.ID.value: str, - TableField.CONTENT.value: str, - TableField.METADATA.value: object, - TableField.EMBEDDINGS.value: object, - } - df = pd.DataFrame(columns=data.keys()).astype(data) - self._client.create_table(table_name, df) - - def drop_table(self, table_name: str, if_exists=True): - """ - Delete a collection from the LanceDB database. - """ - try: - self._client.drop_table(table_name) - except ValueError as e: - if not if_exists: - raise e - - def get_tables(self) -> HandlerResponse: - """ - Get the list of collections in the LanceDB database. - """ - collections = self._client.table_names() - collections_name = pd.DataFrame( - columns=["table_name"], - data=collections, - ) - return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_name) - - def get_columns(self, table_name: str) -> HandlerResponse: - # check if collection exists - try: - df = self._client.open_table(table_name).to_pandas() - column_df = pd.DataFrame(df.dtypes).reset_index() - column_df.columns = ['column_name', 'data_type'] - except ValueError: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Table {table_name} does not exist!", - ) - return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=column_df) diff --git a/mindsdb/integrations/handlers/lancedb_handler/requirements.txt b/mindsdb/integrations/handlers/lancedb_handler/requirements.txt deleted file mode 100644 index 8645dd24dcd..00000000000 --- a/mindsdb/integrations/handlers/lancedb_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -lancedb~=0.3.1 -lance diff --git a/mindsdb/integrations/handlers/lancedb_handler/tests/__init__.py b/mindsdb/integrations/handlers/lancedb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/lancedb_handler/tests/test_lancedb_handler.py b/mindsdb/integrations/handlers/lancedb_handler/tests/test_lancedb_handler.py deleted file mode 100644 index 454d4858c4c..00000000000 --- a/mindsdb/integrations/handlers/lancedb_handler/tests/test_lancedb_handler.py +++ /dev/null @@ -1,99 +0,0 @@ -import unittest -from mindsdb.api.executor.data_types.response_type import ( - RESPONSE_TYPE, -) -from mindsdb.integrations.handlers.lancedb_handler.lancedb_handler import ( - LanceDBHandler, -) -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, -) -import pandas as pd - - -class LanceDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = {'connection_data': {'persist_directory': '~/lancedb'}} - cls.handler = LanceDBHandler('test_lancedb_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - self.handler.check_connection() - - def test_2_create_table(self): - res = self.handler.create_table('test_data17') - assert res.resp_type is not RESPONSE_TYPE.ERROR - - def test_3_insert_into_table(self): - data = [{'id': 'id1', 'content': 'this is a test', 'metadata': {'test': 'test1'}, 'embeddings': [1, 2, 3, 4, 3, 5, 2, 8]}, - {'id': 'id2', 'content': 'this is a test', 'metadata': {'test': 'test2'}, 'embeddings': [4, 2, 7, 4, 2, 5, 2, 9]}, - {'id': 'id3', 'content': 'this is a test', 'metadata': {'test': 'test3'}, 'embeddings': [5, 2, 3, 2, 3, 3, 2, 7]}, - {'id': 'id3', 'content': 'this is a test', 'metadata': {'test': 'test4'}, 'embeddings': [5, 2, 3, 2, 3, 4, 2, 7]}] - df = pd.DataFrame(data) - res = self.handler.insert('test_data17', df, None) - assert res.resp_type is not RESPONSE_TYPE.ERROR - - def test_4_select(self): - res = self.handler.select( - 'test_data17', - ['id', 'content', 'metadata', 'embeddings'], - [ - FilterCondition( - column="id", - op=FilterOperator.EQUAL, - value="id3", - ) - ], - None, - None, - ) - assert res.resp_type is RESPONSE_TYPE.TABLE - - def test_5_vector_distance(self): - res = self.handler.select( - 'test_data17', - ['id', 'content', 'metadata', 'embeddings'], - [ - FilterCondition( - column="search_vector", - op=FilterOperator.EQUAL, - value="[4.0, 2.0, 7.0, 4.0, 2.0, 5.0, 2.0, 9.0]", - ) - ], - None, - None, - ) - assert res.resp_type is RESPONSE_TYPE.TABLE and 'distance' in res.data_frame.columns - - def test_6_delete(self): - res = self.handler.delete( - 'test_data17', - [ - FilterCondition( - column="id", - op=FilterOperator.EQUAL, - value="id1", - ) - ] - ) - assert res.resp_type is not RESPONSE_TYPE.ERROR - - def test_7_describe_table(self): - res = self.handler.get_columns('test_data17') - assert res.resp_type is RESPONSE_TYPE.TABLE - - def test_8_get_tables(self): - res = self.handler.get_tables() - assert res.resp_type is not RESPONSE_TYPE.ERROR - - def test_9_drop_table(self): - res = self.handler.drop_table('test_data17') - assert res.resp_type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/libsql_handler/README.md b/mindsdb/integrations/handlers/libsql_handler/README.md deleted file mode 100644 index 1329cff54b6..00000000000 --- a/mindsdb/integrations/handlers/libsql_handler/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# LibSQL Handler - -This is the implementation of the LibSQL handler for MindsDB. - -## LibSQL - -libSQL is a fork of SQLite that is both Open Source, and Open Contributions. It comes with it's own server for replication, embedded replicas, multi-tenancy, and edge nodes for deploying on the edge. - -[LibSQL](https://turso.tech/libsql/) - -## Implementation - -This handler was implemented using the standard `libsql-experimental` library which has bindings for Python, and is compatible with the sqlite3 module. - -The only required argument to establish a connection is `database`. This points to the local database file that the connection is to be made to. -Optionally, you can parse `sync_url` along with `auth_token` to sync the local database with the remote database on the edge. - -## Usage - -If you have local file that need to connect into MindsDB, you have to [deploy MindsDB locally](https://docs.mindsdb.com/setup/self-hosted/pip/source), ways like via Docker or via pip. Then copy the file into the desired folder in source folder. This way MindsDB can successfully access your file. - -In order to make use of this handler and connect to a LibSQL/SQLite database in MindsDB, the following syntax can be used, - -```sql -CREATE DATABASE libsql_dev -WITH - engine='libsql', - parameters={ - "database":"example.db" - }; -``` - -OR - -With `sync_url` and `auth_token` - -```sql -CREATE DATABASE libsql_dev -WITH - engine='libsql', - parameters={ - "database": "example.db", - "sync_url": "libsql://exampledb-org.turso.io", - "auth_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c" - }; -``` - -Now, you can use this established connection to query your database as follows, -```sql -SELECT * FROM libsql_dev.example_tbl -``` diff --git a/mindsdb/integrations/handlers/libsql_handler/__about__.py b/mindsdb/integrations/handlers/libsql_handler/__about__.py deleted file mode 100644 index 61c1354848f..00000000000 --- a/mindsdb/integrations/handlers/libsql_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB LibSQL handler" -__package_name__ = "mindsdb_libsql_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for LibSQL" -__author__ = "Meet Gor" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/libsql_handler/__init__.py b/mindsdb/integrations/handlers/libsql_handler/__init__.py deleted file mode 100644 index 1d2450b1ece..00000000000 --- a/mindsdb/integrations/handlers/libsql_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .libsql_handler import LibSQLHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "LibSQL" -name = "libsql" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/libsql_handler/connection_args.py b/mindsdb/integrations/handlers/libsql_handler/connection_args.py deleted file mode 100644 index e51cebf897a..00000000000 --- a/mindsdb/integrations/handlers/libsql_handler/connection_args.py +++ /dev/null @@ -1,22 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - database={ - "type": ARG_TYPE.STR, - "description": "The database file where the data will be stored.", - }, - sync_url={ - "type": ARG_TYPE.STR, - "description": "The database URL where the data is synced with.", - }, - auth_token={ - "type": ARG_TYPE.STR, - "description": "The JWT auth token to authenticate with the sync database.", - "secret": True - }, -) - -connection_args_example = OrderedDict(database="chinook.db") diff --git a/mindsdb/integrations/handlers/libsql_handler/icon.svg b/mindsdb/integrations/handlers/libsql_handler/icon.svg deleted file mode 100644 index 71703f3be4a..00000000000 --- a/mindsdb/integrations/handlers/libsql_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/libsql_handler/libsql_handler.py b/mindsdb/integrations/handlers/libsql_handler/libsql_handler.py deleted file mode 100644 index 28ac3348478..00000000000 --- a/mindsdb/integrations/handlers/libsql_handler/libsql_handler.py +++ /dev/null @@ -1,192 +0,0 @@ -from typing import Optional - -import pandas as pd -import libsql_experimental as libsql - -from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.libs.base import DatabaseHandler - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - - -logger = log.getLogger(__name__) - - -class LibSQLHandler(DatabaseHandler): - """ - This handler handles connection and execution of the LibSQL statements. - """ - - name = "libsql" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = "libsql" - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - args = self.connection_data - # sync_url and auth_token are optional - # sync_url is used to sync the local database from the remote database - # auth_token is used as the authentication token for the remote database - if args.get("sync_url"): - self.connection = libsql.connect( - database=args["database"], - sync_url=args["sync_url"], - auth_token=args["auth_token"], - ) - else: - self.connection = libsql.connect(database=args["database"]) - - self.is_connected = True - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.connection = None - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error( - f'Error connecting to SQLite {self.connection_data["database"]}, {e}!' - ) - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, columns=[x[0] for x in cursor.description] - ), - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error( - f'Error running query: {query} on {self.connection_data["database"]}!' - ) - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - return self.native_query(query) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = "SELECT name from sqlite_master where type= 'table';" - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - query = f"PRAGMA table_info([{table_name}]);" - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename( - columns={"name": "column_name", "type": "data_type"} - ) - return result diff --git a/mindsdb/integrations/handlers/libsql_handler/requirements.txt b/mindsdb/integrations/handlers/libsql_handler/requirements.txt deleted file mode 100644 index 50eb2d0ebaf..00000000000 --- a/mindsdb/integrations/handlers/libsql_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -libsql-experimental diff --git a/mindsdb/integrations/handlers/libsql_handler/tests/__init__.py b/mindsdb/integrations/handlers/libsql_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/libsql_handler/tests/test.db b/mindsdb/integrations/handlers/libsql_handler/tests/test.db deleted file mode 100644 index 496db4b90e2..00000000000 Binary files a/mindsdb/integrations/handlers/libsql_handler/tests/test.db and /dev/null differ diff --git a/mindsdb/integrations/handlers/libsql_handler/tests/test_libsql_handler.py b/mindsdb/integrations/handlers/libsql_handler/tests/test_libsql_handler.py deleted file mode 100644 index be39dee7304..00000000000 --- a/mindsdb/integrations/handlers/libsql_handler/tests/test_libsql_handler.py +++ /dev/null @@ -1,45 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.libsql_handler.libsql_handler import LibSQLHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class LibSQLHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "database": "tests/test.db", - } - cls.handler = LibSQLHandler("test_libsql_handler", cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_create_table(self): - query = "CREATE TABLE IF NOT EXISTS user (id INTEGER PRIMARY KEY, name TEXT, age INTEGER)" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.OK - - def test_2_insert_rows(self): - query = ( - "INSERT OR IGNORE INTO user (name, age) VALUES ('Alice', 30), ('Bob', 25)" - ) - result = self.handler.native_query(query) - print(result) - assert result.type is RESPONSE_TYPE.OK - - def test_3_native_query_select(self): - query = "SELECT * FROM user" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_5_get_columns(self): - columns = self.handler.get_columns("user") - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/lightdash_handler/README.md b/mindsdb/integrations/handlers/lightdash_handler/README.md deleted file mode 100644 index a161d1516aa..00000000000 --- a/mindsdb/integrations/handlers/lightdash_handler/README.md +++ /dev/null @@ -1,63 +0,0 @@ -# Lightdash Handler - -This handler allows you to interact with a Lightdash instance - -## About Lightdash - -Lightdash instantly turns your dbt project into a full-stack BI platform. Analysts write metrics and Lightdash enables self-serve for the entire business. - -## Lightdash Handler Initialization - -You can create the database like so: - -```sql -CREATE DATABASE lightdash_datasource -WITH ENGINE = "lightdash", -PARAMETERS = { - "api_key": "...", - "base_url": "https://..." -}; -``` - -To select from various tables, you can use `SELECT` statement. You must provide a package for this to work. - -```sql -SELECT * FROM lightdash_datasource.user; -``` - -```sql -SELECT firstName FROM npm_datasource.maintainers; -``` - -Some tables requre additional parameters that can be passed through `WHERE` clause separated by `AND`s: - -```sql -SELECT * FROM lightdash_datasource.project_table -WHERE project_uuid='....'; -``` - -## Available tables - -- `user`: details of authenticated user -- `user_abilities`: list of abilities of authenticated user -- `org`: details of organization of authenticated user -- `org_projects`: list of projects under authenticated user's organization -- `org_members`: list of members of authenticated user's organization -- `project_table`: details of project defined by `project_uuid` -- `warehouse_connection`: details of the warehouse to which project with `project_uuid` is connected -- `dbt_connection`: details of dbt connection to which project with `project_uuid` is connected -- `dbt_env_vars`: list of environment variables of dbt connection to which project with `project_uuid` is connected -- `charts`: list of charts in project with `project_uuid` -- `spaces`: list of spaces in project with `project_uuid` -- `access`: list of users with access to project with `project_uuid` -- `validation`: list of validation results of the project with `project_uuid` -- `dashboards`: list of dashboards defined in space `space_uuid` of project `project_uuid` -- `queries`: list of queries in the space `space_uuid` of project `project_uuid` -- `chart_history`: history of changes of chart `chart_uuid` -- `chart_config`: configuration of chart defined by version `version_uuid` and chart `chart_uuid` -- `chart_additional_metrics`: additional metrices used in chart defined by version `version_uuid` and chart `chart_uuid` -- `chart_table_calculations`: table calculations used in chart defined by version `version_uuid` and chart `chart_uuid` -- `scheduler_logs`: logs of scheduler in project `project_uuid` -- `scheduler`: details of scheduler with `scheduler_uuid` -- `scheduler_jobs`: jobs scheduled by scheduler with `scheduler_uuid` -- `scheduler_job_status`: status of a job with `job_id` diff --git a/mindsdb/integrations/handlers/lightdash_handler/__about__.py b/mindsdb/integrations/handlers/lightdash_handler/__about__.py deleted file mode 100644 index 044c0cbb633..00000000000 --- a/mindsdb/integrations/handlers/lightdash_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Lightdash handler" -__package_name__ = "mindsdb_lightdash_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Lightdash" -__author__ = "Aditya Azad" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/lightdash_handler/__init__.py b/mindsdb/integrations/handlers/lightdash_handler/__init__.py deleted file mode 100644 index 98b89626543..00000000000 --- a/mindsdb/integrations/handlers/lightdash_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .lightdash_handler import LightdashHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Lightdash" -name = "lightdash" -type = HANDLER_TYPE.DATA -icon_path = "icon.png" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/lightdash_handler/api.py b/mindsdb/integrations/handlers/lightdash_handler/api.py deleted file mode 100644 index 5c0220be41d..00000000000 --- a/mindsdb/integrations/handlers/lightdash_handler/api.py +++ /dev/null @@ -1,513 +0,0 @@ -import requests -from urllib.parse import urljoin - - -class Lightdash: - - def __init__(self, url: str, api_key: str) -> None: - self.base_url = urljoin(url, "/api/v1/") - self.api_key = api_key - - def _request(self, method: str, relative_endpoint: str, data=None): - kwargs = { - "method": method, - "url": urljoin(self.base_url, relative_endpoint), - "headers": { - "Authorization": "ApiKey " + self.api_key, - } - } - if data is not None: - kwargs["data"] = data - return requests.request(**kwargs) - - def is_connected(self) -> bool: - if self.get_user() is not None: - return True - return False - - def get_user(self): - """ - Get user's details - Return format: - {'userUuid': '831b6c26-bdc2-4a56-9818-fd8ebaa406ac', - 'email': 'test@test.com', - 'firstName': 'Test', - 'lastName': 'User', - 'organizationUuid': 'd00805a0-b0b4-400d-a136-66f620493f11', - 'organizationName': 'testing-comp', - 'organizationCreatedAt': '2023-10-20T17:46:10.005Z', - 'isTrackingAnonymized': False, - 'isMarketingOptedIn': True, - 'isSetupComplete': True, - 'role': 'admin', - 'isActive': True, - 'abilityRules': [{'action': 'view', - 'subject': 'OrganizationMemberProfile', - 'conditions': {'organizationUuid': 'd00805a0-b0b4-400d-a136-66f620493f11'}}, - {'action': ... - """ - resp = self._request("get", "user") - if resp.ok: - return resp.json()["results"] - return {} - - def get_org(self): - """ - Get user's organization details - Return format: - {'organizationUuid': 'd00805a0-b0b4-400d-a136-66f620493f11', - 'name': 'testing-comp', - 'defaultProjectUuid': 'string', - 'chartColors': ['string'], - 'needsProject': False} - """ - resp = self._request("get", "org") - if resp.ok: - return resp.json()["results"] - return {} - - def get_projects(self): - """ - Get user organization's pojects' details - Return format: - [{'name': 'Jaffle shop', - 'projectUuid': '95dfda3b-02e2-4708-a014-5967966020f3', - 'type': 'DEFAULT'}] - """ - resp = self._request("get", "org/projects") - if resp.ok: - return resp.json()["results"] - return [] - - def get_org_members(self): - """ - Get all the members in user's organization - Return format: - [{'userUuid': '831b6c26-bdc2-4a56-9818-fd8ebaa406ac', - 'firstName': 'TestName', - 'lastName': 'TestLastName', - 'email': 'test@test.com', - 'organizationUuid': 'd00805a0-b0b4-400d-a136-66f620493f11', - 'role': 'admin', - 'isActive': True, - 'isInviteExpired': False}] - """ - resp = self._request("get", "org/users") - if resp.ok: - return resp.json()["results"] - return [] - - def get_project(self, project_uuid: str): - """ - Get details of a project in user's organization - Return format: - { "dbtVersion": "v1.4", - "copiedFromProjectUuid": "string", - "pinnedListUuid": "string", - "warehouseConnection": { - "role": "string", - "type": "snowflake", - "account": "string", - "database": "string", - "warehouse": "string", - "schema": "string", - "threads": 0, - "clientSessionKeepAlive": true, - "queryTag": "string", - "accessUrl": "string", - "startOfWeek": 0 }, - "dbtConnection": { - "type": "dbt", - "target": "string", - "environment": [ { - "value": "string", - "key": "string" } ], - "profiles_dir": "string", - "project_dir": "string" }, - "type": "DEFAULT", - "name": "string", - "projectUuid": "string", - "organizationUuid": "string" } - """ - resp = self._request("get", f"projects/{project_uuid}") - if resp.ok: - return resp.json()["results"] - return {} - - def get_charts_in_project(self, project_uuid: str): - """ - List all charts in a project - Return format: - [{"name": "string", - "organizationUuid": "string", - "uuid": "string", - "description": "string", - "projectUuid": "string", - "spaceUuid": "string", - "pinnedListUuid": "string", - "spaceName": "string", - "dashboardUuid": "string", - "dashboardName": "string", - "chartType": "string"}] - """ - resp = self._request("get", f"projects/{project_uuid}/charts") - if resp.ok: - return resp.json()["results"] - return [] - - def get_spaces_in_project(self, project_uuid: str): - """ - List all spaces in a project - Return format: - [{"name": "string", - "organizationUuid": "string", - "uuid": "string", - "projectUuid": "string", - "pinnedListUuid": "string", - "pinnedListOrder": 0, - "isPrivate": true, - "dashboardCount": 0, - "chartCount": 0, - "access": [ - "string" ]}] - """ - resp = self._request("get", f"projects/{project_uuid}/spaces") - if resp.ok: - return resp.json()["results"] - return [] - - def get_project_access_list(self, project_uuid: str): - """ - Get access list for a project. This is a list of users that have been explictly granted access to the project. There may be other users that have access to the project via their organization membership - Return format: - [{"lastName": "string", - "firstName": "string", - "email": "string", - "role": "viewer", - "projectUuid": "string", - "userUuid": "string" }] - """ - resp = self._request("get", f"projects/{project_uuid}/access") - if resp.ok: - return resp.json()["results"] - return [] - - def get_validation_results(self, project_uuid: str): - """ - Get validation results for a project. This will return the results of the latest validation job - Return format: - [{"source": "chart", - "spaceUuid": "string", - "projectUuid": "string", - "errorType": "chart", - "error": "string", - "name": "string", - "createdAt": "2019-08-24T14:15:22Z", - "validationId": 0, - "chartName": "string", - "chartViews": 0, - "lastUpdatedAt": "2019-08-24T14:15:22Z", - "lastUpdatedBy": "string", - "fieldName": "string", - "chartType": "line", - "chartUuid": "string"}] - """ - resp = self._request("get", f"projects/{project_uuid}/validate") - if resp.ok: - return resp.json()["results"] - return [] - - def get_space(self, project_uuid: str, space_uuid: str): - """ - Get details for a space in a project - Return format: - { "pinnedListOrder": 0, - "pinnedListUuid": "string", - "access": [ { - "role": "viewer", - "lastName": "string", - "firstName": "string", - "userUuid": "string" } ], - "dashboards": [ { - "name": "string", - "organizationUuid": "string", - "uuid": "string", - "description": "string", - "updatedAt": "2019-08-24T14:15:22Z", - "projectUuid": "string", - "updatedByUser": { - "userUuid": "string", - "firstName": "string", - "lastName": "string" }, - "spaceUuid": "string", - "views": 0, - "firstViewedAt": "2019-08-24T14:15:22Z", - "pinnedListUuid": "string", - "pinnedListOrder": 0, - "validationErrors": [ { - "validationId": 0, - "createdAt": "2019-08-24T14:15:22Z", - "error": "string" } ] } ], - "projectUuid": "string", - "queries": [ { - "name": "string", - "uuid": "string", - "description": "string", - "updatedAt": "2019-08-24T14:15:22Z", - "updatedByUser": { - "userUuid": "string", - "firstName": "string", - "lastName": "string" }, - "spaceUuid": "string", - "pinnedListUuid": "string", - "pinnedListOrder": 0, - "firstViewedAt": "2019-08-24T14:15:22Z", - "views": 0, - "validationErrors": [ { - "validationId": 0, - "createdAt": "2019-08-24T14:15:22Z", - "error": "string" } ], - "chartType": "line" } ], - "isPrivate": true, - "name": "string", - "uuid": "string", - "organizationUuid": "string" } - """ - resp = self._request("get", f"projects/{project_uuid}/spaces/{space_uuid}") - if resp.ok: - return resp.json()["results"] - return {} - - def get_chart_version_history(self, chart_uuid: str): - """ - Get chart version history from last 30 days - Return format: - [{"createdAt": "2019-08-24T14:15:22Z", - "chartUuid": "string", - "versionUuid": "string", - "createdBy": { - "userUuid": "string", - "firstName": "string", - "lastName": "string" }}] - """ - resp = self._request("get", f"saved/{chart_uuid}/history") - if resp.ok: - return resp.json()["results"]["history"] - return [] - - def get_chart(self, chart_uuid: str, version_uuid: str): - """ - Get chart details - Return format: - { "chart": { - "dashboardName": "string", - "dashboardUuid": "string", - "pinnedListOrder": 0, - "pinnedListUuid": "string", - "spaceName": "string", - "spaceUuid": "string", - "organizationUuid": "string", - "updatedByUser": { - "userUuid": "string", - "firstName": "string", - "lastName": "string" }, - "updatedAt": "2019-08-24T14:15:22Z", - "tableConfig": { - "columnOrder": [ - "string" ] }, - "chartConfig": { - "config": { - "legendPosition": "horizontal", - "showLegend": true, - "groupSortOverrides": [ - "string" ], - "groupValueOptionOverrides": {}, - "groupColorOverrides": {}, - "groupLabelOverrides": {}, - "showPercentage": true, - "showValue": true, - "valueLabel": "hidden", - "isDonut": true, - "metricId": "string", - "groupFieldIds": [ - "string" ] }, - "type": "pie" }, - "pivotConfig": { - "columns": [ - "string" ] }, - "metricQuery": { - "additionalMetrics": [ { - "label": "string", - "type": "percentile", - "description": "string", - "sql": "string", - "hidden": true, - "round": 0, - "compact": "thousands", - "format": "km", - "table": "string", - "name": "string", - "index": 0, - "filters": [ { - "values": [ - null ], - "operator": "isNull", - "id": "string", - "target": { - "fieldRef": "string" }, - "settings": null, - "disabled": true } ], - "baseDimensionName": "string", - "uuid": "string", - "percentile": 0 } ], - "tableCalculations": [ { - "format": { - "suffix": "string", - "prefix": "string", - "compact": "thousands", - "currency": "string", - "separator": "default", - "round": 0, - "type": "default" }, - "sql": "string", - "displayName": "string", - "name": "string", - "index": 0 } ], - "limit": 0, - "sorts": [ { - "descending": true, - "fieldId": "string" } ], - "filters": { - "metrics": { - "or": [ - null ], - "id": "string" }, - "dimensions": { - "or": [ - null ], - "id": "string" } }, - "metrics": [ - "string" ], - "dimensions": [ - "string" ] }, - "tableName": "string", - "description": "string", - "name": "string", - "projectUuid": "string", - "uuid": "string" }, - "createdBy": { - "userUuid": "string", - "firstName": "string", - "lastName": "string" }, - "createdAt": "2019-08-24T14:15:22Z", - "versionUuid": "string", - "chartUuid": "string" } - """ - resp = self._request("get", f"saved/{chart_uuid}/version/{version_uuid}") - if resp.ok: - return resp.json()["results"] - return {} - - def get_scheduler_logs(self, project_uuid: str): - """ - Get scheduled logs - Return format: - { "logs": [ { - "details": {}, - "targetType": "email", - "target": "string", - "status": "scheduled", - "createdAt": "2019-08-24T14:15:22Z", - "scheduledTime": "2019-08-24T14:15:22Z", - "jobGroup": "string", - "jobId": "string", - "schedulerUuid": "string", - "task": "handleScheduledDelivery" - } ], - "dashboards": [ { - "dashboardUuid": "string", - "name": "string" } ], - "charts": [ { - "savedChartUuid": "string", - "name": "string" } ], - "users": [ { - "userUuid": "string", - "lastName": "string", - "firstName": "string" } ], - "schedulers": [ { - "options": { - "limit": 0, - "formatted": true }, - "dashboardUuid": null, - "savedChartUuid": "string", - "cron": "string", - "format": "csv", - "createdBy": "string", - "updatedAt": "2019-08-24T14:15:22Z", - "createdAt": "2019-08-24T14:15:22Z", - "message": "string", - "name": "string", - "schedulerUuid": "string", - "targets": [ { - "channel": "string", - "schedulerUuid": "string", - "updatedAt": "2019-08-24T14:15:22Z", - "createdAt": "2019-08-24T14:15:22Z", - "schedulerSlackTargetUuid": "string" } ] } ] } - """ - resp = self._request("get", f"schedulers/{project_uuid}/logs") - if resp.ok: - return resp.json()["results"] - return {} - - def get_scheduler(self, scheduler_uuid: str): - """ - Get details of a scheduler - Return format: - { "options": { - "limit": 0, - "formatted": true }, - "dashboardUuid": null, - "savedChartUuid": "string", - "cron": "string", - "format": "csv", - "createdBy": "string", - "updatedAt": "2019-08-24T14:15:22Z", - "createdAt": "2019-08-24T14:15:22Z", - "message": "string", - "name": "string", - "schedulerUuid": "string", - "targets": [ { - "channel": "string", - "schedulerUuid": "string", - "updatedAt": "2019-08-24T14:15:22Z", - "createdAt": "2019-08-24T14:15:22Z", - "schedulerSlackTargetUuid": "string" } ] } - """ - resp = self._request("get", f"schedulers/{scheduler_uuid}") - if resp.ok: - return resp.json()["results"] - return {} - - def get_scheduler_jobs(self, scheduler_uuid: str): - """ - Get jobs scheduled by a scheduler - Return format: - [ { "id": "string", - "date": "2019-08-24T14:15:22Z" } ] - """ - resp = self._request("get", f"schedulers/{scheduler_uuid}/jobs") - if resp.ok: - return resp.json()["results"] - return [] - - def get_scheduler_job_status(self, job_id: str): - """ - Get a generic job status - Return format: - { "status": "string" } - """ - resp = self._request("get", f"schedulers/job/{job_id}/status") - if resp.ok: - return resp.json()["results"] - return {} diff --git a/mindsdb/integrations/handlers/lightdash_handler/icon.png b/mindsdb/integrations/handlers/lightdash_handler/icon.png deleted file mode 100644 index 62a9614bc96..00000000000 Binary files a/mindsdb/integrations/handlers/lightdash_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/lightdash_handler/lightdash_handler.py b/mindsdb/integrations/handlers/lightdash_handler/lightdash_handler.py deleted file mode 100644 index de872ba4e8f..00000000000 --- a/mindsdb/integrations/handlers/lightdash_handler/lightdash_handler.py +++ /dev/null @@ -1,108 +0,0 @@ -from collections import OrderedDict -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE -from mindsdb.integrations.handlers.lightdash_handler.api import Lightdash -from mindsdb.integrations.handlers.lightdash_handler.lightdash_tables import ( - UserTable, - UserAbilityTable, - OrgTable, - OrgProjectsTable, - OrgMembersTable, - ProjectTable, - WarehouseConnectionTable, - DBTConnectionTable, - DBTEnvironmentVarsTable, - ChartsTable, - SpacesTable, - AccessTable, - ValidationTable, - DashboardsTable, - QueriesTable, - ChartHistoryTable, - ChartConfigTable, - ChartAdditionalMetricsTable, - ChartTableCalculationsTable, - SchedulerLogsTable, - SchedulerTable, - SchedulerJobsTable, - SchedulerJobStatus, -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse - - -class LightdashHandler(APIHandler): - - def __init__(self, name: str, **kwargs) -> None: - super().__init__(name) - self.connection = None - self.is_connected = False - self.api_key = kwargs.get("connection_data", {}).get("api_key", "") - self.base_url = kwargs.get("connection_data", {}).get("base_url", "") - _tables = [ - UserTable, - UserAbilityTable, - OrgTable, - OrgProjectsTable, - OrgMembersTable, - ProjectTable, - WarehouseConnectionTable, - DBTConnectionTable, - DBTEnvironmentVarsTable, - ChartsTable, - SpacesTable, - AccessTable, - ValidationTable, - DashboardsTable, - QueriesTable, - ChartHistoryTable, - ChartConfigTable, - ChartAdditionalMetricsTable, - ChartTableCalculationsTable, - SchedulerLogsTable, - SchedulerTable, - SchedulerJobsTable, - SchedulerJobStatus, - ] - for Table in _tables: - self._register_table(Table.name, Table(self)) - self.connect() - - def connect(self) -> Lightdash: - self.connection = Lightdash(self.base_url, self.api_key) - return self.connection - - def check_connection(self) -> StatusResponse: - resp = StatusResponse(False) - if self.connection and not self.connection.is_connected(): - resp.error = "Client not connected" - else: - resp.success = True - return resp - - def native_query(self, query: str) -> StatusResponse: - ast = parse_sql(query) - return self.query(ast) - - -connection_args = OrderedDict( - api_key={ - 'type': ARG_TYPE.STR, - 'description': 'API Token for accessing Lightdash instance', - 'required': True, - 'label': 'API Key', - }, - base_url={ - 'type': ARG_TYPE.STR, - 'description': 'Base URL of Lightdash instance', - 'required': True, - 'label': 'Base URL', - } -) - - -connection_args_example = OrderedDict( - api_key='23d6b9e0c2fab7eba2e8b7e452cead47', - base_url='http://localhost:8080/' -) diff --git a/mindsdb/integrations/handlers/lightdash_handler/lightdash_tables.py b/mindsdb/integrations/handlers/lightdash_handler/lightdash_tables.py deleted file mode 100644 index 4a762ba51b0..00000000000 --- a/mindsdb/integrations/handlers/lightdash_handler/lightdash_tables.py +++ /dev/null @@ -1,648 +0,0 @@ -from typing import List - -import pandas as pd -from mindsdb_sql_parser import ast - -from mindsdb.integrations.utilities.handlers.query_utilities import ( - SELECTQueryExecutor, - SELECTQueryParser, -) -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb.integrations.utilities.sql_utils import conditions_to_filter - - -def val_to_string(d, k): - if k in d: - d[k] = str(d[k]) - - -def move_under(d, key_contents_to_move, key_to_move_under=None): - if key_contents_to_move not in d: - return - for k, v in d[key_contents_to_move].items(): - if key_to_move_under: - d[key_to_move_under][k] = v - else: - d[k] = v - del d[key_contents_to_move] - - -def select_keys(d, keys): - new_d = {} - for key in keys: - new_d[key] = d.get(key, "") - return new_d - - -class CustomAPITable(APITable): - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def get_columns(self, ignore: List[str] = []) -> List[str]: - return [item for item in self.columns if item not in ignore] - - def select(self, query: ast.Select) -> pd.DataFrame: - raise NotImplementedError() - - def parse_select(self, query: ast.Select, table_name: str): - select_statement_parser = SELECTQueryParser(query, table_name, self.get_columns()) - self.selected_columns, self.where_conditions, self.order_by_conditions, self.result_limit = select_statement_parser.parse_query() - - def get_where_param(self, query: ast.Select, param: str): - params = conditions_to_filter(query.where) - if param not in params: - raise Exception(f"WHERE condition does not have '{param}' selector") - return params[param] - - def apply_query_params(self, df, query): - select_statement_parser = SELECTQueryParser(query, self.name, self.get_columns()) - selected_columns, _, order_by_conditions, result_limit = select_statement_parser.parse_query() - select_statement_executor = SELECTQueryExecutor(df, selected_columns, [], order_by_conditions, result_limit) - return select_statement_executor.execute_query() - - -class UserTable(CustomAPITable): - name: str = "user" - columns: List[str] = [ - 'userUuid', - 'email', - 'firstName', - 'lastName', - 'organizationUuid', - 'organizationName', - 'organizationCreatedAt', - 'isTrackingAnonymized', - 'isMarketingOptedIn', - 'isSetupComplete', - 'role', - 'isActive', - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = select_keys(self.connection.get_user(), self.columns) - df = pd.DataFrame.from_records([data]) - return self.apply_query_params(df, query) - - -class UserAbilityTable(CustomAPITable): - name: str = "user_ability" - columns: List[str] = [ - 'action', - 'subject', - 'conditions' - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = select_keys(self.connection.get_user(), ["abilityRules"]) - for d in data: - val_to_string(d, "condition") - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class OrgTable(CustomAPITable): - name: str = "org" - columns: List[str] = [ - 'organizationUuid', - 'defaultProjectUuid' - 'name', - 'chartColors', - 'needsProject', - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = select_keys(self.connection.get_org(), self.columns) - val_to_string(data, "chartColors") - df = pd.DataFrame.from_records([data]) - return self.apply_query_params(df, query) - - -class OrgProjectsTable(CustomAPITable): - name: str = "org_projects" - columns: List[str] = [ - 'name', - 'projectUuid', - 'type', - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get_projects() - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class OrgMembersTable(CustomAPITable): - name: str = "org_members" - columns: List[str] = [ - 'userUuid', - 'firstName', - 'lastName', - 'email', - 'organizationUuid', - 'role', - 'isActive', - 'isInviteExpired', - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get_org_members() - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class ProjectTable(CustomAPITable): - name: str = "project_table" - columns: List[str] = [ - 'organizationUuid', - 'projectUuid', - 'name', - 'type', - 'pinnedListUuid', - 'copiedFromProjectUuid', - 'dbtVersion', - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, 'project_uuid') - data = select_keys(self.connection.get_project(project_uuid), self.columns) - df = pd.DataFrame.from_records([data]) - return self.apply_query_params(df, query) - - -class WarehouseConnectionTable(CustomAPITable): - name: str = "warehouse_connection" - columns: List[str] = [ - "role", - "type", - "account", - "database", - "warehouse", - "schema", - "threads", - "clientSessionKeepAlive", - "queryTag", - "accessUrl", - "startOfWeek", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, 'project_uuid') - data = select_keys(self.connection.get_project(project_uuid).get("warehouseConnection", {}), self.columns) - df = pd.DataFrame.from_records([data]) - return self.apply_query_params(df, query) - - -class DBTConnectionTable(CustomAPITable): - name: str = "dbt_connection" - columns: List[str] = [ - "type", - "target", - "profiles_dir", - "project_dir", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - data = select_keys(self.connection.get_project(project_uuid).get("dbtConnection", {}), self.columns) - df = pd.DataFrame.from_records([data]) - return self.apply_query_params(df, query) - - -class DBTEnvironmentVarsTable(CustomAPITable): - name: str = "dbt_env_vars" - columns: List[str] = [ - "value", - "key", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - data = self.connection.get_project(project_uuid).get("dbtConnection", {}).get("environment", []) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class ChartsTable(CustomAPITable): - name: str = "charts" - columns: List[str] = [ - "name", - "organizationUuid", - "uuid", - "description", - "projectUuid", - "spaceUuid", - "pinnedListUuid", - "spaceName", - "dashboardUuid", - "dashboardName", - "chartType", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - data = self.connection.get_charts_in_project(project_uuid) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class SpacesTable(CustomAPITable): - name: str = "spaces" - columns: List[str] = [ - "name", - "organizationUuid", - "uuid", - "projectUuid", - "pinnedListUuid", - "pinnedListOrder", - "isPrivate", - "dashboardCount", - "chartCount", - "access", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - data = self.connection.get_spaces_in_project(project_uuid) - for d in data: - val_to_string(d, "access") - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class AccessTable(CustomAPITable): - name: str = "access" - columns: List[str] = [ - "lastName", - "firstName", - "email", - "role", - "projectUuid", - "userUuid", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - data = self.connection.get_project_access_list(project_uuid) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class ValidationTable(CustomAPITable): - name: str = "validation" - columns: List[str] = [ - "source", - "spaceUuid", - "projectUuid", - "errorType", - "error", - "name", - "createdAt", - "validationId", - "chartName", - "chartViews", - "lastUpdatedAt", - "lastUpdatedBy", - "fieldName", - "chartType", - "chartUuid", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - data = self.connection.get_validation_results(project_uuid) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class DashboardsTable(CustomAPITable): - name: str = "dashboards" - columns: List[str] = [ - "name", - "organizationUuid", - "uuid", - "description", - "updatedAt", - "projectUuid", - "spaceUuid", - "views", - "firstViewedAt", - "pinnedListUuid", - "pinnedListOrder", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - space_uuid = self.get_where_param(query, "space_uuid") - data = [] - for row in self.connection.get_space(project_uuid, space_uuid).get("dashboards", []): - data.append(select_keys(row, self.columns)) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class QueriesTable(CustomAPITable): - name: str = "queries" - columns: List[str] = [ - "name", - "uuid", - "description", - "updatedAt", - "spaceUuid", - "pinnedListUuid", - "pinnedListOrder", - "firstViewedAt", - "views", - "chartType", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - space_uuid = self.get_where_param(query, "space_uuid") - data = [] - for row in self.connection.get_space(project_uuid, space_uuid).get("queries", []): - data.append(select_keys(row, self.columns)) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class ChartHistoryTable(CustomAPITable): - name: str = "chart_history" - columns: List[str] = [ - "createdAt", - "chartUuid", - "versionUuid", - "createdBy", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - chart_uuid = self.get_where_param(query, "chart_uuid") - data = [] - for row in self.connection.get_chart_version_history(chart_uuid): - d = select_keys(row, self.columns) - val_to_string(d, "createdBy") - data.append(d) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class ChartConfigTable(CustomAPITable): - name: str = "chart_config" - columns: List[str] = [ - "legendPosition", - "showLegend", - "groupSortOverrides", - "groupValueOptionOverrides", - "groupColorOverrides", - "groupLabelOverrides", - "showPercentage", - "showValue", - "valueLabel", - "isDonut", - "metricId", - "groupFieldIds" - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - chart_uuid = self.get_where_param(query, "chart_uuid") - version_uuid = self.get_where_param(query, "version_uuid") - raw_data = self.connection.get_chart(chart_uuid, version_uuid).get("chart", {}).get("chart_config", {}) - config_data = raw_data.get("config", {}) - val_to_string(config_data, "groupSortOverrides") - val_to_string(config_data, "groupValueOptionOverrides") - val_to_string(config_data, "groupColorOverrides") - val_to_string(config_data, "groupLabelOverrides") - val_to_string(config_data, "groupFieldIds") - config_data = select_keys(config_data, self.columns) - df = pd.DataFrame.from_records([{**config_data, "type": raw_data.get("type", "")}], columns=self.columns) - return self.apply_query_params(df, query) - - -class ChartAdditionalMetricsTable(CustomAPITable): - name: str = "chart_additional_metrics" - columns: List[str] = [ - "label", - "type", - "description", - "sql", - "hidden", - "round", - "compact", - "format", - "table", - "name", - "index", - "filters", - "baseDimensionName", - "uuid", - "percentile", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - chart_uuid = self.get_where_param(query, "chart_uuid") - version_uuid = self.get_where_param(query, "version_uuid") - data = self.connection.get_chart(chart_uuid, version_uuid).get("metricQuery", {}).get("additionalMetrics", []) - for d in data: - val_to_string(data, "filters") - d = select_keys(d, self.columns) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class ChartTableCalculationsTable(CustomAPITable): - name: str = "chart_table_calculations" - columns: List[str] = [ - "suffix", - "prefix", - "compact", - "currency", - "separator", - "round", - "type", - "sql", - "displayName", - "name", - "index", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - chart_uuid = self.get_where_param(query, "chart_uuid") - version_uuid = self.get_where_param(query, "version_uuid") - data = self.connection.get_chart(chart_uuid, version_uuid).get("metricQuery", {}).get("tableCalculations", []) - for d in data: - move_under(d, "format") - d = select_keys(d, self.columns) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class SchedulerLogsTable(CustomAPITable): - name: str = "scheduler_logs" - columns: List[str] = [ - "details", - "targetType", - "target", - "status", - "createdAt", - "scheduledTime", - "jobGroup", - "jobId", - "schedulerUuid", - "task", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - project_uuid = self.get_where_param(query, "project_uuid") - data = self.connection.get_scheduler_logs(project_uuid).get("logs", []) - for d in data: - val_to_string(d, "details") - d = select_keys(d, self.columns) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class SchedulerTable(CustomAPITable): - name: str = "scheduler" - columns: List[str] = [ - "options", - "dashboardUuid", - "savedChartUuid", - "cron", - "format", - "createdBy", - "updatedAt", - "createdAt", - "message", - "name", - "schedulerUuid", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - scheduler_uuid = self.get_where_param(query, "scheduler_uuid") - data = select_keys(self.connection.get_scheduler(scheduler_uuid), self.columns) - val_to_string(data, "options") - df = pd.DataFrame.from_records([data], columns=self.columns) - return self.apply_query_params(df, query) - - -class SchedulerJobsTable(CustomAPITable): - name: str = "scheduler_jobs" - columns: List[str] = [ - "id", - "date", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - scheduler_uuid = self.get_where_param(query, "scheduler_uuid") - data = self.connection.get_scheduler_jobs(scheduler_uuid) - for d in data: - d = select_keys(d, self.columns) - df = pd.DataFrame.from_records(data, columns=self.columns) - return self.apply_query_params(df, query) - - -class SchedulerJobStatus(CustomAPITable): - name: str = "scheduler_job_status" - columns: List[str] = [ - "status", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - job_id = self.get_where_param(query, "job_id") - data = self.connection.get_scheduler_jobs(job_id) - data = select_keys(data, self.columns) - df = pd.DataFrame.from_records([data], columns=self.columns) - return self.apply_query_params(df, query) diff --git a/mindsdb/integrations/handlers/lightdash_handler/tests/__init__.py b/mindsdb/integrations/handlers/lightdash_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/lindorm_handler/README.md b/mindsdb/integrations/handlers/lindorm_handler/README.md deleted file mode 100644 index 3f7fba7b9f9..00000000000 --- a/mindsdb/integrations/handlers/lindorm_handler/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Lindorm Handler - -This is the implementation of the Lindrom for MindsDB. - -## Lindorm -Lindorm is Alibaba Cloud's data governance platform that helps users protect sensitive data and ensure regulatory compliance. It provides discovery, classification, access control, and monitoring capabilities to help customers find, understand, and control their data across cloud services and on-premises systems. - -## Implementation - -This handler uses `phoenixdb` python library connect to a Lindorm database instance. The handler is implemented in `lindorm_handler.py` and the tests are in `test_lindorm_handler.py`. - -The required arguments to establish a connection are: - -* `url`: the url of database while connecting -* `autocommit`: Whether or not to autocommit changes -* `lindorm_user`: Username for authentication -* `lindorm_password`: Password for authentication - - -## Usage - -replace your lindorm database url, username and password in the following command - - -```sql -CREATE DATABASE lindorm_datasource -WITH ENGINE = 'lindorm', -PARAMETERS = { - "url": "", - "autocommit": True, - "lindorm_user":"root" , - "lindorm_password": "UWtx4ebU" -}; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/lindorm_handler/__about__.py b/mindsdb/integrations/handlers/lindorm_handler/__about__.py deleted file mode 100644 index 279200a2860..00000000000 --- a/mindsdb/integrations/handlers/lindorm_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Lindorm handler" -__package_name__ = "mindsdb_lindorm_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Lindorm" -__author__ = "Someshfengde" -__github__ = "https://github.com/someshfengde" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/lindorm_handler/__init__.py b/mindsdb/integrations/handlers/lindorm_handler/__init__.py deleted file mode 100644 index 138c342e717..00000000000 --- a/mindsdb/integrations/handlers/lindorm_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .lindorm_handler import LindormDBHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Lindorm" -name = "lindorm" -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path" -] diff --git a/mindsdb/integrations/handlers/lindorm_handler/icon.svg b/mindsdb/integrations/handlers/lindorm_handler/icon.svg deleted file mode 100644 index 4e8d211cb75..00000000000 --- a/mindsdb/integrations/handlers/lindorm_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/lindorm_handler/lindorm_handler.py b/mindsdb/integrations/handlers/lindorm_handler/lindorm_handler.py deleted file mode 100644 index 5ffd5de86a4..00000000000 --- a/mindsdb/integrations/handlers/lindorm_handler/lindorm_handler.py +++ /dev/null @@ -1,220 +0,0 @@ -from typing import Optional -import pandas as pd -import phoenixdb -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from pyphoenix.sqlalchemy_phoenix import PhoenixDialect -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class LindormHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Apache Phoenix statements. - """ - - name = 'lindorm' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'phoenix' - optional_parameters = ['autocommit', 'lindorm_user', 'lindorm_password'] - for parameter in optional_parameters: - if parameter not in connection_data: - connection_data[parameter] = None - - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - lindorm_connection_data = {'lindorm_user': self.connection_data['lindorm_user'], 'lindorm_password': self.connection_data['lindorm_password']} - - self.connection = phoenixdb.connect( - url=self.connection_data['url'], - autocommit=self.connection_data['autocommit'], - **lindorm_connection_data - ) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ Close any existing connections - - Should switch self.is_connected. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to the Phoenix Query Server, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cursor.description] - ) - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f'Error running query: {query} on the Lindorm Query Server!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - renderer = SqlalchemyRender(PhoenixDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = """ - SELECT DISTINCT TABLE_NAME, TABLE_SCHEM FROM SYSTEM.CATALOG - """ - result = self.native_query(query) - df = result.data_frame - df = df[df['TABLE_SCHEM'] != 'SYSTEM'] - df = df.drop('TABLE_SCHEM', axis=1) - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - query = f"SELECT * from {table_name} LIMIT 5" - cursor.execute(query) - cursor.fetchall() - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - [(x[0], x[1]) for x in cursor.description], - columns=['column_name', 'data_type'] - ) - ) - - except Exception as e: - logger.error(f'Error running query: {query} on the Phoenix Query Server!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response diff --git a/mindsdb/integrations/handlers/lindorm_handler/requirements.txt b/mindsdb/integrations/handlers/lindorm_handler/requirements.txt deleted file mode 100644 index 526500be75b..00000000000 --- a/mindsdb/integrations/handlers/lindorm_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pyphoenix -phoenixdb -protobuf==4.25.8 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/lindorm_handler/tests/__init__.py b/mindsdb/integrations/handlers/lindorm_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/lindorm_handler/tests/test_lindorm_handler.py b/mindsdb/integrations/handlers/lindorm_handler/tests/test_lindorm_handler.py deleted file mode 100644 index ee0884970e7..00000000000 --- a/mindsdb/integrations/handlers/lindorm_handler/tests/test_lindorm_handler.py +++ /dev/null @@ -1,33 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.lindorm_handler.lindorm_handler import LindormHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class LindormHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "url": 'http://127.0.0.1:8765', - "autocommit": True - } - cls.handler = LindormHandler('test_phoenix_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM USERS" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('USERS') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/luma_handler/README.md b/mindsdb/integrations/handlers/luma_handler/README.md deleted file mode 100644 index 8e6f766f876..00000000000 --- a/mindsdb/integrations/handlers/luma_handler/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Luma Handler - -Luma handler for MindsDB provides interfaces to connect to LUMA Events via APIs and pull event data into MindsDB. - ---- - -## Table of Contents - -- [Luma Handler](#luma-handler) - - [Table of Contents](#table-of-contents) - - [About LUMA](#about-luma) - - [LUMA Handler Implementation](#luma-handler-implementation) - - [LUMA Handler Initialization](#luma-handler-initialization) - - [Implemented Features](#implemented-features) - - [TODO Features](#todo-features) - - [Example Usage](#example-usage) - ---- - -## About LUMA - -From beautiful event pages to effortless invites and ticketing, Luma is all you need to host a memorable event. - -## LUMA Handler Implementation - -This handler was implemented using the `requests` library that makes http calls to https://docs.lu.ma/reference/getting-started-with-your-api - -## LUMA Handler Initialization - -The Luma handler is initialized with the following parameters: - -- `api_key`: API Key - -Read about creating an API key [here](https://docs.lu.ma/reference/getting-started-with-your-api). - -## Implemented Features - -- [x] LUMA List Events -- [x] LUMA Get an event - -## TODO Features - -- [ ] Get Event Guest List - This needs actual guests to be enrolled -- [ ] Update Event Guest Status - This needs actual guests to be enrolled -- [ ] LUMA Create an event - Need to implement it in handler and tables - -## Example Usage - -The first step is to create a database with the new `luma` engine. - -~~~~sql -CREATE DATABASE mindsdb_luma -WITH ENGINE = 'luma', -PARAMETERS = { - "api_key": "api_key" -}; -~~~~ - -Use the established connection to query your database: - -~~~~sql -SELECT * FROM mindsdb_luma.events; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_luma.events where event_id="evt-HQ36IFDwncocuGy"; -~~~~ - -Create an Event - -~~~~sql -INSERT INTO mindsdb_luma.events (name, start_at, timezone, end_at, require_rsvp_approval, geo_address_json_type, geo_address_json_place_id, geo_address_json_description, geo_latitude, geo_longitude, meeting_url) -VALUES -('New York Party', '2023-12-31T23:59:00Z', 'America/New_York', '2024-01-01T00:01:00Z', False, 'google', 'ChIJmQJIxlVYwokRLgeuocVOGVU', 'Landmark - twin towers', '40.756072', '-73.986834', ''); -~~~~ - -For creating an event, please refer https://docs.lu.ma/reference/create-event-1. - -Fields `name`, `start_at` and `timezone` are mandatory in the insert query. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/luma_handler/__about__.py b/mindsdb/integrations/handlers/luma_handler/__about__.py deleted file mode 100644 index ecb7f4e11ba..00000000000 --- a/mindsdb/integrations/handlers/luma_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Luma handler" -__package_name__ = "mindsdb_luma_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Luma" -__author__ = "Abhilash K R" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/luma_handler/__init__.py b/mindsdb/integrations/handlers/luma_handler/__init__.py deleted file mode 100644 index 90ccd10f694..00000000000 --- a/mindsdb/integrations/handlers/luma_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .luma_handler import LumaHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Luma" -name = "luma" -type = HANDLER_TYPE.DATA -icon_path = "icon.png" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/luma_handler/connection_args.py b/mindsdb/integrations/handlers/luma_handler/connection_args.py deleted file mode 100644 index 06fb757e787..00000000000 --- a/mindsdb/integrations/handlers/luma_handler/connection_args.py +++ /dev/null @@ -1,18 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.STR, - "description": "Luma API Key", - "required": True, - "label": "api_key", - "secret": True - } -) - -connection_args_example = OrderedDict( - api_key="api_key" -) diff --git a/mindsdb/integrations/handlers/luma_handler/icon.png b/mindsdb/integrations/handlers/luma_handler/icon.png deleted file mode 100644 index 328acad5d09..00000000000 Binary files a/mindsdb/integrations/handlers/luma_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/luma_handler/luma.py b/mindsdb/integrations/handlers/luma_handler/luma.py deleted file mode 100644 index d8ea5fd71c8..00000000000 --- a/mindsdb/integrations/handlers/luma_handler/luma.py +++ /dev/null @@ -1,47 +0,0 @@ -import json -import requests - - -class LumaClient: - def __init__(self, api_key): - self.auth_token = api_key - self.luma_base_endpoint = "https://api.lu.ma/" - self.validate_api_key() - - def make_request(self, url, method='GET', payload=None): - if method not in ['GET', 'POST']: - raise ValueError('Invalid HTTP request method') - headers = {"accept": "application/json", "content-type": "application/json"} - if self.auth_token: - headers['x-luma-api-key'] = self.auth_token - request_method = getattr(requests, method.lower()) - resp = request_method(url, json=payload, headers=headers) - return {"content": resp.json(), "code": resp.status_code} - - def validate_api_key(self): - url = f'{self.luma_base_endpoint}public/v1/user/get-self' - content = self.make_request(url) - if content['code'] != 200: - raise Exception("User Authentication failed - " + json.dumps(content["content"])) - return content - - def create_event(self, data): - url = f'{self.luma_base_endpoint}public/v1/event/create' - content = self.make_request(url, method="POST", payload=data) - if content['code'] != 200: - raise Exception("Create failed - " + json.dumps(content["content"])) - return content - - def get_event(self, event_api_id): - url = f'{self.luma_base_endpoint}public/v1/event/get?api_id={event_api_id}' - content = self.make_request(url) - if content['code'] != 200: - raise Exception("Get event failed - " + json.dumps(content["content"])) - return content - - def list_events(self): - url = f'{self.luma_base_endpoint}public/v1/calendar/list-events?series_mode=sessions' - content = self.make_request(url) - if content['code'] != 200: - raise Exception("Get event failed - " + json.dumps(content["content"])) - return content diff --git a/mindsdb/integrations/handlers/luma_handler/luma_handler.py b/mindsdb/integrations/handlers/luma_handler/luma_handler.py deleted file mode 100644 index dbc8b097c51..00000000000 --- a/mindsdb/integrations/handlers/luma_handler/luma_handler.py +++ /dev/null @@ -1,77 +0,0 @@ -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.luma_handler.luma_tables import ( - LumaEventsTable -) -from mindsdb.integrations.handlers.luma_handler.luma import LumaClient -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - - -class LumaHandler(APIHandler): - """The Luma handler implementation""" - - def __init__(self, name: str, **kwargs): - """Initialize the Luma handler. - - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - self.luma_client = None - self.is_connected = False - - events_data = LumaEventsTable(self) - self._register_table("events", events_data) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - - Returns - ------- - StatusResponse - connection object - """ - resp = StatusResponse(False) - try: - self.luma_client = LumaClient(self.connection_data.get("api_key")) - resp.success = True - self.is_connected = True - except Exception as ex: - resp.success = False - resp.error_message = ex - return resp - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - - Returns - ------- - StatusResponse - Status confirmation - """ - return self.connect() - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/luma_handler/luma_tables.py b/mindsdb/integrations/handlers/luma_handler/luma_tables.py deleted file mode 100644 index 4a91423e7ce..00000000000 --- a/mindsdb/integrations/handlers/luma_handler/luma_tables.py +++ /dev/null @@ -1,164 +0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor, INSERTQueryParser -from mindsdb_sql_parser import ast - - -class LumaEventsTable(APITable): - """The Luma Get Event Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://docs.lu.ma/reference/get-event-1 and https://docs.lu.ma/reference/calendar-list-events" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Luma events - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'events', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - filter_flag = False - for op, arg1, arg2 in where_conditions: - if arg1 == 'event_id': - if op == '=': - search_params["event_id"] = arg2 - filter_flag = True - else: - raise NotImplementedError("Only '=' operator is supported for event_id column.") - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - df = pd.DataFrame(columns=self.get_columns()) - - if filter_flag: - response = self.handler.luma_client.get_event(search_params["event_id"]) - event = response["content"]["event"] - df = pd.json_normalize(event) - else: - response = self.handler.luma_client.list_events() - content = response["content"]["entries"] - events_only = [event["event"] for event in content] - df = pd.json_normalize(events_only) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def get_columns(self) -> list: - return ["api_id", - "cover_url", - "name", - "description", - "series_api_id", - "start_at", - "duration_interval", - "end_at", - "geo_latitude", - "geo_longitude", - "url", - "timezone", - "event_type", - "user_api_id", - "visibility", - "geo_address_json.city", - "geo_address_json.type", - "geo_address_json.region", - "geo_address_json.address", - "geo_address_json.country", - "geo_address_json.latitude", - "geo_address_json.place_id", - "geo_address_json.longitude", - "geo_address_json.city_state", - "geo_address_json.description", - "geo_address_json.full_address"] - - def _parse_event_insert_data(self, event): - data = {} - data["name"] = event["name"] - data["start_at"] = event["start_at"] - data["timezone"] = event["timezone"] - - if "end_at" in event: - data["end_at"] = event["end_at"] - - if "require_rsvp_approval" in event: - data["require_rsvp_approval"] = event["require_rsvp_approval"] - - if "geo_latitude" in event: - data["geo_latitude"] = event["geo_latitude"] - - if "geo_longitude" in event: - data["geo_longitude"] = event["geo_longitude"] - - if "meeting_url" in event: - data["meeting_url"] = event["meeting_url"] - - data["geo_address_json"] = {} - - if "geo_address_json_type" in event: - data["geo_address_json"]["type"] = event["geo_address_json_type"] - - if "geo_address_json_place_id" in event: - data["geo_address_json"]["place_id"] = event["geo_address_json_place_id"] - - if "geo_address_json_description" in event: - data["geo_address_json"]["description"] = event["geo_address_json_description"] - - return data - - def insert(self, query: ast.ASTNode) -> None: - """Inserts data into the API endpoint. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - insert_statement_parser = INSERTQueryParser( - query, - supported_columns=["name", "start_at", "timezone", "end_at", "require_rsvp_approval", "geo_address_json_type", "geo_address_json_place_id", "geo_address_json_description", "geo_latitude", "geo_longitude", "meeting_url"], - mandatory_columns=["name", "start_at", "timezone"], - all_mandatory=False - ) - - event_data = insert_statement_parser.parse_query() - - for event in event_data: - parsed_event_data = self._parse_event_insert_data(event) - self.handler.luma_client.create_event(parsed_event_data) diff --git a/mindsdb/integrations/handlers/luma_handler/requirements.txt b/mindsdb/integrations/handlers/luma_handler/requirements.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unused/unit/handler_tests/test_mariadb_handler.py b/mindsdb/integrations/handlers/mariadb_handler/tests/test_mariadb_handler.py similarity index 88% rename from tests/unused/unit/handler_tests/test_mariadb_handler.py rename to mindsdb/integrations/handlers/mariadb_handler/tests/test_mariadb_handler.py index fd3db056f7e..7aac98c1dc8 100644 --- a/tests/unused/unit/handler_tests/test_mariadb_handler.py +++ b/mindsdb/integrations/handlers/mariadb_handler/tests/test_mariadb_handler.py @@ -17,7 +17,7 @@ "user": "root", "password": "supersecret", "database": "test", - "ssl": False + "ssl": False, } } @@ -38,7 +38,7 @@ def get_certificates(container): cur_dir = os.path.dirname(os.path.abspath(__file__)) archive_path = os.path.join(cur_dir, CERTS_ARCHIVE) with open(archive_path, "wb") as f: - bits, _ = container.get_archive('/var/lib/mysql') + bits, _ = container.get_archive("/var/lib/mysql") for chunk in bits: f.write(chunk) @@ -85,7 +85,7 @@ def handler(request): if with_ssl: get_certificates(container) - handler = MariaDBHandler('test_mariadb_handler', **HANDLER_KWARGS) + handler = MariaDBHandler("test_mariadb_handler", **HANDLER_KWARGS) yield handler # normal teardown @@ -113,7 +113,7 @@ def test_native_query_show_dbs(self, handler): dbs = handler.native_query("SHOW DATABASES;") dbs = dbs.data_frame assert dbs is not None, "expected to get some data, but got None" - assert 'Database' in dbs, f"expected to get 'Database' column in response:\n{dbs}" + assert "Database" in dbs, f"expected to get 'Database' column in response:\n{dbs}" dbs = list(dbs["Database"]) expected_db = HANDLER_KWARGS["connection_data"]["database"] assert expected_db in dbs, f"expected to have {expected_db} db in response: {dbs}" @@ -127,10 +127,19 @@ def test_describe_table(self, handler): describe_data = described.data_frame self.check_valid_response(described) got_columns = list(describe_data.iloc[:, 0]) - want_columns = ["number_of_rooms", "number_of_bathrooms", - "sqft", "location", "days_on_market", - "initial_price", "neighborhood", "rental_price"] - assert got_columns == want_columns, f"expected to have next columns in rentals table:\n{want_columns}\nbut got:\n{got_columns}" + want_columns = [ + "number_of_rooms", + "number_of_bathrooms", + "sqft", + "location", + "days_on_market", + "initial_price", + "neighborhood", + "rental_price", + ] + assert got_columns == want_columns, ( + f"expected to have next columns in rentals table:\n{want_columns}\nbut got:\n{got_columns}" + ) def test_create_table(self, handler): new_table = "test_mdb" @@ -165,5 +174,5 @@ def get_table_names(self, handler): res = handler.get_tables() tables = res.data_frame assert tables is not None, "expected to have some tables in the db, but got None" - assert 'table_name' in tables, f"expected to get 'table_name' column in the response:\n{tables}" - return list(tables['table_name']) + assert "table_name" in tables, f"expected to get 'table_name' column in the response:\n{tables}" + return list(tables["table_name"]) diff --git a/mindsdb/integrations/handlers/materialize_handler/README.md b/mindsdb/integrations/handlers/materialize_handler/README.md deleted file mode 100644 index 233700a79b2..00000000000 --- a/mindsdb/integrations/handlers/materialize_handler/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# Materialize Handler - -This is the implementation of the Materialize handler for MindsDB. - -## Materialize -Materialize is a new storage engine for PostgreSQL, bringing a modern approach to database capacity, capabilities and performance to the world's most-loved database platform. - -Materialize consists of an extension, building on the innovative table access method framework and other standard Postgres extension interfaces. By extending and enhancing the current table access methods, Materialize opens the door to a future of more powerful storage models that are optimized for cloud and modern hardware architectures. -## Implementation - -This handler was implemented by extending Postgres connector. - -The required arguments to establish a connection are: - -* `host`: the host name of the Materialize connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name - -## Usage - -In order to make use of this handler and connect to a Materialize server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE materialize_datasource -WITH ENGINE = "materialize", -PARAMETERS = { - "user": "USERNAME", - "password": "|I<34|", - "host": "hostname", - "port": 6875, - "database": "materialize" -} -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM materialize_datasource.loveU LIMIT 10; -``` diff --git a/mindsdb/integrations/handlers/materialize_handler/__about__.py b/mindsdb/integrations/handlers/materialize_handler/__about__.py deleted file mode 100644 index 6cac9d214b1..00000000000 --- a/mindsdb/integrations/handlers/materialize_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Materialize handler' -__package_name__ = 'mindsdb_materialize_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Materialize" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/materialize_handler/__init__.py b/mindsdb/integrations/handlers/materialize_handler/__init__.py deleted file mode 100644 index cabbb9ea647..00000000000 --- a/mindsdb/integrations/handlers/materialize_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .materialize_handler import MaterializeHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'Materialize' -name = 'materialize' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/materialize_handler/icon.svg b/mindsdb/integrations/handlers/materialize_handler/icon.svg deleted file mode 100644 index 953b12af312..00000000000 --- a/mindsdb/integrations/handlers/materialize_handler/icon.svg +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/materialize_handler/materialize_handler.py b/mindsdb/integrations/handlers/materialize_handler/materialize_handler.py deleted file mode 100644 index 9053a2e6d53..00000000000 --- a/mindsdb/integrations/handlers/materialize_handler/materialize_handler.py +++ /dev/null @@ -1,43 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -class MaterializeHandler(PostgresHandler): - """ - This handler handles connection and execution of the Materialize statements. - """ - - name = 'materialize' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Materialize server/database.', - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Materialize server.', - }, - password={ - 'type': ARG_TYPE.STR, - 'description': 'The password to authenticate the user with the Materialize server.', - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Specify port to connect Materialize server', - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'Specify database name to connect Materialize server', - }, -) - -connection_args_example = OrderedDict( - host='127.0.0.1', port=6875, password='', user='USER', database='materialize' -) diff --git a/mindsdb/integrations/handlers/materialize_handler/tests/__init__.py b/mindsdb/integrations/handlers/materialize_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py b/mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py deleted file mode 100644 index a91233c4ba9..00000000000 --- a/mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.materialize_handler.materialize_handler import MaterializeHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE - - -class materializeHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 6875, - "user": "postgres", - "password": "", - "database": "materialize", - } - } - cls.handler = MaterializeHandler('test_materialize_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_create_table(self): - query = "CREATE Table IF NOT EXISTS Lover(name varchar(101));" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_3_insert(self): - query = "INSERT INTO LOVER VALUES('Shiv Shakti');" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_4_native_query_select(self): - query = "SELECT * FROM LOVER;" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('LOVER') - - query = "DROP Table IF EXISTS Lover;" - self.handler.query(query) - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/matrixone_handler/README.md b/mindsdb/integrations/handlers/matrixone_handler/README.md deleted file mode 100644 index e6433d0139c..00000000000 --- a/mindsdb/integrations/handlers/matrixone_handler/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Matrixone Handler - -This is the implementation of the Matrixone handler for MindsDB. - -## Matrixone -MatrixOne is a future-oriented hyper-converged cloud and edge native DBMS that supports transactional, analytical, and streaming workloads with a simplified and distributed database engine, across multiple data centers, clouds, edges and other heterogeneous infrastructures. - -For more Info Click [HERE](https://github.com/matrixorigin/matrixone) - -## Implementation -This handler was implemented using the `PyMySQL`, a Python library that allows you to use Python code to run SQL commands on Matrixone Database. - -The required arguments to establish a connection are, -* `user`: username associated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `database`: Database name to be connected -* `ssl`: If you want to enable SSL Security **(Boolean)** -* `ssl_ca`: Path or URL of the Certificate Authority (CA) certificate file -* `ssl_cert`: Path name or URL of the server public key certificate file -* `ssl_key`: The path name or URL of the server private key file - - -## Usage -In order to make use of this handler and connect to Matrixone in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE matrixone_datasource -WITH -engine='matrixone', -parameters={ - "user":"dump", - "password":"111", - "host":"127.0.0.1", - "port":6001, - "database":"mo_catalog" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM Matrixone_datasource.demo; -~~~~ diff --git a/mindsdb/integrations/handlers/matrixone_handler/__about__.py b/mindsdb/integrations/handlers/matrixone_handler/__about__.py deleted file mode 100644 index 7b06574e0e5..00000000000 --- a/mindsdb/integrations/handlers/matrixone_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB MatrixOne handler' -__package_name__ = 'mindsdb_matrixone_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Matrixone" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/matrixone_handler/__init__.py b/mindsdb/integrations/handlers/matrixone_handler/__init__.py deleted file mode 100644 index 83e7b4ba466..00000000000 --- a/mindsdb/integrations/handlers/matrixone_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .matrixone_handler import MatrixOneHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'MatrixOne' -name = 'matrixone' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/matrixone_handler/connection_args.py b/mindsdb/integrations/handlers/matrixone_handler/connection_args.py deleted file mode 100644 index a89408c71e5..00000000000 --- a/mindsdb/integrations/handlers/matrixone_handler/connection_args.py +++ /dev/null @@ -1,52 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the MatrixOne server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the MatrixOne server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the MatrixOne server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the MatrixOne server. ' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the MatrixOne server. Must be an integer.' - }, - ssl={ - 'type': ARG_TYPE.BOOL, - 'description': 'Set it to False to disable ssl.' - }, - ssl_ca={ - 'type': ARG_TYPE.PATH, - 'description': 'Path or URL of the Certificate Authority (CA) certificate file' - }, - ssl_cert={ - 'type': ARG_TYPE.PATH, - 'description': 'Path name or URL of the server public key certificate file' - }, - ssl_key={ - 'type': ARG_TYPE.PATH, - 'description': 'The path name or URL of the server private key file' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=6001, - user='dump', - password='111', - database='mo_catalog' -) diff --git a/mindsdb/integrations/handlers/matrixone_handler/icon.svg b/mindsdb/integrations/handlers/matrixone_handler/icon.svg deleted file mode 100644 index 43e3705d212..00000000000 --- a/mindsdb/integrations/handlers/matrixone_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py b/mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py deleted file mode 100644 index 9c8ff28469c..00000000000 --- a/mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py +++ /dev/null @@ -1,168 +0,0 @@ -from typing import Optional - -import pandas as pd -import pymysql as matone -from pymysql.cursors import DictCursor as dict -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class MatrixOneHandler(DatabaseHandler): - """ - This handler handles connection and execution of the MatrixOne statements. - """ - - name = 'matrixone' - - def __init__(self, name, connection_data: Optional[dict], **kwargs): - super().__init__(name) - self.mysql_url = None - self.parser = parse_sql - self.dialect = 'mysql' - self.connection_data = connection_data - self.database = self.connection_data.get('database') - - self.connection = None - self.is_connected = False - - def connect(self): - if self.is_connected is True: - return self.connection - - config = { - 'host': self.connection_data.get('host'), - 'port': self.connection_data.get('port'), - 'user': self.connection_data.get('user'), - 'password': self.connection_data.get('password'), - 'database': self.connection_data.get('database') - } - - ssl = self.connection_data.get('ssl') - if ssl is True: - ssl_ca = self.connection_data.get('ssl_ca') - ssl_cert = self.connection_data.get('ssl_cert') - ssl_key = self.connection_data.get('ssl_key') - config['client_flags'] = [matone.constants.ClientFlag.SSL] - if ssl_ca is not None: - config["ssl_ca"] = ssl_ca - if ssl_cert is not None: - config["ssl_cert"] = ssl_cert - if ssl_key is not None: - config["ssl_key"] = ssl_key - - connection = matone.connect(**config) - self.is_connected = True - self.connection = connection - return self.connection - - def disconnect(self): - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the MatrixOne database - :return: success status and error message if error occurs - """ - - result = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - result.success = connection.open - except Exception as e: - logger.error(f'Error connecting to MatrixOne {self.connection_data["database"]}, {e}!') - result.error_message = str(e) - - if result.success is True and need_to_close: - self.disconnect() - if result.success is False and self.is_connected is True: - self.is_connected = False - - return result - - def native_query(self, query: str) -> Response: - """ - Receive SQL query and runs it - :param query: The SQL query to run in MatrixOne - :return: returns the records from the current recordset - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor(cursor=dict) as cur: - try: - cur.execute(query) - if cur._rows: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - pd.DataFrame( - result, - # columns=[x[0] for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement. - """ - renderer = SqlalchemyRender('mysql') - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Get a list with all of the tabels in MatrixOne - """ - q = "SHOW TABLES;" - result = self.native_query(q) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name) -> Response: - """ - Show details about the table - """ - q = f"SHOW COLUMNS FROM {table_name};" - result = self.native_query(q) - df = result.data_frame - result.data_frame = df.rename(columns={ - df.columns[0]: 'COLUMN_NAME', - df.columns[1]: 'DATA TYPE' - }) - - return result diff --git a/mindsdb/integrations/handlers/matrixone_handler/requirements.txt b/mindsdb/integrations/handlers/matrixone_handler/requirements.txt deleted file mode 100644 index d4a7eda50c1..00000000000 --- a/mindsdb/integrations/handlers/matrixone_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pymysql diff --git a/mindsdb/integrations/handlers/matrixone_handler/tests/__init__.py b/mindsdb/integrations/handlers/matrixone_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py b/mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py deleted file mode 100644 index 9baa18158b8..00000000000 --- a/mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py +++ /dev/null @@ -1,55 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.matrixone_handler.matrixone_handler import MatrixOneHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class MatrixOneHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 6001, - "user": "dump", - "password": "111", - "database": "mo_catalog", - "ssl": False - } - } - cls.handler = MatrixOneHandler('test_mysql_handler', cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS PREM;") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_create_table(self): - res = self.handler.query("CREATE TABLE IF NOT EXISTS PREM (Premi varchar(50));") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_insert_table(self): - res = self.handler.query("INSERT INTO PREM VALUES('Radha <3 Krishna');") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_4_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_5_select_query(self): - query = "SELECT * FROM PREM;" - result = self.handler.native_query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_6_get_columns(self): - result = self.handler.get_columns('PREM') - assert result.type is not RESPONSE_TYPE.ERROR - - def test_7_check_connection(self): - self.handler.check_connection() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/maxdb_handler/README.md b/mindsdb/integrations/handlers/maxdb_handler/README.md deleted file mode 100644 index fb5d115116e..00000000000 --- a/mindsdb/integrations/handlers/maxdb_handler/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Sap MaxDB Handler - -This is the implementation of the Sap MaxDB handler for MindsDB. - -## Sap MaxDB (What is it?) -SAP MaxDB is a high-performance, scalable, and reliable relational database management system (RDBMS) that supports a wide range of applications. It is designed to handle large amounts of data with minimal downtime and maximum availability. MaxDB provides advanced features such as backup and recovery, high availability, and online data compression, making it a popular choice for enterprise applications. - - -## Implementation -This handler was implemented using the [JDBC driver](https://dbschema.com/jdbc-drivers/SAPMaxDbJdbcDriver.zip) provided by SAP MaxDB. To establish connection with the database, [JayDeBeApi](https://pypi.org/project/JayDeBeApi/) library is used. The JayDeBeApi module allows you to connect from Python code to databases using Java JDBC. It provides a Python DB-API v2.0 to that database. - -To establish a connection with SAP MaxDB, the following arguments are required: -* `host`: IP address of the computer where the database server is running. -* `port`: The number used by the operating system to identify a specific process or service on the server. -* `user`: Username used to authenticate and authorize access to a specific database. -* `password`: Secret authentication credential that is associated with a specific user account. -* `database`: Database name to be connected. -* `jdbc_location`: The location of the jar file which contains the JDBC driver - -## Usage -In order to make use of this handler and connect to MaxDB in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE maxdb_datasource -WITH engine='maxdb', -parameters={ - "host": "localhost", - "port": "7210", - "user": "username", - "password": "password", - "database": "DatabaseName" - "jdbc_location": "/path/to/jdbc/sapdbc.jar" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM maxdb_datasource.TEST_TABLE; -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/maxdb_handler/__about__.py b/mindsdb/integrations/handlers/maxdb_handler/__about__.py deleted file mode 100644 index b6abee532a2..00000000000 --- a/mindsdb/integrations/handlers/maxdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB SAP MaxDB handler' -__package_name__ = 'mindsdb_maxdb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for SAP MAXDB" -__author__ = 'Marsid Mali & Sergi Poula' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/maxdb_handler/__init__.py b/mindsdb/integrations/handlers/maxdb_handler/__init__.py deleted file mode 100644 index d11a9842e77..00000000000 --- a/mindsdb/integrations/handlers/maxdb_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .maxdb_handler import MaxDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - -title = "SAP MaxDB" -name = "maxdb" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/maxdb_handler/connection_args.py b/mindsdb/integrations/handlers/maxdb_handler/connection_args.py deleted file mode 100644 index 027e98b32ff..00000000000 --- a/mindsdb/integrations/handlers/maxdb_handler/connection_args.py +++ /dev/null @@ -1,42 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the SAP MaxDB server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the SAP MaxDB. Must be an integer.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the SAP MaxDB server.' - }, - jdbc_location={ - 'type': ARG_TYPE.STR, - 'description': 'The location of the jar file which contains the JDBC class.' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the SAP MaxDB server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the SAP MaxDB server.', - 'secret': True - } -) - - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=7210, - user='DBADMIN', - password='password', - database="MAXDB", - jdbc_location='/Users/marsid/Desktop/sapdbc.jar', -) diff --git a/mindsdb/integrations/handlers/maxdb_handler/icon.svg b/mindsdb/integrations/handlers/maxdb_handler/icon.svg deleted file mode 100644 index d97dbf954d5..00000000000 --- a/mindsdb/integrations/handlers/maxdb_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py b/mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py deleted file mode 100644 index bdc7499abbf..00000000000 --- a/mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +++ /dev/null @@ -1,180 +0,0 @@ -from typing import Optional - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, - HandlerResponse, -) -import pandas as pd -import jaydebeapi as jd - -logger = log.getLogger(__name__) - - -class MaxDBHandler(DatabaseHandler): - """ - This handler handles connection and execution of the SAP MaxDB statements. - """ - - name = "maxdb" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """Initialize the handler - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.kwargs = kwargs - self.parser = parse_sql - self.connection_config = connection_data - self.database = connection_data["database"] - self.host = connection_data["host"] - self.port = connection_data["port"] - self.user = connection_data["user"] - self.password = connection_data["password"] - self.jdbc_location = connection_data["jdbc_location"] - self.connection = None - self.is_connected = False - - def __del__(self): - """ - Destructor for the SAP MaxDB class. - """ - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Establishes a connection to the SAP MaxDB server. - Returns: - HandlerStatusResponse - """ - if self.is_connected: - return self.connection - - jdbc_url = f"jdbc:sapdb://{self.host}:{self.port}/{self.database}" - jdbc_class = "com.sap.dbtech.jdbc.DriverSapDB" - - self.connection = jd.connect(jdbc_class, jdbc_url, [self.user, self.password], self.jdbc_location) - self.is_connected = True - return self.connection - - def disconnect(self): - """Close any existing connections - Should switch self.is_connected. - """ - if self.is_connected is False: - return - try: - self.connection.close() - self.is_connected = False - except Exception as e: - logger.error(f"Error while disconnecting to {self.database}, {e}") - - return - - def check_connection(self) -> StatusResponse: - """Check connection to the handler - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to database {self.database}, {e}!") - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> HandlerResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - etc) - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - conn = self.connect() - with conn.cursor() as cur: - try: - cur.execute(query) - if cur.description: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(result, columns=[x[0] for x in cur.description]) - ) - else: - response = Response(RESPONSE_TYPE.OK) - self.connection.commit() - except Exception as e: - logger.error(f"Error running query: {query} on {self.database}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - self.connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INSERT, DELETE, etc - Returns: - HandlerResponse - """ - renderer = SqlalchemyRender("postgres") - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Gets a list of table names in the database. - - Returns: - list: A list of table names in the database. - """ - - query = f"SELECT TABLENAME FROM DOMAIN.TABLES WHERE TYPE = 'TABLE' AND SCHEMANAME = '{self.user}'" - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) - return result - - def get_columns(self, table_name: str) -> Response: - """ - Gets a list of column names in the specified table. - - Args: - table_name (str): The name of the table to get column names from. - - Returns: - list: A list of column names in the specified table. - """ - - query = f"SELECT COLUMNNAME,DATATYPE FROM DOMAIN.COLUMNS WHERE TABLENAME ='{table_name}'" - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={"name": "column_name", "type": "data_type"}) - return self.native_query(query) diff --git a/mindsdb/integrations/handlers/maxdb_handler/requirements.txt b/mindsdb/integrations/handlers/maxdb_handler/requirements.txt deleted file mode 100644 index 78d1c7fe94b..00000000000 --- a/mindsdb/integrations/handlers/maxdb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -jaydebeapi diff --git a/mindsdb/integrations/handlers/maxdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/maxdb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/maxdb_handler/tests/test_maxdb_handler.py b/mindsdb/integrations/handlers/maxdb_handler/tests/test_maxdb_handler.py deleted file mode 100644 index 2eeefd4d134..00000000000 --- a/mindsdb/integrations/handlers/maxdb_handler/tests/test_maxdb_handler.py +++ /dev/null @@ -1,55 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.maxdb_handler.maxdb_handler import MaxDBHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class SurrealdbHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "127.0.0.1", - "port": "7210", - "user": "MAXDB", - "password": "password", - "database": "MAXDB", - "jdbc_location": "/path/to/jdbc/sapdbc.jar" - } - } - cls.handler = MaxDBHandler('test_maxdb_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_create_table(self): - res = self.handler.native_query("CREATE TABLE TEST_TABLE (id INT PRIMARY KEY,name VARCHAR(50))") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_insert(self): - res = self.handler.native_query("INSERT INTO TEST_TABLE (id, name) VALUES (1, 'MARSID')") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_select_query(self): - query = "SELECT * FROM TEST_TABLE" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_get_columns(self): - columns = self.handler.get_columns('TEST_TABLE') - assert columns.type is not RESPONSE_TYPE.ERROR - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_6_drop_table(self): - res = self.handler.native_query("DROP TABLE TEST_TABLE") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_7_disconnect(self): - assert self.handler.disconnect() is None - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/mediawiki_handler/README.md b/mindsdb/integrations/handlers/mediawiki_handler/README.md deleted file mode 100644 index 5abed87d45d..00000000000 --- a/mindsdb/integrations/handlers/mediawiki_handler/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# MediaWiki Handler - -MediaWiki handler for MindsDB provides interfaces to connect to the MediaWiki API and pull data into MindsDB. - ---- - -## Table of Contents - -- [MediaWiki Handler](#mediawiki-handler) - - [Table of Contents](#table-of-contents) - - [About MediaWiki](#about-githhub) - - [MediaWiki Handler Implementation](#mediawiki-handler-implementation) - - [MediaWiki Handler Initialization](#mediawiki-handler-initialization) - - [Implemented Features](#implemented-features) - - [Limitations](#limitations) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About MediaWiki - -MediaWiki is a free server-based wiki software, licensed under the GNU General Public License (GPL). It's designed to serve a website that gets millions of hits per day. -
-https://www.mediawiki.org/wiki/Manual:What_is_MediaWiki%3F - -## MediaWiki Handler Implementation - -This handler was implemented using [MediaWikiAPI](https://github.com/lehinevych/MediaWikiAPI), the Python wrapper for the MediaWiki API. - -## MediaWiki Handler Initialization - -The MediaWiki handler does not require any parameters to be initialized. - -## Implemented Features - -- [x] MediaWiki Pages Table - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -## Limitations -- Only the page ID, title, original title, content, summary, url and categories are returned for each page. -- WHERE clause only supports filtering by page ID and title. - -Note: If a query is made without a WHERE clause, the handler will return 20 random pages. - -## TODO -- [ ] Support INSERT, UPDATE and DELETE for Pages table -- [ ] Support more columns for Pages table - -## Example Usage - -The first step is to create a database with the new `mediawiki` engine: - -~~~~sql -CREATE DATABASE mediawiki_datasource -WITH ENGINE = 'mediawiki' -~~~~ - -Use the established connection to query your database: - -~~~~sql -SELECT * FROM mediawiki_datasource.pages -~~~~ - -Run more advanced queries: - -~~~~sql -SELECT * -FROM mediawiki_datasource.pages -WHERE title = 'Barack' -ORDER BY pageid -LIMIT 5 -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mediawiki_handler/__about__.py b/mindsdb/integrations/handlers/mediawiki_handler/__about__.py deleted file mode 100644 index 5352a6ac753..00000000000 --- a/mindsdb/integrations/handlers/mediawiki_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB MediaWiki handler" -__package_name__ = "mindsdb_mediawiki_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for MediaWiki" -__author__ = "Minura Punchihewa" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/mediawiki_handler/__init__.py b/mindsdb/integrations/handlers/mediawiki_handler/__init__.py deleted file mode 100644 index 513c85cf1cb..00000000000 --- a/mindsdb/integrations/handlers/mediawiki_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .mediawiki_handler import MediaWikiHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "MediaWiki" -name = "mediawiki" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/mediawiki_handler/icon.svg b/mindsdb/integrations/handlers/mediawiki_handler/icon.svg deleted file mode 100644 index 7b898f91341..00000000000 --- a/mindsdb/integrations/handlers/mediawiki_handler/icon.svg +++ /dev/null @@ -1,153 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py b/mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py deleted file mode 100644 index 2e507d73fd7..00000000000 --- a/mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py +++ /dev/null @@ -1,88 +0,0 @@ -from mediawikiapi import MediaWikiAPI - -from mindsdb.integrations.handlers.mediawiki_handler.mediawiki_tables import PagesTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class MediaWikiHandler(APIHandler): - """ - The MediaWiki handler implementation. - """ - - name = 'mediawiki' - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - pages_data = PagesTable(self) - self._register_table("pages", pages_data) - - def connect(self): - """ - Set up the connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - if self.is_connected is True: - return self.connection - - self.connection = MediaWikiAPI() - - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error('Error connecting to MediaWiki!') - response.error_message = str(e) - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py b/mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py deleted file mode 100644 index e31ecc45c34..00000000000 --- a/mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py +++ /dev/null @@ -1,98 +0,0 @@ -import pandas as pd - -from typing import List - -from mindsdb.integrations.libs.api_handler import APITable - -from mindsdb_sql_parser import ast - -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class PagesTable(APITable): - """The MediaWiki Pages Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls MediaWiki pages data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Sendinblue Email Campaigns matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'pages', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - title, page_id = None, None - for condition in where_conditions: - if condition[1] == 'title': - if condition[0] != '=': - raise ValueError(f"Unsupported operator '{condition[0]}' for column '{condition[1]}' in WHERE clause.") - title = condition[2] - elif condition[1] == 'pageid': - if condition[0] != '=': - raise ValueError(f"Unsupported operator '{condition[0]}' for column '{condition[1]}' in WHERE clause.") - page_id = condition[2] - else: - raise ValueError(f"Unsupported column '{condition[1]}' in WHERE clause.") - - pages_df = pd.json_normalize(self.get_pages(title=title, page_id=page_id, limit=result_limit)) - - select_statement_executor = SELECTQueryExecutor( - pages_df, - selected_columns, - [], - order_by_conditions - ) - pages_df = select_statement_executor.execute_query() - - return pages_df - - def get_columns(self) -> List[str]: - return ['pageid', 'title', 'original_title', 'content', 'summary', 'url', 'categories'] - - def get_pages(self, title: str = None, page_id: int = None, limit: int = 20): - query_parts = [] - - query_parts.append(f'intitle:{title}') if title is not None else None - query_parts.append(f'pageid:{page_id}') if page_id is not None else None - - search_query = ' | '.join(query_parts) - - connection = self.handler.connect() - - if search_query: - return [self.convert_page_to_dict(connection.page(result, auto_suggest=False)) for result in connection.search(search_query, results=limit)] - else: - return [self.convert_page_to_dict(connection.page(result, auto_suggest=False)) for result in connection.random(pages=limit)] - - def convert_page_to_dict(self, page): - result = {} - attributes = self.get_columns() - - for attribute in attributes: - try: - result[attribute] = getattr(page, attribute) - except KeyError: - logger.debug(f"Error accessing '{attribute}' attribute. Skipping...") - - return result diff --git a/mindsdb/integrations/handlers/mediawiki_handler/requirements.txt b/mindsdb/integrations/handlers/mediawiki_handler/requirements.txt deleted file mode 100644 index 8636706feec..00000000000 --- a/mindsdb/integrations/handlers/mediawiki_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -mediawikiapi \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mendeley_handler/README.md b/mindsdb/integrations/handlers/mendeley_handler/README.md deleted file mode 100644 index 8917607bc3b..00000000000 --- a/mindsdb/integrations/handlers/mendeley_handler/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Mendeley API Handler -This handler integrates with the Mendeley API. - -### Connect to the Mendeley API -We start by creating a database to connect to the Mendeley API. In order to do that we need the client id and client secret that are created after registering an application at https://dev.mendeley.com/myapps.html . More information on the matter can be found at https://dev.mendeley.com/reference/topics/application_registration.html . - -``` -CREATE DATABASE my_mendeley -WITH - ENGINE = 'mendeley' - PARAMETERS = { - "client_id" : "the client id", - "client_secret" : "the client secret" - }; -``` - -### Search for documents -Using the Mendeley Handler you can find information about documents of your interest such as a document's id, title, type, source, year, identifiers, keywords, link and authors. -In order to conduct your search you can choose from a number of supported parameters. Those parameters are: - -### First category of parameters - -* title – Title. -* author – Author. -* source – Source. -* abstract – Abstract. -* min_year – Minimum year for documents to return. -* max_year – Maximum year for documents to return. -* open_access – If β€˜true’, only returns open access documents. - -### Second category of parameters - -* arxiv – ArXiV ID. -* doi – DOI. -* isbn – ISBN. -* issn – ISSN. -* pmid – PubMed ID. -* scopus – Scopus ID (EID). -* filehash – SHA-1 filehash. - -### Third category of parameters - -* id – the ID of the document to get - - -The first category of parameters is not considered very specific compared to the other two, so the use of parameters from only that category may result in a document catalog and not a single document. If one is in need of information about a specific document and can use parameters from many categories, it is in his best interest to use those of the second or third category, since they return the most specific result, a single document (provided the validity of the parameters and the existence of the document in mendeley catalogs) - -### Fields returned - -Through the use of the handler one has access to a document's: - - * title - * type - * source - * year - * pmid - * sgr - * issn - * scopus - * doi - * pui - * authors - * keywords - * link - * id - -Of course through the use of SELECT, one can choose what fields to display. - -### Examples - -If using parameters from the first category, since the result may not be a single document but more, one can use LIMIT to specify the number of documents to return. The default number of documents returned is 30. - -``` -SELECT * -FROM my_mendeley.catalog_search_data -WHERE title = "COVID-19 diagnosis and management: a comprehensive review" -LIMIT 10; -``` - -``` -SELECT * -FROM my_mendeley.catalog_search_data -WHERE doi = "10.1111/joim.13091" -``` - -``` -SELECT * -FROM my_mendeley.catalog_search_data -WHERE id = "c3503ef8-26eb-3666-87db-03ccc422293a" -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mendeley_handler/__about__.py b/mindsdb/integrations/handlers/mendeley_handler/__about__.py deleted file mode 100644 index 42521c3411c..00000000000 --- a/mindsdb/integrations/handlers/mendeley_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Mendeley handler' -__package_name__ = 'mindsdb_mendeley_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Mendeley" -__author__ = 'Christina Nikolovieni' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/mendeley_handler/__init__.py b/mindsdb/integrations/handlers/mendeley_handler/__init__.py deleted file mode 100644 index 8245c12e27b..00000000000 --- a/mindsdb/integrations/handlers/mendeley_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .mendeley_handler import MendeleyHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Mendeley' -name = 'mendeley' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' -permanent = False - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/mendeley_handler/icon.svg b/mindsdb/integrations/handlers/mendeley_handler/icon.svg deleted file mode 100644 index d130bd0f449..00000000000 --- a/mindsdb/integrations/handlers/mendeley_handler/icon.svg +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py b/mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py deleted file mode 100644 index 86d1f986a43..00000000000 --- a/mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py +++ /dev/null @@ -1,217 +0,0 @@ -from mindsdb_sql_parser import parse_sql -import pandas as pd -from mendeley import Mendeley -from mindsdb.integrations.libs.api_handler import APIHandler -from mendeley.session import MendeleySession -from mindsdb.integrations.handlers.mendeley_handler.mendeley_tables import CatalogSearchTable -from mindsdb.utilities import log -from typing import Dict -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse -) - -logger = log.getLogger(__name__) - - -class MendeleyHandler(APIHandler): - - def __init__(self, name, **kwargs): - """ constructor - Args: - name (str): the handler name - """ - super().__init__(name) - - self.connection_args = kwargs.get('connection_data', {}) - - self.client_id = self.connection_args.get('client_id', None) - self.client_secret = self.connection_args.get('client_secret', None) - self.session = self.connect() - - self.session = None - self.is_connected = False - - catalog_search_data = CatalogSearchTable(self) - self.catalog_search_data = catalog_search_data - self._register_table('catalog_search_data', catalog_search_data) - - def connect(self) -> MendeleySession: - """ The connect method sets up the connection required by the handler. - In order establish a connection with Mendeley API one needs the client id and client secret that are - created after registering the application at https://dev.mendeley.com/myapps.html . More information on the matter - can be found at https://dev.mendeley.com/reference/topics/application_registration.html . - In order to have access to Mendeley data we use "session". - - Returns: - HandlerStatusResponse """ - - if self.is_connected: - return self.session - - mendeley = Mendeley(self.client_id, self.client_secret) - auth = mendeley.start_client_credentials_flow() - self.session = auth.authenticate() - - self.is_connected = True - return self.session - - def check_connection(self) -> StatusResponse: - """ The check_connection method checks the connection to the handler - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - - except Exception as e: - logger.error(f'Error connecting to Mendeley: {e}!') - response.error_message = str(e) - - self.is_connected = response.success - return response - - def native_query(self, query_string: str): - """The native_query method receives raw query and acts upon it. - Args: - query_string (str): query in native format - Returns: - HandlerResponse - """ - ast = parse_sql(query_string) - return self.query(ast) - - def get_authors(self, data): - """The get_authors method receives the data - a specific document returned by the API, gets the names of the authors - and combines them in a string, so as to allow the use of DataFrame. - Args: - data (CatalogDocument): document returned by API - Returns: - authors string - """ - authors = "" - sum = 0 - if data.authors is not None: - for x in data.authors: - if sum + 1 == len(data.authors) and x.first_name is not None and x.last_name is not None: - authors = authors + x.first_name + " " + x.last_name - else: - if x.first_name is not None and x.last_name is not None: - authors = authors + x.first_name + " " + x.last_name + ", " - sum = sum + 1 - return authors - - def get_keywords(self, data): - """The get_keywords method receives the data-a specific document returned by the API, gets the specified keywords - and combines them in a string, so as to allow the use of DataFrame. - Args: - data (CatalogDocument) : document returned by the API - Returns: - keywords string - """ - keywords = "" - sum = 0 - if data.keywords is not None: - for x in data.keywords: - if sum + 1 == len(data.keywords): - keywords = keywords + x + " " - else: - if x is not None: - keywords = keywords + x + ", " - sum = sum + 1 - return keywords - - def create_dict(self, data): - """The create_dict method receives the data-a specific document returned by the API, gets the resources-fields of the document, - as specified in Mendley documentation, and puts them in a dictionary. - - Args: - data (CatalogDocument) : document returned by API - Returns: - dict dictionary - """ - dict = {} - dict["title"] = data.title - dict["type"] = data.type - dict["source"] = data.source - dict["year"] = data.year - if data.identifiers is not None: - dict["pmid"] = data.identifiers.get("pmid") - dict["sgr"] = data.identifiers.get("sgr") - dict["issn"] = data.identifiers.get("issn") - dict["scopus"] = data.identifiers.get("scopus") - dict["doi"] = data.identifiers.get("doi") - dict["pui"] = data.identifiers.get("pui") - dict["authors"] = self.get_authors(data) - if data.keywords is not None: - dict["keywords"] = self.get_keywords(data) - else: - dict["keywords"] = None - dict["link"] = data.link - dict["id"] = data.id - return dict - - def call_mendeley_api(self, method_name: str, params: Dict) -> pd.DataFrame: - """The method call_mendeley_api is used to communicate with Mendeley. Depending on the method used there are three different types - of search conducted. - The advanced_search results in a CatalogSearch resource, which, depending on the parameters used, could either be a number of different documents (CatalogDocument), - a single one or none. - The by_identifier search is more specific in nature and can result either in one or no CatalogDocuments. - The get search has the same results as the by_identifier. - If the method specified does not exist, an NotImplementedError is raised. - Args: - method_name (str) : name of method - params (Dict): Dictionary containing the parameters used in the search - Returns: - DataFrame - """ - - self.session = self.connect() - - if method_name == 'advanced_search': - search_params = { - 'title': params.get("title"), - 'author': params.get("author"), - 'source': params.get("source"), - 'abstract': params.get("abstract"), - 'min_year': params.get("min_year"), - 'max_year': params.get("max_year"), - 'open_access': params.get("open_access") - } - data = self.session.catalog.advanced_search(**search_params) - sum = 0 - df = pd.DataFrame() - for x in data.list(page_size=params["limit"]).items: - if sum == 0: - df = pd.DataFrame(self.create_dict(x), index=[0]) - sum += 1 - else: - df = df.append(self.create_dict(x), ignore_index=True) - sum += 1 - if df.empty: - raise NotImplementedError(('Insufficient or wrong input given')) - else: - return df - - elif method_name == 'identifier_search': - search_params = { - 'arxiv': params.get("arxiv"), - 'doi': params.get("doi"), - 'isbn': params.get("isbn"), - 'issn': params.get("issn"), - 'pmid': params.get("pmid"), - 'scopus': params.get("scopus"), - 'filehash': params.get("filehash") - } - data = self.session.catalog.by_identifier(**search_params) - df = pd.DataFrame(self.create_dict(data), index=[0]) - return df - - elif method_name == 'get': - data = self.session.catalog.get(params.get("id")) - df = pd.DataFrame(self.create_dict(data), index=[0]) - return df - - raise NotImplementedError('Method name {} not supported by Mendeley API Handler'.format(method_name)) diff --git a/mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py b/mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py deleted file mode 100644 index 2d9b9846178..00000000000 --- a/mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py +++ /dev/null @@ -1,129 +0,0 @@ -from mindsdb_sql_parser import ast -import pandas as pd -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - - -class CatalogSearchTable(APITable): - - def select(self, query: ast.Select) -> pd.DataFrame: - """The select method implements the mappings from the ast.Select and calls the actual API through the call_mendeley_api. - Firstly, it is used to put the parameters specified in the query in a dictionary, which is then used when calling the method call_mendeley_api. - If no conditions are specified, an error is raised since the search cannot be conducted. - - Args: - query (ast.Select): query used to specify the wanted results - Returns: - result (DataFrame): the result of the query - """ - - conditions = extract_comparison_conditions(query.where) - - params = {} - - # Since there are three different types of search, and each of them takes different parameters, we use the parameters that lead - # to the most specific results. For example, in the case of the user specifying the title and the doi of a document, priority is given to - # the doi. - - if query.limit is not None: - params['limit'] = query.limit.value - else: - params['limit'] = 30 - - for op, arg1, arg2 in conditions: - - if arg1 in ['arxiv', 'doi', 'isbn', 'issn', 'pmid', 'scopus', 'filehash']: - - if op != '=': - raise NotImplementedError - params[arg1] = arg2 - - result = self.handler.call_mendeley_api( - method_name='identifier_search', - params=params) - - break - - elif arg1 == 'id': - if op != '=': - raise NotImplementedError - params['id'] = arg2 - - result = self.handler.call_mendeley_api( - method_name='get', - params=params) - - break - - elif "title" or "author" or "source" or "abstract" or "min_year" or "max_year" or "open_access" or "view" in conditions: - - if arg1 == 'title': - if op != '=': - raise NotImplementedError - params['title'] = arg2 - - elif arg1 == 'author': - if op != '=': - raise NotImplementedError - params['author'] = arg2 - - elif arg1 == 'source': - if op != '=': - raise NotImplementedError - params['source'] = arg2 - - elif arg1 == 'abstract': - if op != '=': - raise NotImplementedError - params['abstract'] = arg2 - - elif arg1 == 'min_year': - params['min_year'] = arg2 - - elif arg1 == 'max_year': - params['max_year'] = arg2 - - elif arg1 == 'open_access': - if op != '=': - raise NotImplementedError - params['open_access'] = arg2 - - result = self.handler.call_mendeley_api( - method_name='advanced_search', - params=params) - - if conditions == []: - raise ValueError('Please give input for the search to be conducted.') - - columns = [] - - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - return result[columns] - - def get_columns(self): - """ get_columns method returns the columns returned by the API""" - return [ - - 'title', - 'type', - 'source', - 'year', - 'pmid', - 'sgr', - 'issn', - 'scopus', - 'doi', - 'pui', - 'authors', - 'keywords', - 'link', - 'id' - ] diff --git a/mindsdb/integrations/handlers/mendeley_handler/requirements.txt b/mindsdb/integrations/handlers/mendeley_handler/requirements.txt deleted file mode 100644 index 5932bd09300..00000000000 --- a/mindsdb/integrations/handlers/mendeley_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -mendeley \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mendeley_handler/tests/__init__.py b/mindsdb/integrations/handlers/mendeley_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py b/mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py deleted file mode 100644 index 2a45aa22d0f..00000000000 --- a/mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py +++ /dev/null @@ -1,110 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.mendeley_handler.mendeley_handler import MendeleyHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class MendeleyHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": {"client_id": 15253, "client_secret": "BxmSvbrRW5iYEIQR"} - } - cls.handler = MendeleyHandler("test_mendeley_handler", **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - self.handler.check_connection() - - def test_2_select(self): - query = "SELECT * FROM catalog_search_data WHERE doi='10.1111/joim.13091'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_3_select(self): - query = "SELECT * FROM catalog_search_data WHERE id='af1a0408-7409-3a8b-ad91-8accd4f8849a'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_select(self): - query = "SELECT * FROM catalog_search_data WHERE title='The American Mineralogist crystal structure database'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_select(self): - query = "SELECT * FROM catalog_search_data WHERE id='8e86b541-84fd-30ef-9eed-e8c9af847ca3' AND doi='10.1093/ajcn/77.1.71'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_6_select(self): - query = "SELECT * FROM catalog_search_data WHERE issn='15570878'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_7_select(self): - query = "SELECT * FROM catalog_search_data WHERE source='American Journal of Clinical Nutrition'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_8_select(self): - query = "SELECT * FROM catalog_search_data WHERE source='American Journal of Clinical Nutrition'AND id='4eeda257-8db4-3dad-80c8-6912356d3887'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_9_select(self): - query = "SELECT * FROM catalog_search_data WHERE source='American Journal of Clinical Nutrition'AND max_year='2020'" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_10_call_mendeley_api_invalid_method_name(self): - with self.assertRaises(NotImplementedError): - self.handler.call_mendeley_api("method1", None) - - def test_11_select_invalid_condition_name(self): - with self.assertRaises(NotImplementedError): - query = "SELECT * FROM catalog_search_data WHERE name='American Journal of Clinical Nutrition'" - self.handler.native_query(query) - - def test_12_select_invalid_operator(self): - with self.assertRaises(NotImplementedError): - query = "SELECT * FROM catalog_search_data WHERE source>'American Journal of Clinical Nutrition'AND max_year='2020' " - self.handler.native_query(query) - - def test_13_select_invalid_column_name(self): - with self.assertRaises(KeyError): - query = "SELECT name FROM catalog_search_data WHERE source='American Journal of Clinical Nutrition'AND max_year='2020' " - self.handler.native_query(query) - - def test_14_get_columns(self): - columns = self.handler.catalog_search_data.get_columns() - - expected_columns = [ - - 'title', - 'type', - 'source', - 'year', - 'pmid', - 'sgr', - 'issn', - 'scopus', - 'doi', - 'pui', - 'authors', - 'keywords', - 'link', - 'id' - ] - - self.assertListEqual(columns, expected_columns) - - def test_15_select_invalid_condition_name(self): - with self.assertRaises(ValueError): - query = "SELECT * FROM catalog_search_data" - self.handler.native_query(query) - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/milvus_handler/README.md b/mindsdb/integrations/handlers/milvus_handler/README.md deleted file mode 100644 index 7409c6f1d0f..00000000000 --- a/mindsdb/integrations/handlers/milvus_handler/README.md +++ /dev/null @@ -1,138 +0,0 @@ -# Milvus Handler - -This is the implementation of the Milvus for MindsDB. - -## Milvus - -Milvus is an open-source and blazing fast vector database built for scalable similarity search. - -## Implementation - -This handler uses `pymilvus` python library connect to a Milvus instance. - -The required arguments to establish a connection are: - -* `uri`: uri for milvus database, can be set to local ".db" file or docker or cloud service -* `token`: token to support docker or cloud service according to uri option - -The optional arguments to establish a connection are: - -These are used for `SELECT` queries: -* `search_default_limit`: default limit to be passed in select statements (default=100) -* `search_metric_type`: metric type used for searches (default="L2") -* `search_ignore_growing`: whether to ignore growing segments during similarity searches (default=False) -* `search_params`: specific to the `search_metric_type` (default={"nprobe": 10}) - -These are used for `CREATE` queries: -* `create_auto_id`: whether to auto generate id when inserting records with no ID (default=False) -* `create_id_max_len`: maximum length of the id field when creating a table (default=64) -* `create_embedding_dim`: embedding dimension for creating table (default=8) -* `create_dynamic_field`: whether or not the created tables have dynamic fields or not (default=True) -* `create_content_max_len`: max length of the content column (default=200) -* `create_content_default_value`: default value of content column (default='') -* `create_schema_description`: description of the created schemas (default='') -* `create_alias`: alias of the created schemas (default='default') -* `create_index_params`: parameters of the index created on embeddings column (default={}) -* `create_index_metric_type`: metric used to create the index (default='L2') -* `create_index_type`: the type of index (default='AUTOINDEX') - -For more information about how these perameters map to Milvus API, look at Milvus' documentation - -## Usage - -Before continuing, make sure that `pymilvus` version is same as your Milvus instance version. You can check and change the `requirements.txt` file in this directory to accomodate that. This integration is tested on version `2.3` - -### Setting up milvus using docker locally - -To set up docker locally, refer to this [link](https://milvus.io/docs/install_standalone-docker.md). You can deploy milvus as a cluster or as a standalone service. - -### Creating connection - -In order to make use of this handler and connect to a Milvus server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE milvus_datasource -WITH - ENGINE = 'milvus', - PARAMETERS = { - "uri": "./milvus_local.db", - "token": "", - "create_embedding_dim": 3, - "create_auto_id": true -}; -``` - -### Dropping connection - -To drop the connection, use this command - -```sql -DROP DATABASE milvus_datasource; -``` - -### Creating tables - -To insert data from a pre-existing table, use `CREATE` - -```sql -CREATE TABLE milvus_datasource.test -(SELECT * FROM sqlitedb.test); -``` - -### Dropping collections - -Dropping a collection is not supported - -### Querying and selecting - -To query database using a search vector, you can use `search_vector` in `WHERE` clause - -Caveats: -- If you omit `LIMIT`, the `search_default_limit` is used since Milvus requires it -- Metadata column is not supported, but if the collection has dynamic schema enabled, you can query like normal, see the example below -- Dynamic fields cannot be displayed but can be queried - -```sql -SELECT * from milvus_datasource.test -WHERE search_vector = '[3.0, 1.0, 2.0, 4.5]' -LIMIT 10; -``` - -If you omit the `search_vector`, this becomes a basic search and `LIMIT` or `search_default_limit` amount of entries in collection are returned - -```sql -SELECT * from milvus_datasource.test -``` - -You can use `WHERE` clause on dynamic fields like normal SQL - -```sql -SELECT * FROM milvus_datasource.createtest -WHERE category = "science"; -``` - -### Deleting records - -You can delete entries using `DELETE` just like in SQL. - -Caveats: -- Milvus only supports deleting entities with clearly specified primary keys -- You can only use `IN` operator - -```sql -DELETE FROM milvus_datasource.test -WHERE id IN (1, 2, 3); -``` - -### Inserting records - -You can also insert individual rows like so: - -```sql -INSERT INTO milvus_test.testable (id,content,metadata,embeddings) -VALUES ("id3", 'this is a test', '{"test": "test"}', '[1.0, 8.0, 9.0]'); -``` - -### Updating - -Updating records is not supported by Milvus API. You can try using combination of `DELETE` and `INSERT` diff --git a/mindsdb/integrations/handlers/milvus_handler/__about__.py b/mindsdb/integrations/handlers/milvus_handler/__about__.py deleted file mode 100644 index 7dc326ae672..00000000000 --- a/mindsdb/integrations/handlers/milvus_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Milvus handler" -__package_name__ = "mindsdb_milvus_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Milvus" -__author__ = "Aditya Azad" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/milvus_handler/__init__.py b/mindsdb/integrations/handlers/milvus_handler/__init__.py deleted file mode 100644 index 21f987e756e..00000000000 --- a/mindsdb/integrations/handlers/milvus_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .milvus_handler import MilvusHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Milvus" -name = "milvus" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/milvus_handler/connection_args.py b/mindsdb/integrations/handlers/milvus_handler/connection_args.py deleted file mode 100644 index 98e8e1937fe..00000000000 --- a/mindsdb/integrations/handlers/milvus_handler/connection_args.py +++ /dev/null @@ -1,112 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - uri={ - "type": ARG_TYPE.STR, - "description": "uri of milvus service", - "required": True, - }, - token={ - "type": ARG_TYPE.STR, - "description": "token to support docker or cloud service", - "required": False, - }, - search_default_limit={ - "type": ARG_TYPE.INT, - "description": "default limit to be passed in select statements", - "required": False, - }, - search_metric_type={ - "type": ARG_TYPE.STR, - "description": "metric type used for searches", - "required": False, - }, - search_ignore_growing={ - "type": ARG_TYPE.BOOL, - "description": "whether to ignore growing segments during similarity searches", - "required": False, - }, - search_params={ - "type": ARG_TYPE.DICT, - "description": "specific to the `search_metric_type`", - "required": False, - }, - create_auto_id={ - "type": ARG_TYPE.BOOL, - "description": "whether to auto generate id when inserting records with no ID (default=False)", - "required": False, - }, - create_id_max_len={ - "type": ARG_TYPE.STR, - "description": "maximum length of the id field when creating a table (default=64)", - "required": False, - }, - create_embedding_dim={ - "type": ARG_TYPE.INT, - "description": "embedding dimension for creating table (default=8)", - "required": False, - }, - create_dynamic_field={ - "type": ARG_TYPE.BOOL, - "description": "whether or not the created tables have dynamic fields or not (default=True)", - "required": False, - }, - create_content_max_len={ - "type": ARG_TYPE.INT, - "description": "max length of the content column (default=200)", - "required": False, - }, - create_content_default_value={ - "type": ARG_TYPE.STR, - "description": "default value of content column (default='')", - "required": False, - }, - create_schema_description={ - "type": ARG_TYPE.STR, - "description": "description of the created schemas (default='')", - "required": False, - }, - create_alias={ - "type": ARG_TYPE.STR, - "description": "alias of the created schemas (default='default')", - "required": False, - }, - create_index_params={ - "type": ARG_TYPE.DICT, - "description": "parameters of the index created on embeddings column (default={})", - "required": False, - }, - create_index_metric_type={ - "type": ARG_TYPE.STR, - "description": "metric used to create the index (default='L2')", - "required": False, - }, - create_index_type={ - "type": ARG_TYPE.STR, - "description": "the type of index (default='AUTOINDEX')", - "required": False, - }, -) - -connection_args_example = OrderedDict( - uri="./milvus_local.db", - token="", - search_default_limit=100, - search_metric_type="L2", - search_ignore_growing=True, - search_params={"nprobe": 10}, - create_auto_id=False, - create_id_max_len=64, - create_embedding_dim=8, - create_dynamic_field=True, - create_content_max_len=200, - create_content_default_value="", - create_schema_description="MindsDB generated table", - create_alias="default", - create_index_params={}, - create_index_metric_type="L2", - create_index_type="AUTOINDEX", -) diff --git a/mindsdb/integrations/handlers/milvus_handler/icon.svg b/mindsdb/integrations/handlers/milvus_handler/icon.svg deleted file mode 100644 index 5f585d224e3..00000000000 --- a/mindsdb/integrations/handlers/milvus_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/milvus_handler/milvus_handler.py b/mindsdb/integrations/handlers/milvus_handler/milvus_handler.py deleted file mode 100644 index 6f9aa86cc6e..00000000000 --- a/mindsdb/integrations/handlers/milvus_handler/milvus_handler.py +++ /dev/null @@ -1,332 +0,0 @@ -from typing import List, Optional - -import pandas as pd -import json -from pymilvus import MilvusClient, CollectionSchema, DataType, FieldSchema - -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.vectordatabase_handler import FilterCondition, FilterOperator, TableField, VectorStoreHandler -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class MilvusHandler(VectorStoreHandler): - """This handler handles connection and execution of the Milvus statements.""" - - name = "milvus" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - self.milvus_client = None - self._connection_data = kwargs["connection_data"] - # Extract parameters used while searching and leave the rest for establishing connection - self._search_limit = 100 - if "search_default_limit" in self._connection_data: - self._search_limit = self._connection_data["search_default_limit"] - self._search_params = { - "search_metric_type": "L2", - "search_ignore_growing": False, - "search_params": {"nprobe": 10}, - } - for search_param_name in self._search_params: - if search_param_name in self._connection_data: - self._search_params[search_param_name] = self._connection_data[search_param_name] - # Extract parameters used for creating tables - self._create_table_params = { - "create_auto_id": False, - "create_id_max_len": 64, - "create_embedding_dim": 8, - "create_dynamic_field": True, - "create_content_max_len": 200, - "create_content_default_value": "", - "create_schema_description": "MindsDB generated table", - "create_alias": "default", - "create_index_params": {}, - "create_index_metric_type": "L2", - "create_index_type": "AUTOINDEX", - } - for create_table_param in self._create_table_params: - if create_table_param in self._connection_data: - self._create_table_params[create_table_param] = self._connection_data[create_table_param] - self.is_connected = False - self.connect() - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """Connect to a Milvus database.""" - if self.is_connected is True: - return - try: - self.milvus_client = MilvusClient(**self._connection_data) - self.is_connected = True - except Exception as e: - logger.error(f"Error connecting to Milvus client: {e}!") - self.is_connected = False - - def disconnect(self): - """Close the database connection.""" - if self.is_connected is False: - return - self.milvus_client.close() - self.is_connected = False - - def check_connection(self): - """Check the connection to the Milvus database.""" - response_code = StatusResponse(False) - try: - response_code.success = self.milvus_client is not None - except Exception as e: - logger.error(f"Error checking Milvus connection: {e}!") - response_code.error_message = str(e) - return response_code - - def get_tables(self) -> HandlerResponse: - """Get the list of collections in the Milvus database.""" - collections = self.milvus_client.list_collections() - collections_name = pd.DataFrame( - columns=["TABLE_NAME"], - data=[collection for collection in collections], - ) - return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_name) - - def drop_table(self, table_name: str, if_exists=True): - """Delete a collection from the Milvus database.""" - try: - self.milvus_client.drop_collection(collection_name=table_name) - except Exception as e: - if not if_exists: - raise Exception(f"Error dropping table '{table_name}': {e}") - - def _get_milvus_operator(self, operator: FilterOperator) -> str: - mapping = { - FilterOperator.EQUAL: "==", - FilterOperator.NOT_EQUAL: "!=", - FilterOperator.LESS_THAN: "<", - FilterOperator.LESS_THAN_OR_EQUAL: "<=", - FilterOperator.GREATER_THAN: ">", - FilterOperator.GREATER_THAN_OR_EQUAL: ">=", - FilterOperator.IN: "in", - FilterOperator.NOT_IN: "not in", - FilterOperator.LIKE: "like", - FilterOperator.NOT_LIKE: "not like", - } - if operator not in mapping: - raise Exception(f"Operator {operator} is not supported by Milvus!") - return mapping[operator] - - def _translate_conditions(self, conditions: Optional[List[FilterCondition]], exclude_id: bool = True) -> Optional[str]: - """ - Translate a list of FilterCondition objects a string that can be used by Milvus. - E.g., - [ - FilterCondition( - column="metadata.price", - op=FilterOperator.LESS_THAN, - value=1000, - ), - FilterCondition( - column="metadata.price", - op=FilterOperator.GREATER_THAN, - value=300, - ) - ] - Is converted to: "(price < 1000) and (price > 300)" - If exclude_id is set to true then id column is ignored - """ - if not conditions: - return - # Ignore all non-metadata conditions - filtered_conditions = [ - condition - for condition in conditions - if condition.column.startswith(TableField.METADATA.value) or condition.column.startswith(TableField.ID.value) - ] - if len(filtered_conditions) == 0: - return None - # Translate each metadata condition into a dict - milvus_conditions = [] - for condition in filtered_conditions: - if isinstance(condition.value, str): - condition.value = f"'{condition.value}'" - milvus_conditions.append(f"({condition.column.split('.')[-1]} {self._get_milvus_operator(condition.op)} {condition.value})") - # Combine all metadata conditions into a single string and return - return " and ".join(milvus_conditions) if milvus_conditions else None - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ): - self.milvus_client.load_collection(collection_name=table_name) - # Find vector filter in conditions - vector_filter = ( - [] - if conditions is None - else [ - condition.value - for condition in conditions - if condition.column == TableField.SEARCH_VECTOR.value - ] - ) - - # Generate search arguments - search_arguments = {} - # TODO: check if distance in columns work - if columns: - search_arguments["output_fields"] = columns - else: - search_arguments["output_fields"] = [schema_obj.name for schema_obj in self.SCHEMA] - search_arguments["filter"] = self._translate_conditions(conditions) - # NOTE: According to api sum of offset and limit should be less than 16384. - api_limit = 16384 - if limit is not None and offset is not None and limit + offset >= api_limit: - raise Exception(f"Sum of limit and offset should be less than {api_limit}") - - if limit is not None: - search_arguments["limit"] = limit - else: - search_arguments["limit"] = self._search_limit - if offset is not None: - search_arguments["offset"] = offset - - # Vector search - if vector_filter: - search_arguments["data"] = vector_filter - search_arguments["anns_field"] = TableField.EMBEDDINGS.value - if "search_params" not in search_arguments: - search_arguments["search_params"] = {} - search_arguments["search_params"]["metric_type"] = self._search_params["search_metric_type"] - search_arguments["search_params"]["ignore_growing"] = self._search_params["search_ignore_growing"] - results = self.milvus_client.search(table_name, **search_arguments)[0] - columns_required = [TableField.ID.value, TableField.DISTANCE.value] - if TableField.CONTENT.value in columns: - columns_required.append(TableField.CONTENT.value) - if TableField.EMBEDDINGS.value in columns: - columns_required.append(TableField.EMBEDDINGS.value) - data = {k: [] for k in columns_required} - for hit in results: - for col in columns_required: - if col != TableField.DISTANCE.value: - data[col].append(hit["entity"].get(col)) - else: - data[TableField.DISTANCE.value].append(hit["distance"]) - return pd.DataFrame(data) - else: - # Basic search - if not search_arguments["filter"]: - search_arguments["filter"] = "" - search_arguments["output_fields"] = [ - TableField.ID.value, - TableField.CONTENT.value, - TableField.EMBEDDINGS.value, - ] if not columns else columns - results = self.milvus_client.query(table_name, **search_arguments) - return pd.DataFrame.from_records(results) - - def create_table(self, table_name: str, if_not_exists=True): - """Create a collection with default parameters in the Milvus database as described in documentation.""" - id = FieldSchema( - name=TableField.ID.value, - dtype=DataType.VARCHAR, - is_primary=True, - max_length=self._create_table_params["create_id_max_len"], - auto_id=self._create_table_params["create_auto_id"] - ) - embeddings = FieldSchema( - name=TableField.EMBEDDINGS.value, - dtype=DataType.FLOAT_VECTOR, - dim=self._create_table_params["create_embedding_dim"] - ) - content = FieldSchema( - name=TableField.CONTENT.value, - dtype=DataType.VARCHAR, - max_length=self._create_table_params["create_content_max_len"], - default_value=self._create_table_params["create_content_default_value"] - ) - schema = CollectionSchema( - fields=[id, content, embeddings], - description=self._create_table_params["create_schema_description"], - enable_dynamic_field=self._create_table_params["create_dynamic_field"] - ) - collection_name = table_name - self.milvus_client.create_collection( - collection_name=collection_name, - schema=schema - ) - index_params = self.milvus_client.prepare_index_params() - index_params.add_index( - field_name=TableField.EMBEDDINGS.value, - index_type=self._create_table_params["create_index_type"], - metric_type=self._create_table_params["create_index_metric_type"], - params=self._create_table_params.get("create_params", {}) - ) - self.milvus_client.create_index( - collection_name=collection_name, - index_params=index_params, - ) - - def insert( - self, table_name: str, data: pd.DataFrame, columns: List[str] = None - ): - """Insert data into the Milvus collection.""" - self.milvus_client.load_collection(collection_name=table_name) - if columns: - data = data[columns] - if TableField.METADATA.value in data.columns: - rows = data[TableField.METADATA.value].to_list() - for i, row in enumerate(rows): - if isinstance(row, str): - rows[i] = json.loads(row) - data = pd.concat([data, pd.DataFrame.from_records(rows)], axis=1) - data.drop(TableField.METADATA.value, axis=1, inplace=True) - data_list = data.to_dict(orient="records") - for data_dict in data_list: - if TableField.EMBEDDINGS.value in data_dict and isinstance(data_dict[TableField.EMBEDDINGS.value], str): - data_dict[TableField.EMBEDDINGS.value] = json.loads(data_dict[TableField.EMBEDDINGS.value]) - self.milvus_client.insert(table_name, data_list) - - def delete( - self, table_name: str, conditions: List[FilterCondition] = None - ): - # delete only supports IN operator - for condition in conditions: - if condition.op in [FilterOperator.EQUAL, FilterOperator.IN]: - condition.op = FilterOperator.IN - if not isinstance(condition.value, list): - condition.value = [condition.value] - filters = self._translate_conditions(conditions, exclude_id=False) - if not filters: - raise Exception("Some filters are required, use DROP TABLE to delete everything") - if self.milvus_client.has_collection(collection_name=table_name): - self.milvus_client.delete(table_name, filter=filters) - - def get_columns(self, table_name: str) -> HandlerResponse: - """Get columns in a Milvus collection""" - try: - self.milvus_client.has_collection(collection_name=table_name) - except Exception as e: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Error finding table: {e}", - ) - try: - field_names = {field["name"] for field in self.milvus_client.describe_collection(collection_name=table_name)["fields"]} - schema = [mindsdb_schema_field for mindsdb_schema_field in self.SCHEMA if mindsdb_schema_field["name"] in field_names] - data = pd.DataFrame(schema) - data.columns = ["COLUMN_NAME", "DATA_TYPE"] - return HandlerResponse(data_frame=data) - except Exception as e: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Error finding table: {e}", - ) diff --git a/mindsdb/integrations/handlers/milvus_handler/requirements.txt b/mindsdb/integrations/handlers/milvus_handler/requirements.txt deleted file mode 100644 index 4872c767649..00000000000 --- a/mindsdb/integrations/handlers/milvus_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pymilvus==2.3 diff --git a/mindsdb/integrations/handlers/milvus_handler/tests/__init__.py b/mindsdb/integrations/handlers/milvus_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/mlflow_handler/requirements.txt b/mindsdb/integrations/handlers/mlflow_handler/requirements.txt index 40d1d2b4967..3ccfa559cbe 100644 --- a/mindsdb/integrations/handlers/mlflow_handler/requirements.txt +++ b/mindsdb/integrations/handlers/mlflow_handler/requirements.txt @@ -1,2 +1,3 @@ mlflow +protobuf>=6.33.5 # not directly required, pinned by Snyk to avoid a vulnerability sqlparse>=0.5.4 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/unused/unit/ml_handlers/test_mlflow.py b/mindsdb/integrations/handlers/mlflow_handler/tests/test_mlflow.py similarity index 63% rename from tests/unused/unit/ml_handlers/test_mlflow.py rename to mindsdb/integrations/handlers/mlflow_handler/tests/test_mlflow.py index cc7b5547895..f333c93b621 100644 --- a/tests/unused/unit/ml_handlers/test_mlflow.py +++ b/mindsdb/integrations/handlers/mlflow_handler/tests/test_mlflow.py @@ -1,6 +1,6 @@ # How to run: # env PYTHONPATH=./:$PYTHONPATH pytest tests/unit/ml_handlers/test_mlflow.py -ls - +import requests import time import pytest from unittest.mock import patch @@ -14,23 +14,16 @@ # TODO: fix patches class TestMLFlow(BaseExecutorTest): def run_sql(self, sql): - return self.command_executor.execute_command( - parse_sql(sql) - ) - - @patch('mlflow.tracking.MlflowClient') - @patch.object(MLflowHandler, '_check_model_url') - @patch('mindsdb.integrations.handlers.mlflow_handler.mlflow_handler.requests.post') - def test_mlflow( - self, - mock_internal_post, - mock_handler_url_method, - mock_mlflow_client - ): - mock_mlflow_client.search_registered_models.side_effect = ['test_mlflow'] - mock_internal_post.side_effect = requests.Request(json=['negative_sentiment']) + return self.command_executor.execute_command(parse_sql(sql)) + + @patch("mlflow.tracking.MlflowClient") + @patch.object(MLflowHandler, "_check_model_url") + @patch("mindsdb.integrations.handlers.mlflow_handler.mlflow_handler.requests.post") + def test_mlflow(self, mock_internal_post, mock_handler_url_method, mock_mlflow_client): + mock_mlflow_client.search_registered_models.side_effect = ["test_mlflow"] + mock_internal_post.side_effect = requests.Request(json=["negative_sentiment"]) mock_handler_url_method.side_effect = True - ret = self.run_sql(''' + ret = self.run_sql(""" CREATE PREDICTOR mindsdb.test_mlflow PREDICT c USING @@ -39,19 +32,19 @@ def test_mlflow( mlflow_server_url='http://0.0.0.0:5001/', mlflow_server_path='sqlite:////mlflow.db', predict_url='http://localhost:5000/invocations'; - ''') + """) assert ret.error_code is None time.sleep(3) - ret = self.run_sql(''' + ret = self.run_sql(""" SELECT p.* FROM mindsdb.test_mlflow as p WHERE text="The tsunami is coming, seek high ground"; - ''') + """) assert ret.error_code is None - assert ret.c == '0' # what is it? + assert ret.c == "0" # what is it? -if __name__ == '__main__': - pytest.main(['test_mlflow.py']) +if __name__ == "__main__": + pytest.main(["test_mlflow.py"]) diff --git a/mindsdb/integrations/handlers/monetdb_handler/README.md b/mindsdb/integrations/handlers/monetdb_handler/README.md deleted file mode 100644 index 92f5a59a39b..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# MonetDB Handler - -This is the implementation of the MonetDB handler for MindsDB. - -## MonetDB -MonetDB is an open-source column-oriented relational database management system originally developed at the Centrum Wiskunde & Informatica in the Netherlands. It is designed to provide high performance on complex queries against large databases, such as combining tables with hundreds of columns and millions of rows. - -## Implementation -This handler was implemented using the `pymonetdb`, a Python library that allows you to use Python code to run SQL commands on MonetDB Database. - -The required arguments to establish a connection are, -* `user`: username asscociated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `database`: Database name to be connected -* `schema_name `: schema name to get tables. (_Optional_) **Default it will select current schema.** - -## Usage -In order to make use of this handler and connect to DB2 in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE monetdb_datasource -WITH -engine='monetdb', -parameters={ - "user":"monetdb", - "password":"monetdb", - "host":"127.0.0.1", - "port":50000, - "schema_name":"sys", - "database":"demo" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM monetdb_datasource.demo; -~~~~ diff --git a/mindsdb/integrations/handlers/monetdb_handler/__about__.py b/mindsdb/integrations/handlers/monetdb_handler/__about__.py deleted file mode 100644 index a8306cd30c6..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB MonetDB handler" -__package_name__ = "mindsdb_monetdb_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for MonetDB" -__author__ = "Parthiv Makwana" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/monetdb_handler/__init__.py b/mindsdb/integrations/handlers/monetdb_handler/__init__.py deleted file mode 100644 index 4539f41a894..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .monetdb_handler import MonetDBHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "MonetDB" -name = "monetdb" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/monetdb_handler/connection_args.py b/mindsdb/integrations/handlers/monetdb_handler/connection_args.py deleted file mode 100644 index c6be76539ef..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/connection_args.py +++ /dev/null @@ -1,43 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - "type": ARG_TYPE.STR, - "description": "The host name or IP address of the MonetDB server/database.", - }, - database={ - "type": ARG_TYPE.STR, - "description": """ - The database name to use when connecting with the MonetDB server. - """, - }, - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the MonetDB server.", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the MonetDB server.", - "secret": True, - }, - port={ - "type": ARG_TYPE.INT, - "description": "Specify port to connect MonetDB through TCP/IP", - }, - schema_name={ - "type": ARG_TYPE.STR, - "description": "Specify the schema name for Listing Table ", - }, -) - -connection_args_example = OrderedDict( - host="127.0.0.1", - port=50000, - password="monetdb", - user="monetdb", - schemaName="sys", - database="demo", -) diff --git a/mindsdb/integrations/handlers/monetdb_handler/icon.svg b/mindsdb/integrations/handlers/monetdb_handler/icon.svg deleted file mode 100644 index 576aa5dd64c..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py b/mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py deleted file mode 100644 index ac8e90c909e..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +++ /dev/null @@ -1,198 +0,0 @@ -from typing import Optional -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - -import pandas as pd -import pymonetdb as mdb -from .utils.monet_get_id import schema_id, table_id -from sqlalchemy_monetdb.dialect import MonetDialect - -logger = log.getLogger(__name__) - - -class MonetDBHandler(DatabaseHandler): - name = "monetdb" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """Initialize the handler - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.kwargs = kwargs - self.parser = parse_sql - self.database = connection_data["database"] - self.user = connection_data["user"] - self.password = connection_data["password"] - self.schemaName = connection_data["schema_name"] if "schema_name" in connection_data else None - self.host = connection_data["host"] - self.port = connection_data["port"] - - self.connection = None - self.is_connected = False - - def connect(self): - """Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - Connection Object - """ - if self.is_connected is True: - return self.connection - - try: - self.connection = mdb.connect( - database=self.database, - hostname=self.host, - port=self.port, - username=self.user, - password=self.password, - ) - - self.is_connected = True - except Exception as e: - logger.error(f"Error while connecting to {self.database}, {e}") - - return self.connection - - def disconnect(self): - """Close any existing connections - Should switch self.is_connected. - """ - if self.is_connected is False: - return - try: - self.connection.close() - self.is_connected = False - except Exception as e: - logger.error(f"Error while disconnecting to {self.database}, {e}") - - return - - def check_connection(self) -> StatusResponse: - """Check connection to the handler - Returns: - HandlerStatusResponse - """ - responseCode = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - responseCode.success = True - except Exception as e: - logger.error(f"Error connecting to database {self.database}, {e}!") - responseCode.error_message = str(e) - finally: - if responseCode.success is True and need_to_close: - self.disconnect() - if responseCode.success is False and self.is_connected is True: - self.is_connected = False - - return responseCode - - def native_query(self, query: str) -> StatusResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - etc) - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - conn = self.connect() - cur = conn.cursor() - try: - cur.execute(query) - - if len(cur._rows) > 0: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame(result, columns=[x[0] for x in cur.description]), - ) - else: - response = Response(RESPONSE_TYPE.OK) - self.connection.commit() - except Exception as e: - logger.error(f"Error running query: {query} on {self.database}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - self.connection.rollback() - - cur.close() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: HandlerResponse - """ - - renderer = SqlalchemyRender(MonetDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """Return list of entities - Return list of entities that will be accesible as tables. - Returns: HandlerResponse: shoud have same columns as information_schema.tables - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html) - Column 'TABLE_NAME' is mandatory, other is optional. - """ - self.connect() - schema = schema_id(connection=self.connection, schema_name=self.schemaName) - - q = f""" - SELECT name as TABLE_NAME - FROM sys.tables - WHERE system = False - AND type = 0 - AND schema_id = {schema} - """ - - return self.query(q) - - def get_columns(self, table_name: str) -> StatusResponse: - """Returns a list of entity columns - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse: shoud have same columns as information_schema.columns - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html) - Column 'COLUMN_NAME' is mandatory, other is optional. Hightly - recomended to define also 'DATA_TYPE': it should be one of - python data types (by default it str). - """ - self.connect() - table = table_id( - connection=self.connection, - table_name=table_name, - schema_name=self.schemaName, - ) - - q = f""" - SELECT - name as COLUMN_NAME, - type as DATA_TYPE - FROM sys.columns - WHERE table_id = {table} - """ - return self.query(q) diff --git a/mindsdb/integrations/handlers/monetdb_handler/requirements.txt b/mindsdb/integrations/handlers/monetdb_handler/requirements.txt deleted file mode 100644 index 89834499eff..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pymonetdb -sqlalchemy-monetdb \ No newline at end of file diff --git a/mindsdb/integrations/handlers/monetdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/monetdb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py b/mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py deleted file mode 100644 index 5863dc427be..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py +++ /dev/null @@ -1,49 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.monetdb_handler.monetdb_handler import MonetDBHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class MonetDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "127.0.0.1", - "port": 50000, - "user": "monetdb", - "password": "monetdb", - "database": "demo", - } - } - cls.handler = MonetDBHandler("test_monet_handler", cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS PREM;") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_create_table(self): - res = self.handler.query("CREATE TABLE IF NOT EXISTS PREM (Premi varchar(50));") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_insert_table(self): - res = self.handler.query("INSERT INTO PREM VALUES('Radha <3 Krishna');") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_4_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_5_select_query(self): - query = "SELECT * FROM PREM;" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE or RESPONSE_TYPE.OK - - def test_6_check_connection(self): - self.handler.check_connection() - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/monetdb_handler/utils/__init__.py b/mindsdb/integrations/handlers/monetdb_handler/utils/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py b/mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py deleted file mode 100644 index c717a60fbf3..00000000000 --- a/mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py +++ /dev/null @@ -1,49 +0,0 @@ -from sqlalchemy import exc - - -def schema_id(connection, schema_name=None): - """Fetch the id for schema""" - cur = connection.cursor() - if schema_name is None: - cur.execute("SELECT current_schema") - schema_name = cur.fetchall()[0][0] - - query = f""" - SELECT id - FROM sys.schemas - WHERE name = '{schema_name}' - """ - - cur.execute(query) - - try: - schema_id = cur.fetchall()[0][0] - except Exception: - raise exc.InvalidRequestError(schema_name) - - return schema_id - - -def table_id(connection, table_name, schema_name=None): - """Fetch the id for schema.table_name, defaulting to current schema if - schema is None - """ - - schema_idm = schema_id(connection=connection, schema_name=schema_name) - - q = f""" - SELECT id - FROM sys.tables - WHERE name = '{table_name}' - AND schema_id = {schema_idm} - """ - - cur = connection.cursor() - cur.execute(q) - - try: - table_id = cur.fetchall()[0][0] - except Exception: - raise exc.NoSuchTableError(table_name) - - return table_id diff --git a/mindsdb/integrations/handlers/mongodb_handler/README.md b/mindsdb/integrations/handlers/mongodb_handler/README.md deleted file mode 100644 index eecadf83365..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/README.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: MongoDB -sidebarTitle: MongoDB ---- - -This documentation describes the integration of MindsDB with [MongoDB](https://www.mongodb.com/company/what-is-mongodb), a document database with the scalability and flexibility that you want with the querying and indexing that you need. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to MongoDB from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE mongodb_datasource -WITH - ENGINE = 'mongodb', - PARAMETERS = { - "host": "mongodb+srv://admin:admin@demo.mongodb.net/public" - }; -``` - -Required connection parameters include the following: - -* `host`: The host name, IP address or connection string of the MongoDB server. - -Optional connection parameters include the following: - -* `username`: The username associated with the database. -* `password`: The password to authenticate your access. -* `port`: The port through which TCP/IP connection is to be made. -* `database`: The database name to be connected. This will be required if the connection string is missing the `/database` path. - -## Usage - -Retrieve data from a specified collection by providing the integration name and collection name: - -```sql -SELECT * -FROM mongodb_datasource.my_collection -LIMIT 10; -``` - - -The above examples utilize `mongodb_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - - -At the moment, this integration only supports `SELECT` and `UPDATE` queries. - - - -**For this connection, we strongly suggest using the Mongo API instead of the SQL API.** - -MindsDB has a dedicated [Mongo API](/sdks/mongo/mindsdb-mongo-ql-overview) that allows you to use the full power of the MindsDB platform. -Using the Mongo API feels more natural for MongoDB users and allows you to use all the features of MindsDB. - -You can find the instructions on how to connect MindsDB to [MongoDB Compass](/connect/mongo-compass) or [MongoDB Shell](/connect/mongo-shell) and proceed with the [Mongo API documentation](/sdks/mongo/mindsdb-mongo-ql-overview) for further details. - - - -Once you connected MindsDB to MongoDB Compass or MongoDB Shell, you can run this command to connect your database to MindsDB: - -```sql -test> use mindsdb -mindsdb> db.databases.insertOne({ - name: "mongo_datasource", - engine: "mongodb", - connection_args: { - "host": "mongodb+srv://user:pass@db.xxxyyy.mongodb.net/" - } - }); -``` - -Then you can query your data, like this: - -```sql -mindsdb> use mongo_datasource -mongo_datasource> db.demo.find({}).limit(3) -``` - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the MongoDB server. -* **Checklist**: - 1. Make sure the MongoDB server is active. - 2. Confirm that host and credentials provided are correct. Try a direct MongoDB connection using a client like MongoDB Compass. - 3. Ensure a stable network between MindsDB and MongoDB. For example, if you are using MongoDB Atlas, ensure that the IP address of the machine running MindsDB is whitelisted. - - - -`Unknown statement` - -* **Symptoms**: Errors related to the issuing of unsupported queries to MongoDB via the integration. -* **Checklist**: - 1. Ensure the query is a `SELECT` or `UPDATE` query. - - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing collection names containing special characters. -* **Checklist**: - 1. Ensure table names with special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mongodb_handler/__about__.py b/mindsdb/integrations/handlers/mongodb_handler/__about__.py deleted file mode 100644 index 7553dae467c..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB MongoDB handler' -__package_name__ = 'mindsdb_mongodb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for MongoDB" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/mongodb_handler/__init__.py b/mindsdb/integrations/handlers/mongodb_handler/__init__.py deleted file mode 100644 index 9cf9d095e17..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .mongodb_handler import MongoDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'MongoDB' -name = 'mongodb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/mongodb_handler/connection_args.py b/mindsdb/integrations/handlers/mongodb_handler/connection_args.py deleted file mode 100644 index 6330719a035..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/connection_args.py +++ /dev/null @@ -1,46 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - username={ - 'type': ARG_TYPE.STR, - 'description': 'The username used to authenticate with the MongoDB server.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the MongoDB server.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the MongoDB server.', - 'required': False, - 'label': 'Database' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the MongoDB server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the MongoDB server. Must be an integer.', - 'required': True, - 'label': 'Port' - }, -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=27017, - username='mongo', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/mongodb_handler/icon.svg b/mindsdb/integrations/handlers/mongodb_handler/icon.svg deleted file mode 100644 index 2540bc69cc9..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mongodb_handler/mongodb_handler.py b/mindsdb/integrations/handlers/mongodb_handler/mongodb_handler.py deleted file mode 100644 index 39e67ccfa20..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/mongodb_handler.py +++ /dev/null @@ -1,358 +0,0 @@ -import re -import time -import threading - -from bson import ObjectId -from mindsdb_sql_parser.ast.base import ASTNode -import pandas as pd -import pymongo -from pymongo import MongoClient -from pymongo.errors import ServerSelectionTimeoutError, OperationFailure, ConfigurationError, InvalidURI -from typing import Text, List, Dict, Any, Union - -from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_query import MongoQuery -from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_parser import MongodbParser -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.utilities import log -from .utils.mongodb_render import MongodbRender - - -logger = log.getLogger(__name__) - - -class MongoDBHandler(DatabaseHandler): - """ - This handler handles the connection and execution of SQL statements on MongoDB. - """ - - _SUBSCRIBE_SLEEP_INTERVAL = 0.5 - name = "mongodb" - - def __init__(self, name: Text, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - kwargs: Arbitrary keyword arguments including the connection data. - """ - super().__init__(name) - connection_data = kwargs["connection_data"] - self.host = connection_data.get("host") - self.port = int(connection_data.get("port") or 27017) - self.user = connection_data.get("username") - self.password = connection_data.get("password") - self.database = connection_data.get("database") - self.flatten_level = connection_data.get("flatten_level", 0) - - self.connection = None - self.is_connected = False - - def __del__(self) -> None: - """ - Closes the connection when the handler instance is deleted. - """ - if self.is_connected: - self.disconnect() - - def connect(self) -> MongoClient: - """ - Establishes a connection to the MongoDB host. - - Raises: - ValueError: If the expected connection parameters are not provided. - - Returns: - pymongo.MongoClient: A connection object to the MongoDB host. - """ - kwargs = {} - if isinstance(self.user, str) and len(self.user) > 0: - kwargs["username"] = self.user - - if isinstance(self.password, str) and len(self.password) > 0: - kwargs["password"] = self.password - - if re.match(r"/?.*tls=true", self.host.lower()): - kwargs["tls"] = True - - if re.match(r"/?.*tls=false", self.host.lower()): - kwargs["tls"] = False - - try: - connection = MongoClient(self.host, port=self.port, **kwargs) - except InvalidURI as invalid_uri_error: - logger.error(f"Invalid URI provided for MongoDB connection: {invalid_uri_error}!") - raise - except ConfigurationError as config_error: - logger.error(f"Configuration error connecting to MongoDB: {config_error}!") - raise - except Exception as unknown_error: - logger.error(f"Unknown error connecting to MongoDB: {unknown_error}!") - raise - - # Get the database name from the connection if it's not provided. - if self.database is None: - self.database = connection.get_database().name - - self.is_connected = True - self.connection = connection - return self.connection - - def subscribe( - self, stop_event: threading.Event, callback: callable, table_name: Text, columns: List = None, **kwargs: Any - ) -> None: - """ - Subscribes to changes in a MongoDB collection and calls the provided callback function when changes occur. - - Args: - stop_event (threading.Event): An event object to stop the subscription. - callback (callable): The callback function to call when changes occur. - table_name (Text): The name of the collection to subscribe to. - columns (List): A list of columns to monitor for changes. - kwargs: Arbitrary keyword arguments. - """ - con = self.connect() - cur = con[self.database][table_name].watch() - - while True: - if stop_event.is_set(): - cur.close() - return - - res = cur.try_next() - if res is None: - time.sleep(self._SUBSCRIBE_SLEEP_INTERVAL) - continue - - _id = res["documentKey"]["_id"] - if res["operationType"] == "insert": - if columns is not None: - updated_columns = set(res["fullDocument"].keys()) - if not set(columns) & set(updated_columns): - # Do nothing. - continue - - callback(row=res["fullDocument"], key={"_id": _id}) - - if res["operationType"] == "update": - if columns is not None: - updated_columns = set(res["updateDescription"]["updatedFields"].keys()) - if not set(columns) & set(updated_columns): - # Do nothing. - continue - - # Get the full document. - full_doc = con[self.database][table_name].find_one(res["documentKey"]) - callback(row=full_doc, key={"_id": _id}) - - def disconnect(self) -> None: - """ - Closes the connection to the MongoDB host if it's currently open. - """ - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the MongoDB host. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - con = self.connect() - con.server_info() - - # Check if the database exists. - if self.database not in con.list_database_names(): - raise ValueError(f"Database {self.database} not found!") - - response.success = True - except ( - InvalidURI, - ServerSelectionTimeoutError, - OperationFailure, - ConfigurationError, - ValueError, - ) as known_error: - logger.error(f"Error connecting to MongoDB {self.database}, {known_error}!") - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f"Unknown error connecting to MongoDB {self.database}, {unknown_error}!") - response.error_message = str(unknown_error) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: Union[Text, Dict, MongoQuery]) -> Response: - """ - Executes a SQL query on the MongoDB host and returns the result. - - Args: - query (str): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - if isinstance(query, str): - query = MongodbParser().from_string(query) - - if isinstance(query, dict): - # Fallback for the previous API. - mquery = MongoQuery(query["collection"]) - - for c in query["call"]: - mquery.add_step({"method": c["method"], "args": c["args"]}) - - query = mquery - - collection = query.collection - database = self.database - - con = self.connect() - - # Check if the collection exists. - if collection not in con[database].list_collection_names(): - return Response( - RESPONSE_TYPE.ERROR, error_message=f"Collection {collection} not found in database {database}!" - ) - - try: - cursor = con[database][collection] - - for step in query.pipeline: - fnc = getattr(cursor, step["method"]) - cursor = fnc(*step["args"]) - - result = [] - if not isinstance(cursor, pymongo.results.UpdateResult): - for row in cursor: - result.append(self.flatten(row, level=self.flatten_level)) - - else: - return Response(RESPONSE_TYPE.OK) - - if len(result) > 0: - df = pd.DataFrame(result) - else: - columns = list(self.get_columns(collection).data_frame.Field) - df = pd.DataFrame([], columns=columns) - - response = Response(RESPONSE_TYPE.TABLE, df) - except Exception as e: - logger.error(f"Error running query: {query} on {self.database}.{collection}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - return response - - def flatten(self, row: Dict, level: int = 0) -> Dict: - """ - Flattens a nested dictionary to a single level. - - Args: - row (Dict): The dictionary to flatten. - level (int): The number of levels to flatten. If 0, the entire dictionary is flattened. - - Returns: - Dict: The flattened dictionary. - """ - add = {} - del_keys = [] - edit_keys = {} - - for k, v in row.items(): - # Convert ObjectId to string. - if isinstance(v, ObjectId): - edit_keys[k] = str(v) - if level > 0: - if isinstance(v, dict): - for k2, v2 in self.flatten(v, level=level - 1).items(): - add[f"{k}.{k2}"] = v2 - del_keys.append(k) - - if add: - row.update(add) - for key in del_keys: - del row[key] - if edit_keys: - row.update(edit_keys) - - return row - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode on the MongoDB host and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - renderer = MongodbRender() - mquery = renderer.to_mongo_query(query) - return self.native_query(mquery) - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables (collections) in the MongoDB host. - - Returns: - Response: A response object containing a list of tables (collections) in the MongoDB host. - """ - con = self.connect() - collections = con[self.database].list_collection_names() - collections_ar = [[i] for i in collections] - df = pd.DataFrame(collections_ar, columns=["table_name"]) - - response = Response(RESPONSE_TYPE.TABLE, df) - - return response - - def get_columns(self, table_name: Text) -> Response: - """ - Retrieves column (field) details for a specified table (collection) in the MongoDB host. - The first record in the collection is used to determine the column details. - - Args: - table_name (Text): The name of the table (collection) for which to retrieve column (field) information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - con = self.connect() - record = con[self.database][table_name].find_one() - - data = [] - if record is not None: - record = self.flatten(record) - - for k, v in record.items(): - data.append([k, type(v).__name__]) - - df = pd.DataFrame(data, columns=["Field", "Type"]) - - response = Response(RESPONSE_TYPE.TABLE, df) - return response diff --git a/mindsdb/integrations/handlers/mongodb_handler/requirements.txt b/mindsdb/integrations/handlers/mongodb_handler/requirements.txt deleted file mode 100644 index 8018841143e..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pymongo == 4.8.0 diff --git a/mindsdb/integrations/handlers/mongodb_handler/tests/__init__.py b/mindsdb/integrations/handlers/mongodb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/mongodb_handler/tests/seed.json b/mindsdb/integrations/handlers/mongodb_handler/tests/seed.json deleted file mode 100644 index b8a99c75194..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/tests/seed.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "col_one": 1, - "col_two": -1, - "col_three": 0.1, - "col_four": "A" - }, - { - "col_one": 2, - "col_two": -2, - "col_three": 0.2, - "col_four": "B" - }, - { - "col_one": 3, - "col_two": -3, - "col_three": 0.3, - "col_four": "C" - } -] \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mongodb_handler/tests/test_mongodb_handler.py b/mindsdb/integrations/handlers/mongodb_handler/tests/test_mongodb_handler.py deleted file mode 100644 index 23b605245f1..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/tests/test_mongodb_handler.py +++ /dev/null @@ -1,125 +0,0 @@ -import pytest -import json -from pymongo import MongoClient - -from mindsdb_sql_parser.ast import Identifier, Select, Star -from mindsdb.integrations.handlers.mongodb_handler.mongodb_handler import MongoDBHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE - - -HANDLER_KWARGS = { - "connection_data": { - "host": "127.0.0.1", - "port": "27017", - "username": "test_user", - "password": "supersecret", - "database": "mongo_test_db", - } -} - -expected_columns = ["_id", "col_one", "col_two", "col_three", "col_four"] - - -def seed_db(): - """Seed the test DB with some data""" - creds = HANDLER_KWARGS["connection_data"] - uri = f"mongodb://{creds['username']}:{creds['password']}@{creds['host']}" - conn = MongoClient(uri) - db = conn[HANDLER_KWARGS["connection_data"]["database"]] # noqa - - with open("mindsdb/integrations/handlers/mongodb_handler/tests/seed.json", "r") as f: - json.load(f) - conn.close() - - -@pytest.fixture(scope="module") -def handler(request): - seed_db() - handler = MongoDBHandler("mongo_handler", **HANDLER_KWARGS) - return handler - - -def check_valid_response(res): - if res.resp_type == RESPONSE_TYPE.TABLE: - assert res.data_frame is not None, "expected to have some data, but got None" - assert ( - res.error_code == 0 - ), f"expected to have zero error_code, but got {res.error_code}" - assert ( - res.error_message is None - ), f"expected to have None in error message, but got {res.error_message}" - - -""" TESTS """ - -# TODO - Subscribe - - -class TestMongoDBConnection: - def test_connect(self, handler): - handler.connect() - assert handler.is_connected, "connection error" - - def test_check_connection(self, handler): - res = handler.check_connection() - assert res.success is True, res.error_message - - -# TODO - Subscribe - - -class TestMongoDBQuery: - def test_native_query(self, handler): - query_string = "db.test.find()" - response = handler.native_query(query_string) - dbs = response.data_frame - assert dbs is not None, "expected to get some data, but got None" - assert "col_one" in dbs, f"expected to get 'col_one' column in response:\n{dbs}" - - def test_select_query(self, handler): - limit = 3 - query = Select( - targets=[Star()], - from_table=Identifier(parts=["test"]), - ) - res = handler.query(query) - check_valid_response(res) - got_rows = res.data_frame.shape[0] - want_rows = limit - assert ( - got_rows == want_rows - ), f"expected to have {want_rows} rows in response but got: {got_rows}" - - -class TestMongoDBTables: - def test_get_tables(self, handler): - res = handler.get_tables() - tables = res.data_frame - test_table = list(tables["table_name"]) - assert ( - tables is not None - ), "expected to have some table_name in the db, but got None" - assert ( - "table_name" in tables - ), f"expected to get 'table_name' column in the response:\n{tables}" - # get a specific table from the tables list - assert ( - "test" in test_table - ), f"expected to have 'test' table in the db but got: {test_table}" - - -class TestMongoDBColumns: - def test_get_columns(self, handler): - described = handler.get_columns("test") - describe_data = described.data_frame - check_valid_response(described) - got_columns = list(describe_data.iloc[:, 0]) - assert ( - got_columns == expected_columns - ), f"expected to have next columns in test table:\n{expected_columns}\nbut got:\n{got_columns}" - - -class TestMongoDBDisconnect: - def test_disconnect(self, handler): - handler.disconnect() - assert handler.is_connected is False, "failed to disconnect" diff --git a/mindsdb/integrations/handlers/mongodb_handler/utils/__init__.py b/mindsdb/integrations/handlers/mongodb_handler/utils/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_ast.py b/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_ast.py deleted file mode 100644 index 6cd38453f29..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_ast.py +++ /dev/null @@ -1,232 +0,0 @@ -import re -import ast as py_ast -import typing as t - -from mindsdb_sql_parser.ast import OrderBy, Identifier, Star, Select, Constant, BinaryOperation, Tuple, Latest - - -class MongoToAst: - """ - Converts query mongo to AST format - """ - - def from_mongoqeury(self, query): - # IS NOT USED YET AND NOT FINISHED - - collection = query.collection - - filter, projection = None, None - sort, limit, skip = None, None, None - for step in query.pipeline: - if step["method"] == "find": - filter = step["args"][0] - if len(step) > 1: - projection = step["args"][1] - elif step["method"] == "sort": - sort = step["args"][0] - elif step["method"] == "limit": - limit = step["args"][0] - elif step["method"] == "skip": - skip = step["args"][0] - - return self.find(collection, filter=filter, sort=sort, projection=projection, limit=limit, skip=skip) - - def find( - self, collection: t.Union[list, str], filter=None, sort=None, projection=None, limit=None, skip=None, **kwargs - ): - # https://www.mongodb.com/docs/v4.2/reference/method/db.collection.find/ - - order_by = None - if sort is not None: - # sort is dict - order_by = [] - for col, direction in sort.items(): - order_by.append(OrderBy(field=Identifier(parts=[col]), direction="DESC" if direction == -1 else "ASC")) - - if projection is not None: - targets = [] - for col, alias in projection.items(): - # it is only identifiers - if isinstance(alias, str): - alias = Identifier(parts=[alias]) - else: - alias = None - targets.append(Identifier(path_str=col, alias=alias)) - else: - targets = [Star()] - - where = None - if filter is not None: - where = self.convert_filter(filter) - - # convert to AST node - # collection can be string or list - if isinstance(collection, list): - collection = Identifier(parts=collection) - else: - collection = Identifier(path_str=collection) - - node = Select( - targets=targets, - from_table=collection, - where=where, - order_by=order_by, - ) - if limit is not None: - node.limit = Constant(value=limit) - - if skip is not None and skip != 0: - node.offset = Constant(value=skip) - - return node - - def convert_filter(self, filter): - cond_ops = { - "$and": "and", - "$or": "or", - } - - ast_filter = None - for k, v in filter.items(): - if k in ("$or", "$and"): - # suppose it is one key in dict - - op = cond_ops[k] - - nodes = [] - for cond in v: - nodes.append(self.convert_filter(cond)) - - if len(nodes) == 1: - return nodes[0] - - # compose as tree - arg1 = nodes[0] - for node in nodes[1:]: - arg1 = BinaryOperation(op=op, args=[arg1, node]) - - return arg1 - if k in ("$where", "$expr"): - # try to parse simple expression like 'this.saledate > this.latest' - return MongoWhereParser(v).to_ast() - - # is filter - arg1 = Identifier(parts=[k]) - - op, value = self.handle_filter(v) - arg2 = Constant(value=value) - ast_com = BinaryOperation(op=op, args=[arg1, arg2]) - if ast_filter is None: - ast_filter = ast_com - else: - ast_filter = BinaryOperation(op="and", args=[ast_filter, ast_com]) - return ast_filter - - def handle_filter(self, value): - ops = {"$ge": ">=", "$gt": ">", "$lt": "<", "$le": "<=", "$ne": "!=", "$eq": "="} - in_ops = {"$in": "in", "$nin": "not in"} - - if isinstance(value, dict): - key, value = list(value.items())[0] - if key in ops: - op = ops[key] - return op, value - - if key in in_ops: - op = in_ops[key] - if not isinstance(value, list): - raise NotImplementedError(f"Unknown type {key}, {value}") - value = Tuple(value) - - return op, value - - raise NotImplementedError(f"Unknown type {key}") - - elif isinstance(value, list): - raise NotImplementedError(f"Unknown filter {value}") - else: - # is simple type - op = "=" - value = value - return op, value - - -class MongoWhereParser: - def __init__(self, query): - self.query = query - - def to_ast(self): - # parse as python string - # replace '=' with '==' - query = re.sub(r"([^=><])=([^=])", r"\1==\2", self.query) - - tree = py_ast.parse(query, mode="eval") - return self.process(tree.body) - - def process(self, node): - if isinstance(node, py_ast.BoolOp): - # is AND or OR - op = node.op.__class__.__name__ - # values can be more than 2 - arg1 = self.process(node.values[0]) - for val1 in node.values[1:]: - arg2 = self.process(val1) - arg1 = BinaryOperation(op=op, args=[arg1, arg2]) - - return arg1 - - if isinstance(node, py_ast.Compare): - # it is - if len(node.ops) != 1: - raise NotImplementedError(f"Multiple ops {node.ops}") - op = self.compare_op(node.ops[0]) - arg1 = self.process(node.left) - arg2 = self.process(node.comparators[0]) - return BinaryOperation(op=op, args=[arg1, arg2]) - - if isinstance(node, py_ast.Name): - # is special operator: latest, ... - if node.id == "latest": - return Latest() - - if isinstance(node, py_ast.Constant): - # it is constant - return Constant(value=node.value) - - # ---- python 3.7 objects ----- - if isinstance(node, py_ast.Str): - return Constant(value=node.s) - - if isinstance(node, py_ast.Num): - return Constant(value=node.n) - - # ----------------------------- - - if isinstance(node, py_ast.Attribute): - # is 'this.field' - is attribute - if node.value.id != "this": - raise NotImplementedError(f"Unknown variable {node.value.id}") - return Identifier(parts=[node.attr]) - - raise NotImplementedError(f"Unknown node {node}") - - def compare_op(self, op): - opname = op.__class__.__name__ - - # TODO: in, not - - ops = { - "Eq": "=", - "NotEq": "!=", - "Gt": ">", - "Lt": "<", - "GtE": ">=", - "LtE": "<=", - } - if opname not in ops: - raise NotImplementedError(f"Unknown $where op: {opname}") - return ops[opname] - - @staticmethod - def test(cls): - assert cls('this.a ==1 and "te" >= latest').to_string() == "a = 1 AND 'te' >= LATEST" diff --git a/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_parser.py b/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_parser.py deleted file mode 100644 index a4025133070..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_parser.py +++ /dev/null @@ -1,137 +0,0 @@ -import ast as py_ast - -import dateutil.parser -from bson import ObjectId - -from .mongodb_query import MongoQuery - - -class MongodbParser: - """ - Converts string into MongoQuery - """ - - def from_string(self, call_str): - tree = py_ast.parse(call_str.strip(), mode="eval") - calls = self.process(tree.body) - # first call contents collection - method1 = calls[0]["method"] - if len(method1) < 2: - raise IndexError("Collection not found") - collection = method1[-2] - - mquery = MongoQuery(collection) - - # keep only last name - calls[0]["method"] = [method1[-1]] - - # convert method names: get first item of list - for c in calls: - mquery.add_step({"method": c["method"][0], "args": c["args"]}) - - return mquery - - def process(self, node): - if isinstance(node, py_ast.Call): - previous_call = None - - args = [] - for node2 in node.args: - args.append(self.process(node2)) - - # check functions - if isinstance(node.func, py_ast.Name): - # it is just name - func = node.func.id - - # special functions: - if func == "ISODate": - return dateutil.parser.isoparse(args[0]) - if func == "ObjectId": - return ObjectId(args[0]) - elif isinstance(node.func, py_ast.Attribute): - # it can be an attribute or pipeline - previous_call, func = self.process_func_name(node.func) - else: - raise NotImplementedError(f"Unknown function type: {node.func}") - - call = [{"method": func, "args": args}] - if previous_call is not None: - call = previous_call + call - - return call - - if isinstance(node, py_ast.List): - elements = [] - for node2 in node.elts: - elements.append(self.process(node2)) - return elements - - if isinstance(node, py_ast.Dict): - keys = [] - for node2 in node.keys: - if isinstance(node2, py_ast.Constant): - value = node2.value - elif isinstance(node2, py_ast.Str): # py37 - value = node2.s - elif isinstance(node2, py_ast.Name): - value = node2.id - else: - raise NotImplementedError(f"Unknown dict key {node2}") - - keys.append(value) - - values = [] - for node2 in node.values: - values.append(self.process(node2)) - - return dict(zip(keys, values)) - - if isinstance(node, py_ast.Name): - # special attributes - name = node.id - if name == "true": - return True - elif name == "false": - return False - elif name == "null": - return None - - if isinstance(node, py_ast.Constant): - return node.value - - # ---- python 3.7 objects ----- - if isinstance(node, py_ast.Str): - return node.s - - if isinstance(node, py_ast.Num): - return node.n - - # ----------------------------- - - if isinstance(node, py_ast.UnaryOp): - if isinstance(node.op, py_ast.USub): - value = self.process(node.operand) - return -value - - raise NotImplementedError(f"Unknown node {node}") - - def process_func_name(self, node): - previous_call = None - if isinstance(node, py_ast.Attribute): - attribute = node - # multilevel attribute - - obj_name = [] - while isinstance(attribute, py_ast.Attribute): - obj_name.insert(0, attribute.attr) - attribute = attribute.value - - if isinstance(attribute, py_ast.Name): - obj_name.insert(0, attribute.id) - - if isinstance(attribute, py_ast.Call): - # is pipeline - previous_call = self.process(attribute) - - return previous_call, obj_name diff --git a/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_query.py b/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_query.py deleted file mode 100644 index 8a50b85d8fc..00000000000 --- a/mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_query.py +++ /dev/null @@ -1,78 +0,0 @@ -import datetime as dt -import json -from bson import ObjectId - - -class MongoJSONEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, dt.datetime): - return f"ISODate({obj.isoformat()})" - if isinstance(obj, ObjectId): - return f"ObjectId({str(obj)})" - return super(MongoJSONEncoder, self).default(obj) - - -class MongoQuery: - def __init__(self, collection, pipline=None): - self.collection = collection - self.pipeline = [] - - if pipline is None: - pipline = [] - for step in pipline: - self.add_step(step) - - def add_step(self, step): - # step = { - # 'method': 'sort', - # 'args': [{c: 3}] - # } - - if "method" not in step or "args" not in step or not isinstance(step["args"], list): - raise AttributeError(f"Wrong step {step}") - - self.pipeline.append(step) - - def to_string(self): - return self.__str__() - - def __getattr__(self, item): - # return callback to save step of pipeline - def fnc(*args): - self.pipeline.append({"method": item, "args": args}) - - return fnc - - def __str__(self): - """ - converts call to string - - { - 'collection': 'fish', - 'call': [ // call is sequence of methods - { - 'method': 'find', - 'args': [{a:1}, {b:2}] - }, - { - 'method': 'sort', - 'args': [{c:3}] - }, - ] - } - - to: - - "db_test.fish.find({a:1}, {b:2}).sort({c:3})" - """ - - call_str = f"db.{self.collection}" - for step in self.pipeline: - args_str = [] - for arg in step["args"]: - args_str.append(MongoJSONEncoder().encode(arg)) - call_str += f".{step['method']}({','.join(args_str)})" - return call_str - - def __repr__(self): - return f"MongoQuery({self.collection}, {str(self.pipeline)})" diff --git a/mindsdb/integrations/handlers/ms_teams_handler/README.md b/mindsdb/integrations/handlers/ms_teams_handler/README.md deleted file mode 100644 index 7121c534fe5..00000000000 --- a/mindsdb/integrations/handlers/ms_teams_handler/README.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: Microsoft Teams -sidebarTitle: Microsoft Teams ---- - -This documentation describes the integration of MindsDB with [Microsoft Teams](https://www.microsoft.com/en-us/microsoft-teams/group-chat-software), the ultimate messaging app for your organization. -The integration allows MindsDB to access data from Microsoft Teams and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Microsoft Teams to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Microsoft Teams from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/ms_teams_handler) as an engine. - -```sql -CREATE DATABASE teams_datasource -WITH ENGINE = 'teams', -PARAMETERS = { - "client_id": "12345678-90ab-cdef-1234-567890abcdef", - "client_secret": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6", - "tenant_id": "abcdef12-3456-7890-abcd-ef1234567890" -}; -``` - -Required connection parameters include the following: - -* `client_id`: The client ID of the registered Microsoft Entra ID application. -* `client_secret`: The client secret of the registered Microsoft Entra ID application. -* `tenant_id`: The tenant ID of the Microsoft Entra ID directory. - -Optional connection parameters include the following: - -* `permission_mode`: The type of permissions used to access data in Microsoft Teams. Can be either `delegated` (default) or `application`. - - -The `delegated` permission mode requires user sign-in and allows the app to access data on behalf of the signed-in user. The `application` permission mode does not require user sign-in and allows the app to access data without a user context. You can learn more about permission types in the [Microsoft Graph permissions documentation](https://learn.microsoft.com/en-us/graph/auth/auth-concepts#delegated-and-application-permissions). -Note that application permissions generally require higher privileges and admin consent compared to delegated permissions, as they allow broader access to organizational data without user context. - - - -Microsoft Entra ID was previously known as Azure Active Directory (Azure AD). - - -### How to set up the Microsoft Entra ID app registration - -Follow the instructions below to set up the Microsoft Teams app that will be used to connect with MindsDB. - - - - - Navigate to Microsoft Entra ID in the Azure portal, click on *Add* and then on *App registration*. - - Click on *New registration* and fill out the *Name* and select the `Accounts in any organizational directory (Any Azure AD directory - Multitenant)` option under *Supported account types*. - - If you chose the `application` permission mode you may skip this step, but if you are using `delegated` permissions, select `Web` as the platform and enter URL where MindsDB has been deployed followed by /verify-auth under *Redirect URI*. For example, if you are running MindsDB locally (on https://localhost:47334), enter https://localhost:47334/verify-auth in the Redirect URIs field. - - Click on *Register*. **Save the *Application (client) ID* and *Directory (tenant) ID* for later use.** - - Click on *API Permissions* and then click on *Add a permission*. - - Select *Microsoft Graph* and then click on either *Delegated permissions* or *Application permissions* based on the permission mode you have chosen. - - Search for the following permissions and select them: - - `delegated` permission mode: - - Team.ReadBasic.All - - Channel.ReadBasic.All - - ChannelMessage.Read.All - - Chat.Read - - `application` permission mode: - - Group.Read.All - - ChannelMessage.Read.All - - Chat.Read.All - - Click on **Add permissions**. - - Request an administrator to grant consent for the above permissions. If you are the administrator, click on **Grant admin consent for [your organization]** and then click on **Yes**. - - Click on *Certificates & secrets* under *Manage*. - - Click on *New client secret* and fill out the *Description* and select an appropriate *Expires* period, and click on *Add*. - - Copy and **save the client secret in a secure location.** - - If you already have an existing app registration, you can use it instead of creating a new one and skip the above steps. - - - - - Open the MindsDB editor and create a connection to Microsoft Teams using the client ID, client secret and tenant ID obtained in the previous steps. Use the `CREATE DATABASE` statement as shown above. - - - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM teams_datasource.table_name -LIMIT 10; -``` - - -The above example utilize `teams_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Supported Tables - -* `teams`: The table containing information about the teams in Microsoft Teams. -* `channels`: The table containing information about the channels in Microsoft Teams. -* `channel_messages`: The table containing information about messages from channels in Microsoft Teams. -* `chats`: The table containing information about the chats in Microsoft Teams. -* `chat_messages`: The table containing information about messages from chats in Microsoft Teams. diff --git a/mindsdb/integrations/handlers/ms_teams_handler/__about__.py b/mindsdb/integrations/handlers/ms_teams_handler/__about__.py deleted file mode 100644 index d16e1c36ad8..00000000000 --- a/mindsdb/integrations/handlers/ms_teams_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Microsoft Teams handler" -__package_name__ = "mindsdb_ms_teams_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Microsoft Teams" -__author__ = 'MindsDB Inc' -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/ms_teams_handler/__init__.py b/mindsdb/integrations/handlers/ms_teams_handler/__init__.py deleted file mode 100644 index 69cea558412..00000000000 --- a/mindsdb/integrations/handlers/ms_teams_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description - -try: - from .ms_teams_handler import MSTeamsHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Microsoft Teams" -name = "teams" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY - -__all__ = [ - "Handler", - "version", - "name", - "type", - "support_level", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/ms_teams_handler/icon.svg b/mindsdb/integrations/handlers/ms_teams_handler/icon.svg deleted file mode 100644 index 086d3615747..00000000000 --- a/mindsdb/integrations/handlers/ms_teams_handler/icon.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py b/mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py deleted file mode 100644 index 511b9d1a7b8..00000000000 --- a/mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +++ /dev/null @@ -1,501 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Text, List, Dict - -from requests.exceptions import RequestException - -from mindsdb.integrations.utilities.handlers.api_utilities.microsoft.ms_graph_api_utilities import MSGraphAPIBaseClient -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class MSGraphAPITeamsClient(MSGraphAPIBaseClient, ABC): - """ - The base class for the Microsoft Graph API client for the Microsoft Teams handler. - """ - - def check_connection(self) -> bool: - """ - Check if the connection to Microsoft Teams is established. - - Returns: - bool: True if the connection is established, False otherwise. - """ - try: - self._get_all_groups() - return True - except RequestException as request_error: - logger.error(f"Failed to check connection to Microsoft Teams: {request_error}") - return False - - def get_teams(self) -> List[Dict]: - """ - Get teams from Microsoft Teams. - - Returns: - List[Dict]: The teams data. - """ - return self._get_all_groups() - - def get_channels(self, group_id: Text = None, channel_ids: List[Text] = None) -> List[Dict]: - """ - Get channels from Microsoft Teams. - - Args: - group_id (Text): The ID of the group that the channels belong to. - channel_ids (List[Text]): The IDs of the channels. - - Returns: - List[Dict]: The channels data. - """ - if group_id and channel_ids: - return self._get_channels_in_group_by_ids(group_id, channel_ids) - elif group_id: - return self._get_all_channels_in_group(group_id) - elif channel_ids: - return self._get_channels_across_all_groups_by_ids(channel_ids) - else: - return self._get_all_channels_across_all_groups() - - def get_channel_messages(self, group_id: Text, channel_id: Text, message_ids: List[Text] = None) -> List[Dict]: - """ - Get channel messages from Microsoft Teams. - - Args: - group_id (Text): The ID of the group that the channel belongs to. - channel_id (Text): The ID of the channel that the messages belong to. - message_ids (List[Text]): The IDs of the messages. - - Returns: - List[Dict]: The messages data. - """ - if message_ids: - return self._get_messages_in_channel_by_ids(group_id, channel_id, message_ids) - else: - return self._get_all_messages_in_channel(group_id, channel_id) - - def get_chats(self, chat_ids: List[Text] = None) -> List[Dict]: - """ - Get chats from Microsoft Teams. - - Args: - chat_ids (List[Text]): The IDs of the chats. - - Returns: - List[Dict]: The chats data. - """ - if chat_ids: - return self._get_chats_by_ids(chat_ids) - else: - return self._get_all_chats() - - def get_chat_messages(self, chat_id: Text, message_ids: List[Text] = None) -> List[Dict]: - """ - Get chat messages from Microsoft Teams. - - Args: - chat_id (Text): The ID of the chat that the messages belong to. - message_ids (List[Text]): The IDs of the messages. - - Returns: - List[Dict]: The messages data. - """ - if message_ids: - return self._get_messages_in_chat_by_ids(chat_id, message_ids) - else: - return self._get_all_messages_in_chat(chat_id) - - def _get_all_group_ids(self) -> List[Text]: - """ - Get all group IDs related to Microsoft Teams. - - Returns: - List[Text]: The group IDs. - """ - if not self._group_ids: - groups = self._get_all_groups() - self._group_ids = [group["id"] for group in groups] - - return self._group_ids - - @abstractmethod - def _get_all_groups(self) -> List[Dict]: - """ - Get all groups related to Microsoft Teams. - - Returns: - List[Dict]: The groups data. - """ - pass - - @abstractmethod - def _get_chat_by_id(self, chat_id: Text) -> Dict: - """ - Get a chat by its ID. - - Args: - chat_id (Text): The ID of the chat. - - Returns: - Dict: The chat data. - """ - pass - - @abstractmethod - def _get_all_chats(self, limit: int = None) -> List[Dict]: - """ - Get all chats related to Microsoft Teams. - - Args: - limit (int): The maximum number of chats to return. - - Returns: - List[Dict]: The chats data. - """ - pass - - @abstractmethod - def _get_message_in_chat_by_id(self, chat_id: Text, message_id: Text) -> Dict: - """ - Get a message by its ID and the ID of the chat that it belongs to. - - Args: - chat_id (Text): The ID of the chat that the message belongs to. - message_id (Text): The ID of the message. - - Returns: - Dict: The message data. - """ - pass - - @abstractmethod - def _get_all_messages_in_chat(self, chat_id: Text, limit: int = None) -> List[Dict]: - """ - Get messages of a chat by its ID. - - Args: - chat_id (Text): The ID of the chat. - - Returns: - List[Dict]: The messages data. - """ - pass - - def _get_channel_in_group_by_id(self, group_id: Text, channel_id: Text) -> Dict: - """ - Get a channel by its ID and the ID of the group that it belongs to. - - Args: - group_id (Text): The ID of the group that the channel belongs to. - channel_id (Text): The ID of the channel. - - Returns: - Dict: The channel data. - """ - channel = self.fetch_data_json(f"teams/{group_id}/channels/{channel_id}") - # Add the group ID to the channel data. - channel.update({"teamId": group_id}) - - return channel - - def _get_channels_in_group_by_ids(self, group_id: Text, channel_ids: List[Text]) -> List[Dict]: - """ - Get channels by their IDs and the ID of the group that they belong to. - - Args: - group_id (Text): The ID of the group that the channels belong to. - channel_ids (List[Text]): The IDs of the channels. - - Returns: - List[Dict]: The channels data. - """ - channels = [] - for channel_id in channel_ids: - channels.append(self._get_channel_in_group_by_id(group_id, channel_id)) - - return channels - - def _get_all_channels_in_group(self, group_id: Text) -> List[Dict]: - """ - Get all channels of a group by its ID. - - Args: - group_id (Text): The ID of the group. - - Returns: - List[Dict]: The channels data. - """ - channels = self.fetch_data_json(f"teams/{group_id}/channels") - for channel in channels: - channel["teamId"] = group_id - - return channels - - def _get_all_channels_across_all_groups(self) -> List[Dict]: - """ - Get all channels across all groups related to Microsoft Teams. - - Returns: - List[Dict]: The channels data. - """ - channels = [] - for group_id in self._get_all_group_ids(): - channels += self._get_all_channels_in_group(group_id) - - return channels - - def _get_channels_across_all_groups_by_ids(self, channel_ids: List[Text]) -> List[Dict]: - """ - Get channels by their IDs. - - Args: - channel_ids (List[Text]): The IDs of the channels. - - Returns: - List[Dict]: The channels data. - """ - channels = self._get_all_channels_across_all_groups() - - return [channel for channel in channels if channel["id"] in channel_ids] - - def _get_message_in_channel_by_id(self, group_id: Text, channel_id: Text, message_id: Text) -> Dict: - """ - Get a message by its ID, the ID of the group that it belongs to, and the ID of the channel that it belongs to. - - Args: - group_id (Text): The ID of the group that the channel belongs to. - channel_id (Text): The ID of the channel that the message belongs to. - message_id (Text): The ID of the message. - - Returns: - Dict: The message data. - """ - return self.fetch_data_json(f"teams/{group_id}/channels/{channel_id}/messages/{message_id}") - - def _get_messages_in_channel_by_ids(self, group_id: Text, channel_id: Text, message_ids: List[Text]) -> List[Dict]: - """ - Get messages by their IDs, the ID of the group that they belong to, and the ID of the channel that they belong to. - - Args: - group_id (Text): The ID of the group that the channel belongs to. - channel_id (Text): The ID of the channel that the messages belong to. - message_ids (List[Text]): The IDs of the messages. - - Returns: - List[Dict]: The messages data. - """ - messages = [] - for message_id in message_ids: - messages.append(self._get_message_in_channel_by_id(group_id, channel_id, message_id)) - - return messages - - def _get_all_messages_in_channel(self, group_id: Text, channel_id: Text, limit: int = None) -> List[Dict]: - """ - Get messages of a channel by its ID and the ID of the group that it belongs to. - - Args: - group_id (Text): The ID of the group that the channel belongs to. - channel_id (Text): The ID of the channel. - - Returns: - List[Dict]: The messages data. - """ - messages = [] - for messages_batch in self.fetch_paginated_data(f"teams/{group_id}/channels/{channel_id}/messages"): - messages += messages_batch - - if limit and len(messages) >= limit: - break - - return messages[:limit] - - def _get_chats_by_ids(self, chat_ids: List[Text]) -> List[Dict]: - """ - Get chats by their IDs. - - Args: - chat_ids (List[Text]): The IDs of the chats. - - Returns: - List[Dict]: The chats data. - """ - chats = [] - for chat_id in chat_ids: - chats.append(self._get_chat_by_id(chat_id)) - - return chats - - def _get_messages_in_chat_by_ids(self, chat_id: Text, message_ids: List[Text]) -> List[Dict]: - """ - Get messages by their IDs and the ID of the chat that they belong to. - - Args: - chat_id (Text): The ID of the chat that the messages belong to. - message_ids (List[Text]): The IDs of the messages. - - Returns: - List[Dict]: The messages data. - """ - messages = [] - for message_id in message_ids: - messages.append(self._get_message_in_chat_by_id(chat_id, message_id)) - - return messages - - -class MSGraphAPITeamsApplicationPermissionsClient(MSGraphAPITeamsClient): - """ - The Microsoft Graph API client for the Microsoft Teams handler with application permissions. - This client is used for accessing the Microsoft Teams specific endpoints of the Microsoft Graph API. - Several common methods for submitting requests, fetching data, etc. are inherited from the base class. - """ - - def _get_all_groups(self) -> List[Dict]: - """ - Get all groups related to Microsoft Teams. - - Returns: - List[Dict]: The groups data. - """ - return self.fetch_data_json( - "/groups", - params={"$filter": "resourceProvisioningOptions/Any(x:x eq 'Team')"} - ) - - def _get_chat_by_id(self, chat_id: Text) -> Dict: - """ - Get a chat by its ID. - - Args: - chat_id (Text): The ID of the chat. - - Returns: - Dict: The chat data. - """ - return self.fetch_data_json(f"chats/{chat_id}") - - def _get_all_chats(self, limit: int = None) -> List[Dict]: - """ - Get all chats related to Microsoft Teams. - - Args: - limit (int): The maximum number of chats to return. - - Returns: - List[Dict]: The chats data. - """ - raise RuntimeError("Retrieving all chats is not supported with application permissions. Either use delegated permissions or provide a chat ID.") - - def _get_message_in_chat_by_id(self, chat_id: Text, message_id: Text) -> Dict: - """ - Get a message by its ID and the ID of the chat that it belongs to. - - Args: - chat_id (Text): The ID of the chat that the message belongs to. - message_id (Text): The ID of the message. - - Returns: - Dict: The message data. - """ - return self.fetch_data_json(f"chats/{chat_id}/messages/{message_id}") - - def _get_all_messages_in_chat(self, chat_id: Text, limit: int = None) -> List[Dict]: - """ - Get messages of a chat by its ID. - - Args: - chat_id (Text): The ID of the chat. - - Returns: - List[Dict]: The messages data. - """ - messages = [] - for messages_batch in self.fetch_paginated_data(f"chats/{chat_id}/messages"): - messages += messages_batch - - if limit and len(messages) >= limit: - break - - return messages[:limit] - - -class MSGraphAPITeamsDelegatedPermissionsClient(MSGraphAPITeamsClient): - """ - The Microsoft Graph API client for the Microsoft Teams handler with delegated permissions. - This client is used for accessing the Microsoft Teams specific endpoints of the Microsoft Graph API. - Several common methods for submitting requests, fetching data, etc. are inherited from the base class. - """ - - def _get_all_groups(self) -> List[Dict]: - """ - Get all groups that the signed in user is a member of. - - Returns: - List[Dict]: The groups data. - """ - return self.fetch_data_json("me/joinedTeams") - - def _get_chat_by_id(self, chat_id: Text) -> Dict: - """ - Get a chat by its ID. - - Args: - chat_id (Text): The ID of the chat. - - Returns: - Dict: The chat data. - """ - return self.fetch_data_json(f"/me/chats/{chat_id}") - - def _get_all_chats(self, limit: int = None) -> List[Dict]: - """ - Get all chats of the signed in user. - - Args: - limit (int): The maximum number of chats to return. - - Returns: - List[Dict]: The chats data. - """ - chats = [] - for chat_batch in self.fetch_paginated_data("me/chats"): - chats += chat_batch - - if limit and len(chats) >= limit: - break - - return chats[:limit] - - def _get_message_in_chat_by_id(self, chat_id: Text, message_id: Text) -> Dict: - """ - Get a message by its ID and the ID of the chat that it belongs to. - - Args: - chat_id (Text): The ID of the chat that the message belongs to. - message_id (Text): The ID of the message. - - Returns: - Dict: The message data. - """ - return self.fetch_data_json(f"me/chats/{chat_id}/messages/{message_id}") - - def _get_all_messages_in_chat(self, chat_id: Text, limit: int = None) -> List[Dict]: - """ - Get messages of a chat by its ID. - - Args: - chat_id (Text): The ID of the chat. - - Returns: - List[Dict]: The messages data. - """ - messages = [] - for messages_batch in self.fetch_paginated_data(f"me/chats/{chat_id}/messages"): - messages += messages_batch - - if limit and len(messages) >= limit: - break - - return messages[:limit] diff --git a/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py b/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py deleted file mode 100644 index ea26b4bdbab..00000000000 --- a/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +++ /dev/null @@ -1,268 +0,0 @@ -from typing import Dict, Text, Callable, Union - -from botbuilder.schema import Activity, ActivityTypes -from botbuilder.schema import ChannelAccount -from botframework.connector import ConnectorClient -from botframework.connector.auth import MicrosoftAppCredentials -import msal -from requests.exceptions import RequestException - -from mindsdb.integrations.handlers.ms_teams_handler.ms_graph_api_teams_client import ( - MSGraphAPIBaseClient, - MSGraphAPITeamsApplicationPermissionsClient, - MSGraphAPITeamsDelegatedPermissionsClient -) -from mindsdb.integrations.handlers.ms_teams_handler.ms_teams_tables import ( - ChannelsTable, ChannelMessagesTable, ChatsTable, ChatMessagesTable, TeamsTable -) -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.integrations.libs.api_handler import APIChatHandler -from mindsdb.integrations.utilities.handlers.auth_utilities.microsoft import ( - MSGraphAPIApplicationPermissionsManager, - MSGraphAPIDelegatedPermissionsManager -) -from mindsdb.integrations.utilities.handlers.auth_utilities.exceptions import AuthException -from mindsdb.interfaces.chatbot.types import ChatBotMessage -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -def chatbot_only(func): - def wrapper(self, *args, **kwargs): - if self.connection_data.get('opertion_mode', 'datasource') != 'chatbot': - raise ValueError("This connection can only be used as a data source. Please use a chatbot connection by setting the 'mode' parameter to 'chat'.") - return func(self, *args, **kwargs) - return wrapper - - -class MSTeamsHandler(APIChatHandler): - """ - This handler handles the connection and execution of SQL statements on Microsoft Teams via the Microsoft Graph API. - It is also responsible for handling the chatbot functionality. - """ - - name = 'teams' - - def __init__(self, name: str, **kwargs): - """ - Initializes the handler. - - Args: - name (str): name of particular handler instance - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.handler_storage = kwargs['handler_storage'] - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - self.service_url = None - self.channel_id = None - self.bot_id = None - self.conversation_id = None - - def connect(self) -> Union[MicrosoftAppCredentials, MSGraphAPIBaseClient]: - """ - Establishes a connection to the Microsoft Teams registered app or the Microsoft Graph API. - - Returns: - Union[MicrosoftAppCredentials, MSGraphAPITeamsDelegatedPermissionsClient]: A connection object to the Microsoft Teams registered app or the Microsoft Graph API. - """ - if self.is_connected: - return self.connection - - # The default operation mode is 'datasource'. This is used for data source connections. - operation_mode = self.connection_data.get('operation_mode', 'datasource') - if operation_mode == 'datasource': - # Initialize the token cache. - cache = msal.SerializableTokenCache() - - # Load the cache from file if it exists. - cache_file = 'cache.bin' - try: - cache_content = self.handler_storage.file_get(cache_file) - except FileNotFoundError: - cache_content = None - - if cache_content: - cache.deserialize(cache_content) - - # The default permissions mode is 'delegated'. This requires the user to sign in. - permission_mode = self.connection_data.get('permission_mode', 'delegated') - if permission_mode == 'delegated': - permissions_manager = MSGraphAPIDelegatedPermissionsManager( - client_id=self.connection_data['client_id'], - client_secret=self.connection_data['client_secret'], - tenant_id=self.connection_data['tenant_id'], - cache=cache, - code=self.connection_data.get('code') - ) - - elif permission_mode == 'application': - permissions_manager = MSGraphAPIApplicationPermissionsManager( - client_id=self.connection_data['client_id'], - client_secret=self.connection_data['client_secret'], - tenant_id=self.connection_data['tenant_id'], - cache=cache - ) - - else: - raise ValueError("The supported permission modes are 'delegated' and 'application'.") - - access_token = permissions_manager.get_access_token() - - # Save the cache back to file if it has changed. - if cache.has_state_changed: - self.handler_storage.file_set(cache_file, cache.serialize().encode('utf-8')) - - if permission_mode == 'delegated': - self.connection = MSGraphAPITeamsDelegatedPermissionsClient(access_token) - - else: - self.connection = MSGraphAPITeamsApplicationPermissionsClient(access_token) - - self._register_table('channels', ChannelsTable(self)) - self._register_table('channel_messages', ChannelMessagesTable(self)) - self._register_table('chats', ChatsTable(self)) - self._register_table('chat_messages', ChatMessagesTable(self)) - self._register_table('teams', TeamsTable(self)) - - elif operation_mode == 'chatbot': - self.connection = MicrosoftAppCredentials( - self.connection_data['app_id'], - self.connection_data['app_password'] - ) - - else: - raise ValueError("The supported operation modes are 'datasource' and 'chatbot'.") - - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to Microsoft Teams. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - - try: - connection = self.connect() - # A connection check against the Microsoft Graph API is run if the connection is in 'datasource' mode. - if self.connection_data.get('operation_mode', 'datasource') == 'datasource' and connection.check_connection(): - response.success = True - response.copy_storage = True - else: - raise RequestException("Connection check failed!") - except (ValueError, RequestException) as known_error: - logger.error(f'Connection check to Microsoft Teams failed, {known_error}!') - response.error_message = str(known_error) - except AuthException as error: - response.error_message = str(error) - response.redirect_url = error.auth_url - return response - except Exception as unknown_error: - logger.error(f'Connection check to Microsoft Teams failed due to an unknown error, {unknown_error}!') - response.error_message = str(unknown_error) - - self.is_connected = response.success - - return response - - @chatbot_only - def get_chat_config(self) -> Dict: - """ - Gets the configuration for the chatbot. - This method is required for the implementation of the chatbot. - - Returns: - Dict: The configuration for the chatbot. - """ - params = { - 'polling': { - 'type': 'webhook' - } - } - - return params - - @chatbot_only - def get_my_user_name(self) -> Text: - """ - Gets the name of the signed in user. - This method is required for the implementation of the chatbot. - - Returns: - Text: The name of the signed in user. - """ - return None - - @chatbot_only - def on_webhook(self, request: Dict, callback: Callable) -> None: - """ - Handles a webhook request. - - Args: - request (Dict): The request data. - callback (Callable): The callback function to call. - """ - self.service_url = request["serviceUrl"] - self.channel_id = request["channelId"] - self.bot_id = request["from"]["id"] - self.conversation_id = request["conversation"]["id"] - - chat_bot_message = ChatBotMessage( - ChatBotMessage.Type.DIRECT, - text=request["text"], - user=request["from"]["id"], - destination=request["recipient"]["id"] - ) - - callback( - chat_id=request['conversation']['id'], - message=chat_bot_message - ) - - @chatbot_only - def respond(self, message: ChatBotMessage) -> None: - """ - Sends a response to the chatbot. - - Args: - message (ChatBotMessage): The message to send - - Raises: - ValueError: If the chatbot message is not of type DIRECT. - - Returns: - None - """ - credentials = self.connect() - - connector = ConnectorClient(credentials, base_url=self.service_url) - connector.conversations.send_to_conversation( - self.conversation_id, - Activity( - type=ActivityTypes.message, - channel_id=self.channel_id, - recipient=ChannelAccount( - id=message.destination - ), - from_property=ChannelAccount( - id=self.bot_id - ), - text=message.text - ) - ) diff --git a/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py b/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py deleted file mode 100644 index 2c71dd750f2..00000000000 --- a/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +++ /dev/null @@ -1,413 +0,0 @@ -from typing import List - -import pandas as pd - -from mindsdb.integrations.handlers.ms_teams_handler.ms_graph_api_teams_client import MSGraphAPITeamsDelegatedPermissionsClient -from mindsdb.integrations.libs.api_handler import APIResource -from mindsdb.integrations.utilities.sql_utils import ( - FilterCondition, - FilterOperator, - SortColumn -) - - -class TeamsTable(APIResource): - """ - The table abstraction for the 'teams' resource of the Microsoft Graph API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs - ): - """ - Executes a parsed SELECT SQL query on the 'teams' resource of the Microsoft Graph API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect() - teams = client.get_teams() - - teams_df = pd.json_normalize(teams, sep="_") - teams_df = teams_df.reindex(columns=self.get_columns(), fill_value=None) - - return teams_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'teams' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'teams' resource. - """ - return [ - "id", - "createdDateTime", - "displayName", - "description", - "internalId", - "classification", - "specialization", - "visibility", - "webUrl", - "isArchived", - "tenantId", - "isMembershipLimitedToOwners", - ] - - -class ChannelsTable(APIResource): - """ - The table abstraction for the 'channels' resource of the Microsoft Graph API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs - ): - """ - Executes a parsed SELECT SQL query on the 'channels' resource of the Microsoft Graph API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect() - channels = [] - - team_id, channel_ids = None, None - for condition in conditions: - if condition.column == "teamId": - if condition.op == FilterOperator.EQUAL: - team_id = condition.value - - else: - raise ValueError( - f"Unsupported operator '{condition.op}' for column 'teamId'." - ) - - condition.applied = True - - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - channel_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - channel_ids = condition.value - - else: - raise ValueError( - f"Unsupported operator '{condition.op}' for column 'id'." - ) - - condition.applied = True - - channels = client.get_channels(team_id, channel_ids) - - channels_df = pd.json_normalize(channels, sep="_") - channels_df = channels_df[self.get_columns()] - - return channels_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'chats' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'chats' resource. - """ - return [ - "id", - "createdDateTime", - "displayName", - "description", - "isFavoriteByDefault", - "email", - "tenantId", - "webUrl", - "membershipType", - "teamId", - ] - - -class ChannelMessagesTable(APIResource): - """ - The table abstraction for the 'channel messages' resource of the Microsoft Graph API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs - ): - """ - Executes a parsed SELECT SQL query on the 'channel messages' resource of the Microsoft Graph API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect() - messages = [] - - group_id, channel_id, message_ids = None, None, None - for condition in conditions: - if condition.column == "channelIdentity_teamId": - if condition.op == FilterOperator.EQUAL: - group_id = condition.value - - else: - raise ValueError( - f"Unsupported operator '{condition.op}' for column 'channelIdentity_teamId'." - ) - - condition.applied = True - - if condition.column == "channelIdentity_channelId": - if condition.op == FilterOperator.EQUAL: - channel_id = condition.value - - else: - raise ValueError( - f"Unsupported operator '{condition.op}' for column 'channelIdentity_channelId'." - ) - - condition.applied = True - - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - message_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - message_ids = condition.value - - else: - raise ValueError( - f"Unsupported operator '{condition.op}' for column 'id'." - ) - - condition.applied = True - - if not group_id or not channel_id: - raise ValueError("The 'channelIdentity_teamId' and 'channelIdentity_channelId' columns are required.") - - messages = client.get_channel_messages(group_id, channel_id, message_ids) - - messages_df = pd.json_normalize(messages, sep="_") - messages_df = messages_df[self.get_columns()] - - return messages_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'chat messages' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'chat messages' resource. - """ - return [ - "id", - "replyToId", - "etag", - "messageType", - "createdDateTime", - "lastModifiedDateTime", - "lastEditedDateTime", - "deletedDateTime", - "subject", - "summary", - "chatId", - "importance", - "locale", - "webUrl", - "policyViolation", - "from_application", - "from_device", - "from_user_id", - "from_user_displayName", - "from_user_userIdentityType", - "body_contentType", - "body_content", - "channelIdentity_teamId", - "channelIdentity_channelId", - ] - - -class ChatsTable(APIResource): - """ - The table abstraction for the 'chats' resource of the Microsoft Graph API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs - ): - """ - Executes a parsed SELECT SQL query on the 'chats' resource of the Microsoft Graph API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect() - chats = [] - - chat_ids = None - for condition in conditions: - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - chat_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - chat_ids = condition.value - - else: - raise ValueError( - f"Unsupported operator '{condition.op}' for column 'id'." - ) - - condition.applied = True - - chats = client.get_chats(chat_ids) - - chats_df = pd.json_normalize(chats, sep="_") - chats_df = chats_df[self.get_columns()] - - return chats_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'chats' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'chats' resource. - """ - return [ - "id", - "topic", - "createdDateTime", - "lastUpdatedDateTime", - "chatType", - "webUrl", - "isHiddenForAllMembers" - ] - - -class ChatMessagesTable(APIResource): - """ - The table abstraction for the 'chat messages' resource of the Microsoft Graph API. - """ - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs - ): - """ - Executes a parsed SELECT SQL query on the 'chat messages' resource of the Microsoft Graph API. - - Args: - conditions (List[FilterCondition]): The list of parsed filter conditions. - limit (int): The maximum number of records to return. - sort (List[SortColumn]): The list of parsed sort columns. - targets (List[str]): The list of target columns to return. - """ - client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect() - messages = [] - - chat_id, message_ids = None, None - for condition in conditions: - if condition.column == "chatId": - if condition.op == FilterOperator.EQUAL: - chat_id = condition.value - - else: - raise ValueError( - f"Unsupported operator '{condition.op}' for column 'chatId'." - ) - - condition.applied = True - - if condition.column == "id": - if condition.op == FilterOperator.EQUAL: - message_ids = [condition.value] - - elif condition.op == FilterOperator.IN: - message_ids = condition.value - - else: - raise ValueError( - f"Unsupported operator '{condition.op}' for column 'id'." - ) - - condition.applied = True - - if not chat_id: - raise ValueError("The 'chatId' column is required.") - - messages = client.get_chat_messages(chat_id, message_ids) - - messages_df = pd.json_normalize(messages, sep="_") - messages_df = messages_df[self.get_columns()] - - return messages_df - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the 'chat messages' resource. - - Returns: - List[Text]: A list of attributes (columns) of the 'chat messages' resource. - """ - return [ - "id", - "replyToId", - "etag", - "messageType", - "createdDateTime", - "lastModifiedDateTime", - "lastEditedDateTime", - "deletedDateTime", - "subject", - "summary", - "chatId", - "importance", - "locale", - "webUrl", - "policyViolation", - "from_application", - "from_device", - "from_user_id", - "from_user_displayName", - "from_user_userIdentityType", - "body_contentType", - "body_content", - ] diff --git a/mindsdb/integrations/handlers/ms_teams_handler/requirements.txt b/mindsdb/integrations/handlers/ms_teams_handler/requirements.txt deleted file mode 100644 index 227ff3ec92c..00000000000 --- a/mindsdb/integrations/handlers/ms_teams_handler/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -botframework-connector -botbuilder-schema -msal --r mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/requirements.txt \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py b/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py index bfff010e02b..7b6e42fff34 100644 --- a/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +++ b/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py @@ -327,7 +327,7 @@ def check_connection(self) -> StatusResponse: # Execute a simple query to test the connection cur.execute("select 1;") response.success = True - except OperationalError as e: + except Exception as e: logger.error(f"Error connecting to Microsoft SQL Server {self.database}, {e}!") response.error_message = str(e) diff --git a/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py b/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py index 2b6c67c4eea..86882d03563 100644 --- a/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +++ b/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py @@ -1,4 +1,4 @@ -from typing import Optional, List, Dict, Any +from typing import Optional, List, Dict, Any, Generator import pandas as pd import mysql.connector @@ -12,11 +12,16 @@ from mindsdb.integrations.libs.response import ( HandlerStatusResponse as StatusResponse, HandlerResponse as Response, - RESPONSE_TYPE, + TableResponse, + OkResponse, + ErrorResponse, + DataHandlerResponse, ) from mindsdb.integrations.handlers.mysql_handler.settings import ConnectionConfig from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import C_TYPES, DATA_C_TYPE_MAP +from mindsdb.utilities.types.column import Column +from mindsdb.utilities.config import config as mindsdb_config logger = log.getLogger(__name__) @@ -37,57 +42,47 @@ def _map_type(mysql_type_text: str) -> MYSQL_DATA_TYPE: return MYSQL_DATA_TYPE.TEXT -def _make_table_response(result: List[Dict[str, Any]], cursor: mysql.connector.cursor.MySQLCursor) -> Response: - """Build response from result and cursor. +def _get_columns(cursor: mysql.connector.cursor.MySQLCursor) -> list[Column]: + """Get columns from cursor description. Args: - result (list[dict]): result of the query. cursor (mysql.connector.cursor.MySQLCursor): cursor object. Returns: - Response: response object. + list[Column]: List of Column objects with type and dtype info. """ description = cursor.description reverse_c_type_map = {v.code: k for k, v in DATA_C_TYPE_MAP.items() if v.code != C_TYPES.MYSQL_TYPE_BLOB} - mysql_types: list[MYSQL_DATA_TYPE] = [] + columns = [] for col in description: + column_name = col[0] type_int = col[1] - if isinstance(type_int, int) is False: - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - continue - if type_int == C_TYPES.MYSQL_TYPE_TINY: + if isinstance(type_int, int) is False: + mysql_type = MYSQL_DATA_TYPE.TEXT + elif type_int == C_TYPES.MYSQL_TYPE_TINY: # There are 3 types that returns as TINYINT: TINYINT, BOOL, BOOLEAN. - mysql_types.append(MYSQL_DATA_TYPE.TINYINT) - continue - - if type_int in reverse_c_type_map: - mysql_types.append(reverse_c_type_map[type_int]) - continue - - if type_int == C_TYPES.MYSQL_TYPE_BLOB: + mysql_type = MYSQL_DATA_TYPE.TINYINT + elif type_int in reverse_c_type_map: + mysql_type = reverse_c_type_map[type_int] + elif type_int == C_TYPES.MYSQL_TYPE_BLOB: # region determine text/blob type by flags # Unfortunately, there is no way to determine particular type of text/blob column by flags. # Subtype have to be determined by 8-s element of description tuple, but mysql.conector # return the same value for all text types (TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT), and for # all blob types (TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB). - if col[7] == 16: # and col[8] == 45 - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - elif col[7] == 144: # and col[8] == 63 - mysql_types.append(MYSQL_DATA_TYPE.BLOB) + if col[7] == 16: + mysql_type = MYSQL_DATA_TYPE.TEXT + elif col[7] == 144: + mysql_type = MYSQL_DATA_TYPE.BLOB else: logger.debug(f"MySQL handler: unknown type code {col[7]}, use TEXT as fallback.") - mysql_types.append(MYSQL_DATA_TYPE.TEXT) + mysql_type = MYSQL_DATA_TYPE.TEXT # endregion else: - logger.warning(f"MySQL handler: unknown type id={type_int} in column {col[0]}, use TEXT as fallback.") - mysql_types.append(MYSQL_DATA_TYPE.TEXT) + logger.warning(f"MySQL handler: unknown type id={type_int} in column {column_name}, use TEXT as fallback.") + mysql_type = MYSQL_DATA_TYPE.TEXT - # region cast int and bool to nullable types - serieses = [] - for i, mysql_type in enumerate(mysql_types): - expected_dtype = None - column_name = description[i][0] if mysql_type in ( MYSQL_DATA_TYPE.SMALLINT, MYSQL_DATA_TYPE.INT, @@ -98,12 +93,27 @@ def _make_table_response(result: List[Dict[str, Any]], cursor: mysql.connector.c expected_dtype = "Int64" elif mysql_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): expected_dtype = "boolean" - serieses.append(pd.Series([row[column_name] for row in result], dtype=expected_dtype, name=description[i][0])) - df = pd.concat(serieses, axis=1, copy=False) - # endregion + else: + expected_dtype = None + + columns.append(Column(name=column_name, type=mysql_type, dtype=expected_dtype)) + return columns + + +def _make_df(result: list[tuple[Any]], columns: list[Column]) -> pd.DataFrame: + """Make pandas DataFrame from result and columns. + + Args: + result (list[tuple[Any]]): result of the query (list of tuples). + columns (list[Column]): list of columns. - response = Response(RESPONSE_TYPE.TABLE, df, affected_rows=cursor.rowcount, mysql_types=mysql_types) - return response + Returns: + pd.DataFrame: pandas DataFrame. + """ + serieses = [] + for i, column in enumerate(columns): + serieses.append(pd.Series([row[i] for row in result], dtype=column.dtype, name=column.name)) + return pd.concat(serieses, axis=1, copy=False) class MySQLHandler(MetaDatabaseHandler): @@ -112,6 +122,7 @@ class MySQLHandler(MetaDatabaseHandler): """ name = "mysql" + stream_response = True def __init__(self, name: str, **kwargs: Any) -> None: super().__init__(name) @@ -229,41 +240,100 @@ def check_connection(self) -> StatusResponse: return result - def native_query(self, query: str) -> Response: - """ - Executes a SQL query on the MySQL database and returns the result. + def native_query(self, query: str, stream: bool = True, **kwargs) -> DataHandlerResponse: + """Executes a SQL query on the MySQL database and returns the result. Args: query (str): The SQL query to be executed. + stream (bool): Whether to stream the results of the query. + **kwargs: Additional keyword arguments. Returns: - Response: A response object containing the result of the query or an error message. + DataHandlerResponse: A response object containing the result of the query or an error message. """ - need_to_close = not self.is_connected - connection = None - try: - connection = self.connect() - with connection.cursor(dictionary=True, buffered=True) as cur: - cur.execute(query) - if cur.with_rows: - result = cur.fetchall() - response = _make_table_response(result, cur) - else: - response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount) - except mysql.connector.Error as e: - logger.error( - f"Error running query: {query} on {self.connection_data.get('database', 'unknown')}! Error: {e}" - ) - response = Response(RESPONSE_TYPE.ERROR, error_code=e.errno or 1, error_message=str(e)) - if connection is not None and connection.is_connected(): - connection.rollback() + if stream is False: + response = self._execute_fetchall(query) + else: + generator = self._execute_fetchmany(query) + try: + response: TableResponse = next(generator) + response.data_generator = generator + except StopIteration as e: + response = e.value + if isinstance(response, DataHandlerResponse) is False: + raise + return response - if need_to_close: - self.disconnect() + def _execute_fetchall(self, query: str) -> DataHandlerResponse: + """Executes a SQL query on the MySQL database and returns the full result at once. + + Args: + query (str): The SQL query to be executed. + Returns: + DataHandlerResponse: A response object containing the result of the query or an error message. + """ + connection = self.connect() + with connection.cursor(buffered=True) as cursor: + try: + cursor.execute(query) + if cursor.with_rows: + result = cursor.fetchall() + columns = _get_columns(cursor) + df = _make_df(result, columns) + response = TableResponse(data=df, affected_rows=cursor.rowcount, columns=columns) + else: + response = OkResponse(affected_rows=cursor.rowcount) + except Exception as e: + response = self._handle_query_exception(e, query, connection) return response - def query(self, query: ASTNode) -> Response: + def _execute_fetchmany( + self, query: str + ) -> Generator[TableResponse | pd.DataFrame, None, OkResponse | ErrorResponse]: + """Execute a SQL query on the MySQL database and return a generator of data frames. + + Args: + query (str): The SQL query to be executed. + + Returns: + Generator[TableResponse | pd.DataFrame, None, OkResponse | ErrorResponse]: Generator of data frames. + """ + connection = self.connect() + with connection.cursor(buffered=False) as cursor: + try: + cursor.execute(query) + if not cursor.with_rows: + return OkResponse(affected_rows=cursor.rowcount) + + columns = _get_columns(cursor) + yield TableResponse(affected_rows=cursor.rowcount, columns=columns) + + fetch_size = mindsdb_config["data_stream"]["fetch_size"] + while result := cursor.fetchmany(size=fetch_size): + yield _make_df(result, columns) + except Exception as e: + return self._handle_query_exception(e, query, connection) + + def _handle_query_exception(self, e: Exception, query: str, connection) -> ErrorResponse: + """Handle query execution errors with appropriate logging and rollback. + + Args: + e: The exception that was raised + query: The SQL query that failed + connection: The database connection to rollback + + Returns: + ErrorResponse with appropriate error details + """ + logger.error(f"Error running query: {query} on {self.connection_data.get('database', 'unknown')}! Error: {e}") + if connection is not None and connection.is_connected(): + connection.rollback() + if isinstance(e, mysql.connector.Error): + return ErrorResponse(error_code=e.errno or 1, error_message=str(e)) + return ErrorResponse(error_code=0, error_message=str(e)) + + def query(self, query: ASTNode) -> DataHandlerResponse: """ Retrieve the data from the SQL statement. """ @@ -312,7 +382,8 @@ def get_columns(self, table_name: str) -> Response: from information_schema.columns where - table_name = '{table_name}'; + table_name = '{table_name}' + and table_schema = DATABASE(); """ result = self.native_query(q) result.to_columns_table_response(map_type_fn=_map_type) diff --git a/mindsdb/integrations/handlers/netsuite_handler/__init__.py b/mindsdb/integrations/handlers/netsuite_handler/__init__.py index 673d6ed81ed..f6e6c4d6863 100644 --- a/mindsdb/integrations/handlers/netsuite_handler/__init__.py +++ b/mindsdb/integrations/handlers/netsuite_handler/__init__.py @@ -1,4 +1,4 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE +from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE from .__about__ import __version__ as version, __description__ as description from .connection_args import connection_args, connection_args_example @@ -14,7 +14,8 @@ title = "Oracle NetSuite" name = "netsuite" type = HANDLER_TYPE.DATA -icon_path = "netsuite.svg" +icon_path = "icon.svg" +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ "Handler", @@ -23,6 +24,7 @@ "type", "title", "description", + "support_level", "import_error", "icon_path", "connection_args", diff --git a/mindsdb/integrations/handlers/netsuite_handler/netsuite.svg b/mindsdb/integrations/handlers/netsuite_handler/icon.svg similarity index 100% rename from mindsdb/integrations/handlers/netsuite_handler/netsuite.svg rename to mindsdb/integrations/handlers/netsuite_handler/icon.svg diff --git a/mindsdb/integrations/handlers/newsapi_handler/README.md b/mindsdb/integrations/handlers/newsapi_handler/README.md deleted file mode 100644 index 9b5f28e8d17..00000000000 --- a/mindsdb/integrations/handlers/newsapi_handler/README.md +++ /dev/null @@ -1,83 +0,0 @@ -# NewsAPI API Handler - -This handler integrates with the [News API](https://newsapi.org/docs) to make aggregate article (data available to use for model training and predictions. - -## Example: Select articles from news api - -Connect to the NewsAPI API - -We start by creating a database to connect to the News API. - -``` -CREATE DATABASE newsAPI -WITH - ENGINE = 'newsapi' - PARAMETERS = { - "api_key": "Your api key" - }; -``` - -### Select Data - -To see if the connection was successful, try searching for the most recent article data. - -``` -SELECT * -FROM newsAPI.article -WHERE query = 'Python'; -``` - -The result come with all these columns - -* author -* title -* description -* url -* urlToImage -* publishedAt -* content -* source_id -* source_name -* query -* searchIn -* domains -* excludedDomains - -You can select with multiple clauses - -``` -SELECT * -FROM newsAPI.article -WHERE query = 'Python' -AND sources="bbc-news" -AND publishedAt >= "2021-03-23" AND publishedAt <= "2023-04-23" -LIMIT 4; -``` - -#### **WHERE CLAUSE PARAMETERS:** - -**query** : Base on the newsAPI documentation you must provide at least this parameter with is the keywords or phrases to search for in the article title and body. - -**sources** : Is a comma-seperated string of identifiers (maximum 20) for the news sources or blogs you want headlines from. - - You can check all available sources[here](https://newsapi.org/sources) . - -**domains** : A comma-seperated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to restrict the search to. - -**exclude_domains** : A comma-seperated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to remove from the results. - -**searchIn** : The fields to restrict your query search to possible options are title, description, content. Multiple options can be specified by separating them with a comma, for example: `title,content` - -**lamguage** : The 2-letter ISO-639-1 code of the language you want to get headlines for. Possible options: `ar de, en es, fr he, it nl, no pt, ru, sv, ud , zh`. - - Default: all languages returned. - -**publishedAt** : A date and optional time for the oldest or newest article allowed. - -#### **ORDER BY PARAMETERS:** - -You can sort article by: - -**relevancy** : articles more closely related to the query parameter come first - -**popularity** : articles from popular sources and publishers come first diff --git a/mindsdb/integrations/handlers/newsapi_handler/__about__.py b/mindsdb/integrations/handlers/newsapi_handler/__about__.py deleted file mode 100644 index 136307b820d..00000000000 --- a/mindsdb/integrations/handlers/newsapi_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Newa APi handler' -__package_name__ = 'mindsdb_news_api_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for News API" -__author__ = 'Thimotee Kenmogne' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/newsapi_handler/__init__.py b/mindsdb/integrations/handlers/newsapi_handler/__init__.py deleted file mode 100644 index 4f736a06ca2..00000000000 --- a/mindsdb/integrations/handlers/newsapi_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .newsapi_handler import NewsAPIHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "News API" -name = "newsapi" -type = HANDLER_TYPE.DATA -icon_path = "icon.png" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/newsapi_handler/connection_args.py b/mindsdb/integrations/handlers/newsapi_handler/connection_args.py deleted file mode 100644 index b99da019dad..00000000000 --- a/mindsdb/integrations/handlers/newsapi_handler/connection_args.py +++ /dev/null @@ -1,14 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.STR, - "description": "The API key for the newsAPI API.", - 'secret': True - } -) - -connection_args_example = OrderedDict(api_key="knlsndlknslk") diff --git a/mindsdb/integrations/handlers/newsapi_handler/icon.png b/mindsdb/integrations/handlers/newsapi_handler/icon.png deleted file mode 100644 index 69a93ec7e70..00000000000 Binary files a/mindsdb/integrations/handlers/newsapi_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py b/mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py deleted file mode 100644 index 4a871464e03..00000000000 --- a/mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py +++ /dev/null @@ -1,209 +0,0 @@ -import os -import urllib -from typing import Any - -import pandas as pd -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser import ast -from newsapi import NewsApiClient - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb.integrations.libs.response import HandlerResponse, HandlerStatusResponse -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.utilities.config import Config - - -class NewsAPIArticleTable(APITable): - def __init__(self, handler): - super().__init__(handler) - - def select(self, query: ast.Select) -> pd.DataFrame: - conditions = extract_comparison_conditions(query.where) - - params = {} - - for op, arg1, arg2 in conditions: - if arg1 == "query": - params["q"] = urllib.parse.quote_plus(arg2) - elif arg1 == "sources": - if len(arg2.split(",")) > 20: - raise ValueError( - "The number of items it sources should be 20 or less" - ) - else: - params[arg1] = arg2 - elif arg1 == "publishedAt": - if op == "Gt" or op == "GtE": - params["from"] = arg2 - if op == "Lt" or op == "LtE": - params["to"] = arg2 - elif op == "Eq": - params["from"] = arg2 - params["to"] = arg2 - else: - params[arg1] = arg2 - - if query.limit: - if query.limit.value > 100: - params["page"], params["page_size"] = divmod(query.limit.value, 100) - if params["page_size"] == 0: - params["page_size"] = 100 - else: - params["page_size"] = query.limit.value - params["page"] = 1 - else: - params["page_size"] = 100 - params["page"] = 1 - - if query.order_by: - if len(query.order_by) == 1: - order_column = str(query.order_by[0]).split('.')[-1] - if order_column not in ["relevancy", "publishedAt"]: - raise NotImplementedError("Not supported ordering by this field") - params["sort_by"] = order_column - else: - raise ValueError( - "Multiple order by condition is not supported by the API" - ) - - selected_columns = [] - - result = self.handler.call_application_api(params=params) - - if not result.empty: - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - return result[selected_columns] - - def get_columns(self) -> list: - return [ - "author", - "title", - "description", - "url", - "urlToImage", - "publishedAt", - "content", - "source_id", - "source_name", - "query", - "searchIn", - "domains", - "excludedDomains", - ] - - -class NewsAPIHandler(APIHandler): - def __init__(self, name: str, **kwargs): - super().__init__(name) - self.api = None - self._tables = {} - - args = kwargs.get("connection_data", {}) - self.connection_args = {} - handler_config = Config().get("newsAPI_handler", {}) - - for k in ["api_key"]: - if k in args: - self.connection_args[k] = args[k] - elif f"NEWSAPI_{k.upper()}" in os.environ: - self.connection_args[k] = os.environ[f"NEWSAPI_{k.upper()}"] - elif k in handler_config: - self.connection_args[k] = handler_config[k] - - self.is_connected = False - self.api = self.create_connection() - - article = NewsAPIArticleTable(self) - self._register_table("article", article) - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.is_connected = False - return self.is_connected - - def create_connection(self): - return NewsApiClient(**self.connection_args) - - def _register_table(self, table_name: str, table_class: Any): - self._tables[table_name] = table_class - - def get_table(self, table_name: str): - return self._tables.get(table_name) - - def connect(self) -> HandlerStatusResponse: - if self.is_connected is True: - return self.api - - self.api = self.create_connection() - - self.is_connected = True - return HandlerStatusResponse(success=True) - - def check_connection(self) -> HandlerStatusResponse: - response = HandlerStatusResponse(False) - - try: - self.connect() - - self.api.get_top_headlines(page_size=1, page=1) - response.success = True - - except Exception as e: - response.error_message = e.message - - return response - - def native_query(self, query: Any): - ast = parse_sql(query) - table = self.get_table("article") - data = table.select(ast) - return HandlerResponse(RESPONSE_TYPE.TABLE, data_frame=data) - - def call_application_api( - self, method_name: str = None, params: dict = None - ) -> pd.DataFrame: - # This will implement api base on the native query - # By processing native query to convert it to api callable parameters - if self.is_connected is False: - self.connect() - - pages = params.get("page", 1) - data = [] - - for page in range(1, pages + 1): - params["page"] = page - try: - result = self.api.get_everything(**params) - except Exception as e: - raise RuntimeError(f"API call failed: {e}") - articles = result["articles"] - for article in articles: - article["source_id"] = article["source"]["id"] - article["source_name"] = article["source"]["name"] - del article["source"] - article["query"] = params.get("q") - article["searchIn"] = params.get("searchIn") - article["domains"] = params.get("domains") - article["excludedDomains"] = params.get("exclude_domains") - data.append(article) - - return pd.DataFrame(data=data) diff --git a/mindsdb/integrations/handlers/newsapi_handler/requirements.txt b/mindsdb/integrations/handlers/newsapi_handler/requirements.txt deleted file mode 100644 index 6d0cc76b2e2..00000000000 --- a/mindsdb/integrations/handlers/newsapi_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -newsapi-python \ No newline at end of file diff --git a/mindsdb/integrations/handlers/newsapi_handler/tests/__init__.py b/mindsdb/integrations/handlers/newsapi_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py b/mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py deleted file mode 100644 index 487a7b05d7b..00000000000 --- a/mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py +++ /dev/null @@ -1,90 +0,0 @@ -import unittest - -from newsapi.newsapi_exception import NewsAPIException - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.handlers.newsapi_handler.newsapi_handler import NewsAPIHandler - - -class NewsApiHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": {"api_key": "82fa480335ce42c0aa3758cb0efe66be"} - } - cls.handler = NewsAPIHandler("test_newsapi_handler", **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - self.handler.check_connection() - - def test_2_select(self): - # table = self.handler.get_table("article") - res = self.handler.native_query('SELECT * FROM article WHERE query="google"') - assert res.type is RESPONSE_TYPE.TABLE - - def test_3_select(self): - # table = self.handler.get_table("article") - with self.assertRaises(NewsAPIException): - self.handler.native_query("SELECT * FROM article") - - def test_4_select(self): - # table = self.handler.get_table("article") - with self.assertRaises(NewsAPIException): - self.handler.native_query( - 'SELECT * FROM article WHERE query="google" AND sources="google.com"' - ) - - def test_5_select(self): - # table = self.handler.get_table("article") - res = self.handler.native_query( - 'SELECT * FROM article WHERE query="google" AND sources="abc-news"' - ) - assert res.type is RESPONSE_TYPE.TABLE - - def test_6_select(self): - # table = self.handler.get_table("article") - self.handler.native_query( - 'SELECT * FROM article WHERE query="google" AND publishedAt >= "2023-03-23" AND publishedAt <= "2023-04-23"' - ) - - def test_7_select(self): - # table = self.handler.get_table("article") - res = self.handler.native_query( - 'SELECT * FROM article WHERE query="google" LIMIT 78' - ) - assert res.type is RESPONSE_TYPE.TABLE - - def test_8_select(self): - # table = self.handler.get_table("article") - res = self.handler.native_query( - 'SELECT * FROM article WHERE query="google" LIMIT 150' - ) - assert res.type is RESPONSE_TYPE.TABLE - - def test_9_select(self): - # table = self.handler.get_table("article") - res = self.handler.native_query( - 'SELECT * FROM article WHERE query="google" ORDER BY publishedAt' - ) - assert res.type is RESPONSE_TYPE.TABLE - - def test_10_select(self): - # table = self.handler.get_table("article") - with self.assertRaises(NotImplementedError): - self.handler.native_query( - 'SELECT * FROM article WHERE query="google" ORDER BY query' - ) - - def test_11_select(self): - # table = self.handler.get_table("article") - res = self.handler.native_query( - 'SELECT * FROM article WHERE query="google" ORDER BY relevancy' - ) - assert res.type is RESPONSE_TYPE.TABLE - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/notion_handler/README.md b/mindsdb/integrations/handlers/notion_handler/README.md deleted file mode 100644 index 2e20e7945ac..00000000000 --- a/mindsdb/integrations/handlers/notion_handler/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# Notion Handler - -This is the implementation of the Notion handler for MindsDB. - -## Notion -[Notion](https://www.notion.so/help/guides/what-is-notion) is a note-taking and productivity freemium cloud platform. -In short, notion has all-in-one workspace tool that integrates kanban boards, tasks, wikis, and database. - -## Implementation -This handler was implemented using notion-client library, a simple and easy to use client library for the official [Notion API](https://developers.notion.com/). - - -The required arguments to establish a connection are, -* `api_token`: API key for acessing the Notion instance. - -NOTE: To access any database or page, the `api_token` must be added as it's connection. To add that as a connection, navigate to the page/database settings and add connection and enter the name of the api token integration. - -## Usage -In order to make use of this handler and connect to an Notion in MindsDB, the following syntax can be used, - -```sql -CREATE DATABASE notion_source -WITH -engine='notion', -parameters={ - "api_token": "", -}; -``` - -## Implemented Features - -Now, you can use this established connection to query your table as follows: - -### Database - -[Databases](https://developers.notion.com/reference/database) are like a collection of pages. It also has properties allowing to store and organize data in a structured way. - -```sql -SELECT * FROM notion_test.database where database_id=''; -``` - -NOTE: In order to fetch database from notion, mindsdb will require the database_id. - -> To get a database_id, just navigate to the database web page and copy the URL, the id then is the portion between the `/` and `?` -> -> Example: -> -> link: `https://www.notion.so/b144844d418f404a8f0f1da2a63cdf7c?v=25544d8fe08f4b06a4cecceabc49ff6a&pvs=4` -> -> Then the database_id is `b144844d418f404a8f0f1da2a63cdf7c` - -### Pages - -A [Page](https://developers.notion.com/reference/page) is a collection of blocks. It also has properties(if in a database) allowing to store and organize data in a structured way. - -```sql -SELECT * FROM notion_test.pages where page_id=''; -``` - -NOTE: In order to fetch a page from notion, mindsdb will require the page_id. - -> To get the page_id, copy the URL and select the id comming after the `title` of the page and the `-` . -> -> Example: -> -> link: `https://www.notion.so/From-Mindsdb-10eeb618d598468ba81c8d83da9613ff` -> -> Then the page_id is `10eeb618d598468ba81c8d83da9613ff` - -### Blocks - -A [Block](https://developers.notion.com/reference/block) is a piece of content in a page. It could be anything from a paragraph, link, image, table, etc. - -```sql -SELECT * FROM notion_test.blocks where block_id=''; -``` - -NOTE: In order to fetch a block from notion, mindsdb will require the block_id. - -> To get the block id, right clicking the block will open a options tray and click `Copy link to block` this will provide the link to the block for that page. Then the block id is the fragment in that link i.e. after the part after the `#` in the link -> -> Example -> -> link: `https://www.notion.so/From-Mindsdb-10eeb618d598468ba81c8d83da9613ff?pvs=4#3666b704803140e2b60cadfcc59cf66a` -> -> Then the block_id is `3366b704803140e2b60cadfcc59cf66a` - -### Comments for a Block - -```sql -SELECT * FROM notion_test.comments where block_id=''; -``` - -### Create a page in a database - -```sql -INSERT INTO notion_test.pages (title, text, database_id) -VALUES ('Hello World', 'MindsDB is awesome. You should check it out', ''); -``` - -### Run native query as a python function - -```sql -SELECT * FROM notion_test -( - databases.query(database_id=b144844d418f404a8f0f1da2a63cdf7c) -); -``` diff --git a/mindsdb/integrations/handlers/notion_handler/__about__.py b/mindsdb/integrations/handlers/notion_handler/__about__.py deleted file mode 100644 index 348d692b4bd..00000000000 --- a/mindsdb/integrations/handlers/notion_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Notion handler" -__package_name__ = "mindsdb_notion_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Notion" -__author__ = "Meet Gor" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023-mindsdb" diff --git a/mindsdb/integrations/handlers/notion_handler/__init__.py b/mindsdb/integrations/handlers/notion_handler/__init__.py deleted file mode 100644 index 7f7b85d1df2..00000000000 --- a/mindsdb/integrations/handlers/notion_handler/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE -from .__about__ import __version__ as version, __description__ as description -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -try: - from .notion_handler import NotionHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = "Notion" -name = "notion" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/notion_handler/icon.svg b/mindsdb/integrations/handlers/notion_handler/icon.svg deleted file mode 100644 index d1fa9901b84..00000000000 --- a/mindsdb/integrations/handlers/notion_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/notion_handler/notion_handler.py b/mindsdb/integrations/handlers/notion_handler/notion_handler.py deleted file mode 100644 index 17591298647..00000000000 --- a/mindsdb/integrations/handlers/notion_handler/notion_handler.py +++ /dev/null @@ -1,238 +0,0 @@ -import time -import pandas as pd -from collections import defaultdict - -from notion_client import Client - -from mindsdb.utilities import log -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse, - RESPONSE_TYPE, -) -from .notion_table import ( - NotionBlocksTable, - NotionCommentsTable, - NotionDatabaseTable, - NotionPagesTable, -) - -logger = log.getLogger(__name__) - - -class NotionHandler(APIHandler): - name = "notion" - - def __init__(self, name: str, **kwargs): - """constructor - Args: - name (str): the handler name - """ - super().__init__(name) - - self.connection_args = kwargs.get("connection_data", {}) - self.key = "api_token" - - # set the api token from the args in create query - if self.key in self.connection_args: - self.connection_args[self.key] = self.connection_args[self.key] - - self.api = None - self.is_connected = False - - notion_database_data = NotionDatabaseTable(self) - notion_pages_data = NotionPagesTable(self) - notion_comments_data = NotionCommentsTable(self) - notion_blocks_data = NotionBlocksTable(self) - - self._register_table("database", notion_database_data) - self._register_table("pages", notion_pages_data) - self._register_table("blocks", notion_blocks_data) - self._register_table("comments", notion_comments_data) - - def connect(self, args=None, **kwargs): - api_token = self.connection_args[self.key] - notion = Client(auth=api_token) - self.is_connected = True - return notion - - def check_connection(self) -> StatusResponse: - response = StatusResponse(False) - - try: - self.connect() - response.success = True - - except Exception as e: - response.error_message = ( - f"Error connecting to Notion api: {e}. Check api_token" - ) - logger.error(response.error_message) - response.success = False - - if response.success is False: - self.is_connected = False - - return response - - def native_query(self, query: str = None) -> HandlerResponse: - method_name, param = query.split("(") - params = dict() - # parse the query as a python function - for map in param.strip(")").split(","): - if map: - k, v = map.split("=") - params[k] = v - - df = self.call_notion_api(method_name, params) - return HandlerResponse(RESPONSE_TYPE.TABLE, data_frame=df) - - def _apply_filters(self, data, filters): - if not filters: - return data - - data2 = [] - for row in data: - add = False - for op, key, value in filters: - value2 = row.get(key) - if isinstance(value, int): - value = str(value) - - if op in ("!=", "<>"): - if value == value2: - break - elif op in ("==", "="): - if value != value2: - break - elif op == "in": - if not isinstance(value, list): - value = [value] - if value2 not in value: - break - elif op == "not in": - if not isinstance(value, list): - value = [value] - if value2 in value: - break - else: - raise NotImplementedError(f"Unknown filter: {op}") - add = True - if add: - data2.append(row) - return data2 - - def call_notion_api( - self, method_name: str = None, params: dict = None, filters: list = None - ): - # method > table > columns - expansions_map = { - "database": { - "page": ["id", "url", "properties"], - }, - "page": { - "properties": ["Name"], - }, - } - - self.api = self.connect() - # use the service as the resource to query(database, page, block, comment) - # and query as the type of method(retrieve, list, query) - parts = method_name.split(".") - if len(parts) == 2: - service, query = parts - if service in ["databases", "pages", "blocks", "comments"]: - method = getattr(self.api, service) - method = getattr(method, query) - else: - service, children, query = parts - if service in ["blocks"]: - method = getattr(self.api, service) - method = getattr(method, children) - method = getattr(method, query) - - count_results = None - data = [] - includes = defaultdict(list) - - max_page_size = 100 - left = None - - limit_exec_time = time.time() + 60 - - if filters: - # if we have filters: do big page requests - params["max_results"] = max_page_size - - chunk = [] - while True: - if time.time() > limit_exec_time: - raise RuntimeError("Handler request timeout error") - - if count_results is not None: - left = count_results - len(data) - if left == 0: - break - elif left < 0: - # got more results that we need - data = data[:left] - break - - logger.debug(f">>>notion in: {method_name}({params})") - - resp = method(**params) - - if hasattr(resp, "includes"): - for table, records in resp.includes.items(): - includes[table].extend([r.data for r in records]) - if resp.get("results"): - # database and comment api has list of results - if isinstance(resp["results"], list): - chunk = [r for r in resp["results"]] - else: - if resp.get("object") in ["page", "block"]: - chunk = [resp] - - if filters: - chunk = self._apply_filters(chunk, filters) - - # limit output - if left is not None: - chunk = chunk[:left] - - data.extend(chunk) - if ( - count_results is not None - and hasattr(resp, "meta") - and "next_token" in resp.meta - ): - params["next_token"] = resp.meta["next_token"] - else: - break - - df = pd.DataFrame(data) - - # enrich - expansions = expansions_map.get(method_name) - if expansions is not None: - for table, records in includes.items(): - df_ref = pd.DataFrame(records) - - if table not in expansions: - continue - - for col_id in expansions[table]: - col = col_id[:-3] # cut _id - if col_id not in df.columns: - continue - - col_map = { - col_ref: f"{col}_{col_ref}" for col_ref in df_ref.columns - } - df_ref2 = df_ref.rename(columns=col_map) - df_ref2 = df_ref2.drop_duplicates(col_id) - - df = df.merge(df_ref2, on=col_id, how="left") - - return df diff --git a/mindsdb/integrations/handlers/notion_handler/notion_table.py b/mindsdb/integrations/handlers/notion_handler/notion_table.py deleted file mode 100644 index 7bc0572a90c..00000000000 --- a/mindsdb/integrations/handlers/notion_handler/notion_table.py +++ /dev/null @@ -1,422 +0,0 @@ -import json -import pandas as pd - -from mindsdb_sql_parser import ast - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import HandlerResponse as Response - - -logger = log.getLogger(__name__) - - -class NotionDatabaseTable(APITable): - def select(self, query: ast.Select) -> Response: - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - - if arg1 == "database_id": - if op == "=": - params[arg1] = arg2 - else: - NotImplementedError(f"Unknown op: {op}") - - else: - filters.append([op, arg1, arg2]) - - # fetch a particular database with the given id - # additionally filter the results - result = self.handler.call_notion_api( - method_name="databases.query", params=params, filters=filters - ) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - return [ - "id", - "created_time", - "last_edited_time", - "created_by", - "last_edited_by", - "cover", - "icon", - "parent", - "archived", - "properties", - "url", - "public_url", - ] - - def insert(self, query: ast.Insert): - columns = [col.name for col in query.columns] - - insert_params = ("api_token",) - for p in insert_params: - if p not in self.handler.connection_args: - raise Exception( - f"To insert data into Notion, you need to provide the following parameters when connecting it to MindsDB: {insert_params}" - ) # noqa - - for row in query.values: - params = dict(zip(columns, row)) - - # parent and properties as required params for creating a database - params["parent"] = json.loads(params["parent"]) - params["properties"] = json.loads(params["properties"]) - params["title"] = json.loads(params.get("title", "{}")) - - self.handler.call_notion_api("databases.create", params) - - -class NotionPagesTable(APITable): - def select(self, query: ast.Select) -> Response: - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - - if arg1 == "page_id": - if op == "=": - params[arg1] = arg2 - else: - raise NotImplementedError - - else: - filters.append([op, arg1, arg2]) - - if "query" not in params: - # search not works without query, use 'mindsdb' - params["query"] = "mindsdb" - - # fetch a particular page with the given id - result = self.handler.call_notion_api( - method_name="pages.retrieve", params=params, filters=filters - ) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - return [ - "id", - "object", - "created_time", - "last_edited_time", - "created_by", - "last_edited_by", - "cover", - "icon", - "parent", - "archived", - "properties", - "url", - "public_url", - ] - - def insert(self, query: ast.Insert): - columns = [col.name for col in query.columns] - - insert_params = ("api_token",) - for p in insert_params: - if p not in self.handler.connection_args: - raise Exception( - f"To insert data into Notion, you need to provide the following parameters when connecting it to MindsDB: {insert_params}" - ) # noqa - - for row in query.values: - params = dict(zip(columns, row)) - - # title and database_id as required params for creating the page - # optionally provide the text to populate the page - title = params["title"] - text = params.get("text", "") - - messages = [] - - # the last message - if text.strip() != "": - messages.append(text.strip()) - - len_messages = len(messages) - for i, text in enumerate(messages): - if i < len_messages - 1: - text += "..." - else: - text += " " - - params["parent"] = {"database_id": params["database_id"]} - params["properties"] = { - "Name": { - "title": [ - { - "text": { - "content": title, - }, - }, - ], - }, - } - params["children"] = [ - { - "object": "block", - "type": "paragraph", - "paragraph": { - "rich_text": [ - { - "type": "text", - "text": { - "content": text, - }, - } - ] - }, - } - ] - - self.handler.call_notion_api("pages.create", params) - - -class NotionBlocksTable(APITable): - def select(self, query: ast.Select) -> Response: - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - - if arg1 == "block_id": - if op == "=": - params[arg1] = arg2 - else: - NotImplementedError(f"Unknown op: {op}") - - else: - filters.append([op, arg1, arg2]) - - # fetch a particular block with the given id - result = self.handler.call_notion_api( - method_name="blocks.retrieve", params=params, filters=filters - ) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - # most of the columns will remain NULL as a block can be of a single type - return [ - "object", - "id", - "parent", - "has_children", - "created_time", - "last_edited_time", - "created_by", - "last_edited_by", - "archived", - "type", - "bookmark", - "breadcrumb", - "bulleted_list_item", - "callout", - "child_database", - "child_page", - "column", - "column_list", - "divider", - "embed", - "equation", - "file", - "heading_1", - "heading_2", - "heading_3", - "image", - "link_preview", - "link_to_page", - "numbered_list_item", - "paragraph", - "pdf", - "quote", - "synced_block", - "table", - "table_of_contents", - "table_row", - "template", - "to_do", - "toggle", - "unsupported", - "video", - ] - - def insert(self, query: ast.Insert): - columns = [col.name for col in query.columns] - - insert_params = ("api_token",) - for p in insert_params: - if p not in self.handler.connection_args: - raise Exception( - f"To insert data into Notion, you need to provide the following parameters when connecting it to MindsDB: {insert_params}" - ) # noqa - - for row in query.values: - params = dict(zip(columns, row)) - - # block_id and children as required params for appending to a block - params["block_id"] = params["block_id"] - params["children"] = json.loads(params["children"]) - params["after"] = params.get("after", "") - - self.handler.call_notion_api("blocks.children.append", params) - - -class NotionCommentsTable(APITable): - def select(self, query: ast.Select) -> Response: - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - - if arg1 == "block_id": - if op == "=": - params[arg1] = arg2 - else: - NotImplementedError(f"Unknown op: {op}") - - else: - filters.append([op, arg1, arg2]) - - # list all the unresolved comments for a given block id - result = self.handler.call_notion_api( - method_name="comments.list", params=params, filters=filters - ) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - return [ - "id", - "object", - "parent", - "discussion_id", - "created_time", - "last_edited_time", - "created_by", - "rich_text", - ] diff --git a/mindsdb/integrations/handlers/notion_handler/requirements.txt b/mindsdb/integrations/handlers/notion_handler/requirements.txt deleted file mode 100644 index 0665ab90c83..00000000000 --- a/mindsdb/integrations/handlers/notion_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -notion-client \ No newline at end of file diff --git a/mindsdb/integrations/handlers/notion_handler/tests/__init__.py b/mindsdb/integrations/handlers/notion_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/notion_handler/tests/test_notion_handler.py b/mindsdb/integrations/handlers/notion_handler/tests/test_notion_handler.py deleted file mode 100644 index a0cf260eadd..00000000000 --- a/mindsdb/integrations/handlers/notion_handler/tests/test_notion_handler.py +++ /dev/null @@ -1,62 +0,0 @@ -import unittest - -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.notion_handler.notion_handler import NotionHandler -from mindsdb.integrations.handlers.notion_handler.notion_table import ( - NotionDatabaseTable, - NotionBlocksTable, - NotionCommentsTable, - NotionPagesTable, -) - - -class NotionHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "api_token": "secret_KHTlOzUN5fIwVlb1euOpBa4lwcJA7jEALoBGDRrlATx" - } - } - cls.handler = NotionHandler("test_notion_handler", **cls.kwargs) - cls.db_table = NotionDatabaseTable(cls.handler) - cls.pages_table = NotionPagesTable(cls.handler) - cls.blocks_table = NotionBlocksTable(cls.handler) - cls.comments_table = NotionCommentsTable(cls.handler) - - def test_check_connection(self): - status = self.handler.check_connection() - self.assertTrue(status.success) - - def test_select_database(self): - query = "SELECT * FROM database WHERE database_id = '21510b8a953c4d62958c9907f3cf9f87'" - ast = parse_sql(query) - res = self.db_table.select(ast) - self.assertFalse(res.empty) - - def test_select_page(self): - query = "SELECT * FROM pages WHERE page_id = '70f28e55416b4dfe8588aa175ecae63a'" - ast = parse_sql(query) - res = self.pages_table.select(ast) - self.assertFalse(res.empty) - - def test_select_blocks(self): - query = ( - "SELECT * FROM blocks WHERE block_id = '6d1480e0bf4b46e1a71be093f105d654'" - ) - ast = parse_sql(query) - res = self.blocks_table.select(ast) - self.assertFalse(res.empty) - - def test_select_comment(self): - query = ( - "SELECT * FROM comments where block_id = '169fa742a8374fe9a516caecfb33432a'" - ) - ast = parse_sql(query) - res = self.comments_table.select(ast) - self.assertFalse(res.empty) - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/npm_handler/README.md b/mindsdb/integrations/handlers/npm_handler/README.md deleted file mode 100644 index 67be1504817..00000000000 --- a/mindsdb/integrations/handlers/npm_handler/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# NPM Handler - -This handler allows you to interact with the data that [npms.io](https://npms.io) stores stats about various NPM packages. - -## About NPM - -The free npm Registry has become the center of JavaScript code sharing, and with more than two million packages, the largest software registry in the world. - -## NPM Handler Implementation - -This implementation is based on the API service provided at [api.npms.io](https://api.npms.io/). - -## NPM Handler Initialization - -There is nothing needed to be passed in the database initialization process. You can create the database via the following flow. - -```sql -CREATE DATABASE npm_datasource -WITH ENGINE = 'npm'; -``` - -To select from various tables, you can use `SELECT` statement. You must provide a package for this to work. - -```sql -SELECT * FROM npm_datasource.dependencies -WHERE package="handlebars"; -``` - -```sql -SELECT username FROM npm_datasource.maintainers -WHERE package="handlebars"; -``` - -Note that some of the stats can be slightly outdated. - -## Available tables - -- `metadata`: things like name, description, license, etc. -- `maintainers`: list of maintainers and their emails -- `keywords`: keywords associated with the package -- `dependencies`: dependencies of the package -- `dev_dependencies`: development dependencies of the package -- `optional_dependencies`: optional dependencies of the package -- `github_stats`: some github stats like number of stars, forks etc. diff --git a/mindsdb/integrations/handlers/npm_handler/__about__.py b/mindsdb/integrations/handlers/npm_handler/__about__.py deleted file mode 100644 index a4cb1195427..00000000000 --- a/mindsdb/integrations/handlers/npm_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB NPM handler" -__package_name__ = "mindsdb_npm_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for NPM" -__author__ = "Aditya Azad" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/npm_handler/__init__.py b/mindsdb/integrations/handlers/npm_handler/__init__.py deleted file mode 100644 index b64c5b074fb..00000000000 --- a/mindsdb/integrations/handlers/npm_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .npm_handler import NPMHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "NPM" -name = "npm" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/npm_handler/api.py b/mindsdb/integrations/handlers/npm_handler/api.py deleted file mode 100644 index 4212cbbca9b..00000000000 --- a/mindsdb/integrations/handlers/npm_handler/api.py +++ /dev/null @@ -1,26 +0,0 @@ -import requests - - -class NPM: - - def __init__(self, package_name: str): - resp = requests.get("https://api.npms.io/v2/package/" + package_name) - if not resp or resp.status_code != 200: - raise Exception(f"Unable to get package datails: '{package_name}'") - self.data = resp.json() - - def get_data(self): - return self.data - - @staticmethod - def is_connected(): - return True if requests.get("https://api.npms.io/v2/search?q=a&size=1").status_code == 200 else False - - def get_cols_in(self, path, cols): - curr_root = self.data - for p in path: - curr_root = curr_root[p] - req_cols = {} - for col in cols: - req_cols[col] = curr_root[col] if col in curr_root else {} - return req_cols diff --git a/mindsdb/integrations/handlers/npm_handler/icon.svg b/mindsdb/integrations/handlers/npm_handler/icon.svg deleted file mode 100644 index 0908809d21e..00000000000 --- a/mindsdb/integrations/handlers/npm_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/npm_handler/npm_handler.py b/mindsdb/integrations/handlers/npm_handler/npm_handler.py deleted file mode 100644 index ac8faf7b685..00000000000 --- a/mindsdb/integrations/handlers/npm_handler/npm_handler.py +++ /dev/null @@ -1,53 +0,0 @@ -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.npm_handler.api import NPM -from mindsdb.integrations.handlers.npm_handler.npm_tables import ( - NPMMetadataTable, - NPMMaintainersTable, - NPMKeywordsTable, - NPMDependenciesTable, - NPMDevDependenciesTable, - NPMOptionalDependenciesTable, - NPMGithubStatsTable, -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse - - -class NPMHandler(APIHandler): - - def __init__(self, name: str, **kwargs) -> None: - super().__init__(name) - self.connection = None - self.is_connected = False - _tables = [ - NPMMetadataTable, - NPMMaintainersTable, - NPMKeywordsTable, - NPMDependenciesTable, - NPMDevDependenciesTable, - NPMOptionalDependenciesTable, - NPMGithubStatsTable, - ] - for Table in _tables: - self._register_table(Table.name, Table(self)) - - def check_connection(self) -> StatusResponse: - """Check if connected""" - response = StatusResponse(False) - if NPM.is_connected(): - response.success = True - else: - response.success = False - self.is_connected = True - return response - - def connect(self) -> NPM: - """Make connection object""" - self.connection = NPM - return self.connection - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query""" - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/npm_handler/npm_tables.py b/mindsdb/integrations/handlers/npm_handler/npm_tables.py deleted file mode 100644 index 72e6bdef795..00000000000 --- a/mindsdb/integrations/handlers/npm_handler/npm_tables.py +++ /dev/null @@ -1,242 +0,0 @@ -from typing import List - -import pandas as pd -from mindsdb_sql_parser import ast - -from mindsdb.integrations.utilities.handlers.query_utilities import ( - SELECTQueryExecutor, - SELECTQueryParser, -) -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb.integrations.utilities.sql_utils import conditions_to_filter - - -def rename_key(d, new_key, old_key): - d[new_key] = d.pop(old_key) - - -class CustomAPITable(APITable): - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def get_columns(self, ignore: List[str] = []) -> List[str]: - return [item for item in self.columns if item not in ignore] - - def select(self, query: ast.Select) -> pd.DataFrame: - raise NotImplementedError() - - def parse_select(self, query: ast.Select, table_name: str): - select_statement_parser = SELECTQueryParser(query, table_name, self.get_columns()) - self.selected_columns, self.where_conditions, self.order_by_conditions, self.result_limit = select_statement_parser.parse_query() - - def get_package_name(self, query: ast.Select): - params = conditions_to_filter(query.where) - if "package" not in params: - raise Exception("Where condition does not have 'package' selector") - return params["package"] - - def apply_query_params(self, df, query): - select_statement_parser = SELECTQueryParser(query, self.name, self.get_columns()) - selected_columns, _, order_by_conditions, result_limit = select_statement_parser.parse_query() - select_statement_executor = SELECTQueryExecutor(df, selected_columns, [], order_by_conditions, result_limit) - return select_statement_executor.execute_query() - - -class NPMMetadataTable(CustomAPITable): - name: str = "metadata" - columns: List[str] = [ - "name", - "scope", - "version", - "description", - "author_name", - "author_email", - "publisher_username", - "publisher_email", - "repository_url", - "license", - "num_releases", - "num_downloads", - "num_stars", - "score", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - package_name = self.get_package_name(query) - connection = self.handler.connection(package_name) - metadata = connection.get_cols_in( - ["collected", "metadata"], - ["name", "scope", "version", "description", "author", "publisher", "repository", "license", "releases"] - ) - metadata["author_email"] = metadata["author"].get("email", "") - metadata["author"] = metadata["author"].get("name", "") - rename_key(metadata, "author_name", "author") - metadata["publisher_email"] = metadata["publisher"].get("email", "") - metadata["publisher"] = metadata["publisher"].get("username", "") - rename_key(metadata, "publisher_username", "publisher") - metadata["repository"] = metadata["repository"].get("url", "") - rename_key(metadata, "repository_url", "repository") - metadata["releases"] = sum([x.get("count", 0) for x in metadata.get("releases", [0])]) - rename_key(metadata, "num_releases", "releases") - npm_data = connection.get_cols_in( - ["collected", "npm"], - ["downloads", "starsCount"] - ) - npm_data["downloads"] = sum([x.get("count", 0) for x in npm_data.get("downloads", [0])]) - rename_key(npm_data, "num_downloads", "downloads") - rename_key(npm_data, "num_stars", "starsCount") - score = connection.get_cols_in(["score"], ["final"])["final"] - df = pd.DataFrame.from_records([{**metadata, **npm_data, "score": score}]) - return self.apply_query_params(df, query) - - -class NPMMaintainersTable(CustomAPITable): - name: str = "maintainers" - columns: List[str] = [ - "username", - "email" - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - package_name = self.get_package_name(query) - connection = self.handler.connection(package_name) - metadata = connection.get_cols_in( - ["collected", "metadata"], - ["maintainers"] - ) - records = [{col: x[col] for col in self.columns} for x in metadata["maintainers"]] if metadata.get("maintainers") else [{}] - df = pd.DataFrame.from_records(records) - return self.apply_query_params(df, query) - - -class NPMKeywordsTable(CustomAPITable): - name: str = "keywords" - columns: List[str] = [ - "keyword" - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - package_name = self.get_package_name(query) - connection = self.handler.connection(package_name) - metadata = connection.get_cols_in( - ["collected", "metadata"], - ["keywords"] - ) - records = [{"keyword": keyword} for keyword in metadata["keywords"]] if metadata.get("keywords") else [{}] - df = pd.DataFrame.from_records(records) - return self.apply_query_params(df, query) - - -class NPMDependenciesTable(CustomAPITable): - name: str = "dependencies" - columns: List[str] = [ - "dependency", - "version" - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - package_name = self.get_package_name(query) - connection = self.handler.connection(package_name) - metadata = connection.get_cols_in( - ["collected", "metadata"], - ["dependencies"] - ) - records = [{"dependency": d, "version": v} for d, v in metadata["dependencies"].items()] if metadata.get("dependencies") else [{}] - df = pd.DataFrame.from_records(records) - return self.apply_query_params(df, query) - - -class NPMDevDependenciesTable(CustomAPITable): - name: str = "dev_dependencies" - columns: List[str] = [ - "dev_dependency", - "version" - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - package_name = self.get_package_name(query) - connection = self.handler.connection(package_name) - metadata = connection.get_cols_in( - ["collected", "metadata"], - ["devDependencies"] - ) - records = [{"dev_dependency": d, "version": v} for d, v in metadata["devDependencies"].items()] if metadata.get("devDependencies") else [{}] - df = pd.DataFrame.from_records(records) - return self.apply_query_params(df, query) - - -class NPMOptionalDependenciesTable(CustomAPITable): - name: str = "optional_dependencies" - columns: List[str] = [ - "optional_dependency", - "version" - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - package_name = self.get_package_name(query) - connection = self.handler.connection(package_name) - metadata = connection.get_cols_in( - ["collected", "metadata"], - ["optionalDependencies"] - ) - records = [{"optional_dependency": d, "version": v} for d, v in metadata["optionalDependencies"].items()] if metadata.get("optionalDependencies") else [{}] - df = pd.DataFrame.from_records(records) - return self.apply_query_params(df, query) - - -class NPMGithubStatsTable(CustomAPITable): - name: str = "github_stats" - columns: List[str] = [ - "homepage", - "num_stars", - "num_forks", - "num_subscribers", - "num_issues", - "num_open_issues", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - package_name = self.get_package_name(query) - connection = self.handler.connection(package_name) - github_data = connection.get_cols_in( - ["collected", "github"], - ["homepage", "starsCount", "forksCount", "subscribersCount", "issues"] - ) - rename_key(github_data, "num_stars", "starsCount") - rename_key(github_data, "num_forks", "forksCount") - rename_key(github_data, "num_subscribers", "subscribersCount") - github_data["num_issues"] = github_data["issues"].get("count", 0) - github_data["issues"] = github_data["issues"].get("openCount", 0) - rename_key(github_data, "num_open_issues", "issues") - df = pd.DataFrame.from_records([github_data]) - return self.apply_query_params(df, query) diff --git a/mindsdb/integrations/handlers/npm_handler/tests/__init__.py b/mindsdb/integrations/handlers/npm_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/README.md b/mindsdb/integrations/handlers/nuo_jdbc_handler/README.md deleted file mode 100644 index d81ee01942d..00000000000 --- a/mindsdb/integrations/handlers/nuo_jdbc_handler/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# NuoDB Handler - -This is the implementation of the NuoDB handler for MindsDB. - -## NuoDB -NuoDB delivers consistent, resilient distributed SQL for your mission critical applications, so you can deploy on premises, in public or private clouds, in hybrid environments, or across clouds. -NuoDB is the distributed SQL database designed to meet the rapidly evolving demands of today’s enterprises, scale on demand, eliminate downtime and reduce total cost of ownership. All while maintaining SQL compatibility. - -## Implementation -This handler was implemented using the JDBC driver provided by NuoDB. To establish connection with the database, `JayDeBeApi` library is used. The `JayDeBeApi` module allows you to connect from Python code to databases using Java JDBC. It provides a Python DB-API v2.0 to that database. - -The required arguments to establish a connection are, -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `database`: Database name to be connected -* `user`: The username to authenticate with the NuoDB server. -* `password`: The password to authenticate the user with the NuoDB server. -* `is_direct`: This argument indicates whether a direct connection to the TE is to be attempted. - -Other optional arguments are, -* `schema`: The schema name to use when connecting with the NuoDB. -* `jar_location`: The location of the jar files which contain the JDBC class. This need not be specified if the required classes are already added to the CLASSPATH variable. -* `driver_args`: The extra arguments which can be specified to the driver. Specify this in the format: "arg1=value1,arg2=value2. -More information on the supported parameters can be found at: https://doc.nuodb.com/nuodb/latest/deployment-models/physical-or-vmware-environments-with-nuodb-admin/reference-information/connection-properties/ - -## Usage -In order to make use of this handler and connect to Apache Derby in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE nuo_datasource -WITH engine='nuo_jdbc', -parameters={ - "host": "localhost", - "port": "48006", - "database": "test", - "schema": "hockey", - "user": "dba", - "password": "goalie", - "is_direct": "true", -}; -~~~~ -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM nuo_datasource.PLAYERS; -~~~~ diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/__about__.py b/mindsdb/integrations/handlers/nuo_jdbc_handler/__about__.py deleted file mode 100644 index f9cb6d58d05..00000000000 --- a/mindsdb/integrations/handlers/nuo_jdbc_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Nuo DB handler' -__package_name__ = 'mindsdb_nuodb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Nuo DB" -__author__ = 'Kavel Baruah' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/__init__.py b/mindsdb/integrations/handlers/nuo_jdbc_handler/__init__.py deleted file mode 100644 index a86116fc37d..00000000000 --- a/mindsdb/integrations/handlers/nuo_jdbc_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .nuo_jdbc_handler import NuoHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'NuoDB' -name = 'nuo_jdbc' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py b/mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py deleted file mode 100644 index 92e90159733..00000000000 --- a/mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py +++ /dev/null @@ -1,65 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the NuoDB AP or TE. If is_direct is set to true then provide the TE IP else provide the AP IP.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Specify port to connect to NuoDB. If is_direct is set to true then provide the TE port else provide the AP port.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': """ - The database name to use when connecting with the NuoDB. - """ - }, - schema={ - 'type': ARG_TYPE.STR, - 'description': """ - The schema name to use when connecting with the NuoDB. - """ - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The username to authenticate with the NuoDB server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the NuoDB server.', - 'secret': True - }, - is_direct={ - 'type': ARG_TYPE.STR, - 'description': 'This argument indicates whether a direct connection to the TE is to be attempted.' - }, - jar_location={ - 'type': ARG_TYPE.STR, - 'description': 'The location of the jar files which contain the JDBC class. This need not be specified if the required classes are already added to the CLASSPATH variable.' - }, - driver_args={ - 'type': ARG_TYPE.STR, - 'description': """ - The extra arguments which can be specified to the driver. - Specify this in the format: "arg1=value1,arg2=value2. - More information on the supported paramters can be found at: https://doc.nuodb.com/nuodb/latest/deployment-models/physical-or-vmware-environments-with-nuodb-admin/reference-information/connection-properties/' - """ - } -) - - -connection_args_example = OrderedDict( - host="localhost", - port="48006", - database="test", - schema="hockey", - user="dba", - password="goalie", - jar_location="/Users/kavelbaruah/Desktop/nuodb-jdbc-24.0.0.jar", - is_direct="true", - driver_args="schema=hockey,clientInfo=info" -) diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/icon.svg b/mindsdb/integrations/handlers/nuo_jdbc_handler/icon.svg deleted file mode 100644 index 820f531f60e..00000000000 --- a/mindsdb/integrations/handlers/nuo_jdbc_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py b/mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py deleted file mode 100644 index 4237a05c78c..00000000000 --- a/mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py +++ /dev/null @@ -1,220 +0,0 @@ -from typing import Optional -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -import pandas as pd -import jaydebeapi as jdbcconnector - -logger = log.getLogger(__name__) - - -class NuoHandler(DatabaseHandler): - - name = 'nuo_jdbc' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ Initialize the handler - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.kwargs = kwargs - self.parser = parse_sql - self.database = connection_data['database'] - self.connection_config = connection_data - self.host = connection_data['host'] - self.port = connection_data['port'] - self.user = connection_data['user'] - self.is_direct = connection_data['is_direct'] - self.password = connection_data['password'] - self.connection = None - self.is_connected = False - self.schema = None - - self.jdbc_url = self.construct_jdbc_url() - - def connect(self): - """ Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - Connection Object - """ - if self.is_connected is True: - return self.connection - - jdbc_class = "com.nuodb.jdbc.Driver" - jar_location = self.connection_config.get('jar_location') - - try: - if (jar_location): - self.connection = jdbcconnector.connect(jclassname=jdbc_class, url=self.jdbc_url, jars=jar_location) - else: - self.connection = jdbcconnector.connect(jclassname=jdbc_class, url=self.jdbc_url) - except Exception as e: - logger.error(f"Error while connecting to {self.database}, {e}") - - return self.connection - - def construct_jdbc_url(self): - """ Constructs the JDBC url based on the paramters provided to the handler class.\ - Returns: - The JDBC connection url string. - """ - - jdbc_url = "jdbc:com.nuodb://" + self.host - - # port is an optional paramter, if found then append - port = self.connection_config.get('port') - if port: - jdbc_url = jdbc_url + ":" + str(port) - - jdbc_url = jdbc_url + "/" + self.database + "?user=" + self.user + "&password=" + self.password - - # check if a schema is provided in the connection args, if provided use the schema to establish connection - schema = self.connection_config.get('schema') - if schema: - self.schema = schema - jdbc_url = jdbc_url + "&schema=" + schema - - # sets direct paramter only if the paramters is specified to be true - if (str(self.is_direct).lower() == 'true'): - jdbc_url = jdbc_url + "&direct=true" - - driver_args = self.connection_config.get('driver_args') - - # if driver args are present then construct them in the form: &query=one#qquerytwo=true - # finally append these to the url - if (driver_args): - driver_arg_string = '&'.join(driver_args.split(",")) - jdbc_url = jdbc_url + "&" + driver_arg_string - - return jdbc_url - - def disconnect(self): - """ Close any existing connections - Should switch self.is_connected. - """ - if self.is_connected is False: - return - try: - self.connection.close() - self.is_connected = False - except Exception as e: - logger.error(f"Error while disconnecting to {self.database}, {e}") - - return - - def check_connection(self) -> StatusResponse: - """ Check connection to the handler - Returns: - HandlerStatusResponse - """ - responseCode = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - responseCode.success = True - except Exception as e: - logger.error(f'Error connecting to database {self.database}, {e}!') - responseCode.error_message = str(e) - finally: - if responseCode.success is True and need_to_close: - self.disconnect() - if responseCode.success is False and self.is_connected is True: - self.is_connected = False - - return responseCode - - def native_query(self, query: str) -> StatusResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - dict for mongo, etc) - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - conn = self.connect() - with conn.cursor() as cur: - try: - cur.execute(query) - if cur.description: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - self.connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.database}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - self.connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """Render and execute a SQL query. - - Args: - query (ASTNode): The SQL query. - - Returns: - Response: The query result. - """ - if isinstance(query, ASTNode): - query_str = query.to_string() - else: - query_str = str(query) - - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """Get a list of all the tables in the database. - - Returns: - Response: Names of the tables in the database. - """ - if self.schema: - query = f''' SELECT TABLENAME FROM SYSTEM.TABLES WHERE SCHEMA = '{self.schema}' ''' - else: - query = ''' SELECT TABLENAME FROM SYSTEM.TABLES WHERE SCHEMA != 'SYSTEM' ''' - - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """Get details about a table. - - Args: - table_name (str): Name of the table to retrieve details of. - - Returns: - Response: Details of the table. - """ - - query = f''' SELECT FIELD FROM SYSTEM.FIELDS WHERE TABLENAME='{table_name}' ''' - return self.native_query(query) diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/requirements.txt b/mindsdb/integrations/handlers/nuo_jdbc_handler/requirements.txt deleted file mode 100644 index 78d1c7fe94b..00000000000 --- a/mindsdb/integrations/handlers/nuo_jdbc_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -jaydebeapi diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/tests/__init__.py b/mindsdb/integrations/handlers/nuo_jdbc_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py b/mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py deleted file mode 100644 index d23564f9702..00000000000 --- a/mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py +++ /dev/null @@ -1,50 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.nuo_jdbc_handler.nuo_jdbc_handler import NuoHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class NuoHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 48006, - "database": "test", - "schema": "hockey", - "user": "dba", - "password": "goalie", - "is_direct": "true", - } - } - cls.handler = NuoHandler('test_nuo_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - self.handler.check_connection() - - def test_2_create(self): - res = self.handler.query('CREATE TABLE TESTTABLEX3 (ID INT PRIMARY KEY, NAME VARCHAR(14))') - assert res.type is RESPONSE_TYPE.OK - - def test_3_insert(self): - res = self.handler.query("INSERT INTO TESTTABLEX3 VALUES (100,'ONE HUNDRED'),(200,'TWO HUNDRED'),(300,'THREE HUNDRED')") - assert res.type is RESPONSE_TYPE.OK - - def test_4_select(self): - res = self.handler.query('SELECT * FROM HOCKEY') - assert res.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - res = self.handler.get_tables() - assert res.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - res = self.handler.get_columns("HOCKEY") - assert res.type is RESPONSE_TYPE.TABLE - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/oceanbase_handler/README.md b/mindsdb/integrations/handlers/oceanbase_handler/README.md deleted file mode 100644 index f4929f4ecf2..00000000000 --- a/mindsdb/integrations/handlers/oceanbase_handler/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# OceanBase Handler - -This is the implementation of the OceanBase Handler for MindsDB. - -## OceanBase -OceanBase Database is a distributed relational database. It has been supporting the Double 11 Shopping Festival for 9 years and is also the only distributed database in the world that has broken both TPC-C and TPC-H records. It has set forth a new standard of city-level disaster recovery solutions with five IDCs across three sites. OceanBase Database adopts an independently developed integrated architecture, which encompasses both the scalability of a distributed architecture and the performance advantage of a centralized architecture. OceanBase Database supports hybrid transaction/analytical processing (HTAP) with one engine. With features such as strong data consistency, high availability, high performance, online scalability, high compatibility with SQL and mainstream relational databases, transparency to applications, and a high cost/performance ratio, OceanBase Database has helped over 400 customers across industries upgrade their core systems. - -OceanBase Database features high business continuity, ease of use, low costs, and low risks. - -## Implementation - -This handler was implemented by extending mysql connector. - -The required arguments to establish a connection are: - -* `host`: the host name of the OceanBase connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name - -## Usage - -In order to make use of this handler and connect to a OceanBase server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE oceanbase_datasource -WITH ENGINE = "oceanbase", -PARAMETERS = { - "user": "root", - "password": "", - "host": "localhost", - "port": 9030, - "database": "test" -} -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM oceanbase_datasource.LoveU LIMIT 10; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/oceanbase_handler/__about__.py b/mindsdb/integrations/handlers/oceanbase_handler/__about__.py deleted file mode 100644 index aa7cc54e74d..00000000000 --- a/mindsdb/integrations/handlers/oceanbase_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB OceanBase handler' -__package_name__ = 'mindsdb_oceanbase_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for OceanBase" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/oceanbase_handler/__init__.py b/mindsdb/integrations/handlers/oceanbase_handler/__init__.py deleted file mode 100644 index e5938554a41..00000000000 --- a/mindsdb/integrations/handlers/oceanbase_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .oceanbase_handler import OceanBaseHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'OceanBase' -name = 'oceanbase' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/oceanbase_handler/icon.svg b/mindsdb/integrations/handlers/oceanbase_handler/icon.svg deleted file mode 100644 index e834afd675e..00000000000 --- a/mindsdb/integrations/handlers/oceanbase_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py b/mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py deleted file mode 100644 index f7675dc6b21..00000000000 --- a/mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py +++ /dev/null @@ -1,47 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE -from collections import OrderedDict - - -from mindsdb.integrations.handlers.mysql_handler import Handler as MysqlHandler - - -class OceanBaseHandler(MysqlHandler): - """ - This handler handles connection and execution of the OceanBase statements. - """ - name = 'oceanbase' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the OceanBase server.' - }, - password={ - 'type': ARG_TYPE.STR, - 'description': 'The password to authenticate the user with the OceanBase server.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the OceanBase server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the OceanBase server. ' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the OceanBase server. Must be an integer.' - } -) - -connection_args_example = OrderedDict( - host='localhost', - port=9030, - user='root', - password='', - database='test' -) diff --git a/mindsdb/integrations/handlers/oceanbase_handler/requirements.txt b/mindsdb/integrations/handlers/oceanbase_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/oceanbase_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/oceanbase_handler/tests/__init__.py b/mindsdb/integrations/handlers/oceanbase_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py b/mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py deleted file mode 100644 index ae9f85c5e3a..00000000000 --- a/mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.oceanbase_handler.oceanbase_handler import OceanBaseHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE - - -class OceanBaseHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 9030, - "user": "root", - "password": "", - "database": "test", - } - } - cls.handler = OceanBaseHandler('test_oceanbase_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_create_table(self): - query = "CREATE Table IF NOT EXISTS Lover(name varchar(101));" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_3_insert(self): - query = "INSERT INTO LOVER VALUES('Shiv Shakti');" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_4_native_query_select(self): - query = "SELECT * FROM LOVER;" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('LOVER') - - query = "DROP Table IF EXISTS Lover;" - self.handler.query(query) - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/oilpriceapi_handler/README.md b/mindsdb/integrations/handlers/oilpriceapi_handler/README.md deleted file mode 100644 index 42382a6a4d0..00000000000 --- a/mindsdb/integrations/handlers/oilpriceapi_handler/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# OilPriceAPI Handler - -OilPriceAPI handler for MindsDB provides interfaces to connect to OilPriceAPI via APIs and Oil Price data into MindsDB. - ---- - -## Table of Contents - -- [OilPriceAPI Handler](#oilpriceapi-handler) - - [Table of Contents](#table-of-contents) - - [About OilPriceAPI](#about-oilpriceapi) - - [OilPriceAPI Handler Implementation](#oilpriceapi-handler-implementation) - - [OilPriceAPI Handler Initialization](#oilpriceapi-handler-initialization) - - [Implemented Features](#implemented-features) - - [Example Usage](#example-usage) - ---- - -## About OilPriceAPI - -OilPriceAPI is a RESTful providing various endpoints and parameters for retrieving historical or live oil prices. - - -## OilPriceAPI Handler Implementation - -This handler was implemented using the `requests` library that makes http calls to https://docs.oilpriceapi.com/guide/#endpoints. - -## OilPriceAPI Handler Initialization - -The OilPriceAPI handler is initialized with the following parameters: - -- `api_key`: API Key used to authenticate with OilPriceAPI - -Read about creating an API Key [here](https://www.oilpriceapi.com/). - -## Implemented Features - -- [x] OilPriceAPI - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -## Example Usage - -The first step is to create a database with the new `oilpriceapi` engine. - -~~~~sql -CREATE DATABASE mindsdb_oilpriceapi -WITH ENGINE = 'oilpriceapi', -PARAMETERS = { - "api_key": "" -}; -~~~~ - -Use the established connection to query your database: - -~~~~sql -SELECT * FROM mindsdb_oilpriceapi.latest_price; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_oilpriceapi.latest_price where by_type="daily_average_price" and by_code="WTI_USD"; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_oilpriceapi.past_day_price where by_type="daily_average_price" and by_code="WTI_USD"; -~~~~ diff --git a/mindsdb/integrations/handlers/oilpriceapi_handler/__about__.py b/mindsdb/integrations/handlers/oilpriceapi_handler/__about__.py deleted file mode 100644 index cf289ff94da..00000000000 --- a/mindsdb/integrations/handlers/oilpriceapi_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB OilPriceAPI handler" -__package_name__ = "mindsdb_oilpriceapi_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for OilPriceAPI" -__author__ = "Abhilash K R" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/oilpriceapi_handler/__init__.py b/mindsdb/integrations/handlers/oilpriceapi_handler/__init__.py deleted file mode 100644 index ca7aebd6941..00000000000 --- a/mindsdb/integrations/handlers/oilpriceapi_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .oilpriceapi_handler import OilPriceAPIHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Oil Price API" -name = "oilpriceapi" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/oilpriceapi_handler/connection_args.py b/mindsdb/integrations/handlers/oilpriceapi_handler/connection_args.py deleted file mode 100644 index 9895c5ca4da..00000000000 --- a/mindsdb/integrations/handlers/oilpriceapi_handler/connection_args.py +++ /dev/null @@ -1,18 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.PWD, - "description": "OilPriceAPI key to use for authentication.", - "required": True, - "label": "Api key", - "secret": True - } -) - -connection_args_example = OrderedDict( - api_key="" -) diff --git a/mindsdb/integrations/handlers/oilpriceapi_handler/icon.svg b/mindsdb/integrations/handlers/oilpriceapi_handler/icon.svg deleted file mode 100644 index eefe77018bd..00000000000 --- a/mindsdb/integrations/handlers/oilpriceapi_handler/icon.svg +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi.py b/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi.py deleted file mode 100644 index e380684bab1..00000000000 --- a/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi.py +++ /dev/null @@ -1,45 +0,0 @@ -import requests - - -class OilPriceAPIClient: - def __init__(self, api_key): - self.api_key = api_key - self.base_endpoint = "https://api.oilpriceapi.com/v1/prices" - self.valid_values_by_type = ["spot_price", "daily_average_price"] - self.valid_values_by_code = ["BRENT_CRUDE_USD", "WTI_USD"] - - def make_request(self, url, params={}): - headers = {'Content-type': 'application/json'} - if self.api_key: - headers['Authorization'] = 'Token ' + self.api_key - resp = requests.get(url, headers=headers, params=params) - content = {} - if resp.status_code == 200: - content = {'content': resp.json(), 'code': 200} - else: - content = {'content': {}, 'code': resp.status_code, 'error': resp.text} - return content - - def _is_valid_by_type(self, val): - return val in self.valid_values_by_type - - def _is_valid_by_code(self, val): - return val in self.valid_values_by_code - - def create_params_dict(self, by_type, by_code): - params = {} - if by_type is not None: - params["by_type"] = by_type - if by_code is not None: - params["by_code"] = by_code - return params - - def get_latest_price(self, by_type=None, by_code=None): - url = f'{self.base_endpoint}/latest/' - params = self.create_params_dict(by_type=by_type, by_code=by_code) - return self.make_request(url, params=params) - - def get_price_past_day(self, by_type=None, by_code=None): - url = f'{self.base_endpoint}/past_day/' - params = self.create_params_dict(by_type=by_type, by_code=by_code) - return self.make_request(url, params=params) diff --git a/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi_handler.py b/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi_handler.py deleted file mode 100644 index 93687ac94b2..00000000000 --- a/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi_handler.py +++ /dev/null @@ -1,100 +0,0 @@ -from mindsdb.integrations.handlers.oilpriceapi_handler.oilpriceapi_tables import ( - OilPriceLatestTable, - OilPricePastDayPriceTable -) -from mindsdb.integrations.handlers.oilpriceapi_handler.oilpriceapi import OilPriceAPIClient -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - - -logger = log.getLogger(__name__) - - -class OilPriceAPIHandler(APIHandler): - """The OilPriceAPI handler implementation""" - - def __init__(self, name: str, **kwargs): - """Initialize the OilPriceAPI handler. - - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - self.client = OilPriceAPIClient(self.connection_data["api_key"]) - self.is_connected = False - - latest_price_data = OilPriceLatestTable(self) - self._register_table("latest_price", latest_price_data) - - past_day_price_data = OilPricePastDayPriceTable(self) - self._register_table("past_day_price", past_day_price_data) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - - Returns - ------- - StatusResponse - connection object - """ - resp = StatusResponse(False) - status = self.client.get_latest_price() - if status["code"] != 200: - resp.success = False - resp.error_message = status["error"] - return resp - self.is_connected = True - return resp - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - - Returns - ------- - StatusResponse - Status confirmation - """ - response = StatusResponse(False) - - try: - status = self.client.get_latest_price() - if status["code"] == 200: - logger.info("Authentication successful") - response.success = True - else: - response.success = False - logger.info("Error connecting to OilPriceAPI. " + status["error"]) - response.error_message = status["error"] - except Exception as e: - logger.error(f"Error connecting to OilPriceAPI: {e}!") - response.error_message = e - - self.is_connected = response.success - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi_tables.py b/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi_tables.py deleted file mode 100644 index 9a9f0ee8540..00000000000 --- a/mindsdb/integrations/handlers/oilpriceapi_handler/oilpriceapi_tables.py +++ /dev/null @@ -1,208 +0,0 @@ -import pandas as pd -from typing import List -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor -from mindsdb.utilities import log -from mindsdb_sql_parser import ast - -logger = log.getLogger(__name__) - - -class OilPriceLatestTable(APITable): - """The Latest Oil Price Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://docs.oilpriceapi.com/guide/#prices-latest" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - latest oil price matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'latest_price', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - - for op, arg1, arg2 in where_conditions: - if arg1 == 'by_type': - if op == '=': - search_params["by_type"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for by_type column.") - - if not self.handler.client._is_valid_by_type(arg2): - raise ValueError("Unknown value for `by_type` parameter. The allowed values are - " + self.handler.client.valid_values_by_type) - - elif arg1 == 'by_code': - if op == '=': - search_params["by_code"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for by_code column.") - - if not self.handler.client._is_valid_by_code(arg2): - raise ValueError("Unknown value for `by_code` parameter. The allowed values are - " + self.handler.client.valid_values_by_code) - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - latest_price_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.client.get_latest_price(search_params.get("by_type"), search_params.get("by_code")) - - self.check_res(res=response) - - content = response["content"] - - latest_price_df = pd.json_normalize(content["data"]) - - select_statement_executor = SELECTQueryExecutor( - latest_price_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - latest_price_df = select_statement_executor.execute_query() - - return latest_price_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "price", - "formatted", - "currency", - "code", - "created_at", - "type" - ] - - -class OilPricePastDayPriceTable(APITable): - """The Past Day Oil Price Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://docs.oilpriceapi.com/guide/#prices-past-day" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - past day oil price matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'past_day_price', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - - for op, arg1, arg2 in where_conditions: - if arg1 == 'by_type': - if op == '=': - search_params["by_type"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for by_type column.") - - if not self.handler.client._is_valid_by_type(arg2): - raise ValueError("Unknown value for `by_type` parameter. The allowed values are - " + self.handler.client.valid_values_by_type) - - elif arg1 == 'by_code': - if op == '=': - search_params["by_code"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for by_code column.") - - if not self.handler.client._is_valid_by_code(arg2): - raise ValueError("Unknown value for `by_code` parameter. The allowed values are - " + self.handler.client.valid_values_by_code) - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - price_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.client.get_price_past_day(search_params.get("by_type"), search_params.get("by_code")) - - self.check_res(res=response) - - content = response["content"] - - price_df = pd.json_normalize(content["data"]["prices"]) - - select_statement_executor = SELECTQueryExecutor( - price_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - price_df = select_statement_executor.execute_query() - - return price_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "price", - "formatted", - "currency", - "code", - "created_at", - "type" - ] diff --git a/mindsdb/integrations/handlers/ollama_handler/__about__.py b/mindsdb/integrations/handlers/ollama_handler/__about__.py index d379f39e148..37799994782 100644 --- a/mindsdb/integrations/handlers/ollama_handler/__about__.py +++ b/mindsdb/integrations/handlers/ollama_handler/__about__.py @@ -1,9 +1,9 @@ -__title__ = 'MindsDB Ollama handler' -__package_name__ = 'mindsdb_ollama_handler' -__version__ = '0.0.1' +__title__ = "MindsDB Ollama handler" +__package_name__ = "mindsdb_ollama_handler" +__version__ = "0.0.1" __description__ = "MindsDB handler for Ollama" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' +__author__ = "MindsDB Inc" +__github__ = "https://github.com/mindsdb/mindsdb" +__pypi__ = "https://pypi.org/project/mindsdb/" +__license__ = "MIT" +__copyright__ = "Copyright 2023- mindsdb" diff --git a/mindsdb/integrations/handlers/ollama_handler/__init__.py b/mindsdb/integrations/handlers/ollama_handler/__init__.py index 806f750edb9..eea6a1903d6 100644 --- a/mindsdb/integrations/handlers/ollama_handler/__init__.py +++ b/mindsdb/integrations/handlers/ollama_handler/__init__.py @@ -1,19 +1,19 @@ from mindsdb.integrations.libs.const import HANDLER_TYPE from .__about__ import __version__ as version, __description__ as description + try: from .ollama_handler import OllamaHandler as Handler + import_error = None except Exception as e: Handler = None import_error = e -title = 'Ollama' -name = 'ollama' +title = "Ollama" +name = "ollama" type = HANDLER_TYPE.ML -icon_path = 'icon.png' +icon_path = "icon.png" permanent = False -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] +__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"] diff --git a/mindsdb/integrations/handlers/ollama_handler/ollama_handler.py b/mindsdb/integrations/handlers/ollama_handler/ollama_handler.py index 5b03b2a1f68..639345933fa 100644 --- a/mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +++ b/mindsdb/integrations/handlers/ollama_handler/ollama_handler.py @@ -14,38 +14,40 @@ class OllamaHandler(BaseMLEngine): @staticmethod def create_validation(target, args=None, **kwargs): - if 'using' not in args: + if "using" not in args: raise Exception("Ollama engine requires a USING clause! Refer to its documentation for more details.") else: - args = args['using'] + args = args["using"] - if 'model_name' not in args: - raise Exception('`model_name` must be provided in the USING clause.') + if "model_name" not in args: + raise Exception("`model_name` must be provided in the USING clause.") # check ollama service health - connection = args.get('ollama_serve_url', OllamaHandler.DEFAULT_SERVE_URL) - status = requests.get(connection + '/api/tags').status_code + connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) + status = requests.get(connection + "/api/tags").status_code if status != 200: - raise Exception(f"Ollama service is not working (status `{status}`). Please double check it is running and try again.") # noqa + raise Exception( + f"Ollama service is not working (status `{status}`). Please double check it is running and try again." + ) # noqa def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - """ Pull LLM artifacts with Ollama API. """ + """Pull LLM artifacts with Ollama API.""" # arg setter - args = args['using'] - args['target'] = target - connection = args.get('ollama_serve_url', OllamaHandler.DEFAULT_SERVE_URL) + args = args["using"] + args["target"] = target + connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) def _model_check(): - """ Checks model has been pulled and that it works correctly. """ + """Checks model has been pulled and that it works correctly.""" responses = {} - for endpoint in ['generate', 'embeddings']: + for endpoint in ["generate", "embeddings"]: try: code = requests.post( - connection + f'/api/{endpoint}', + connection + f"/api/{endpoint}", json={ - 'model': args['model_name'], - 'prompt': 'Hello.', - } + "model": args["model_name"], + "prompt": "Hello.", + }, ).status_code responses[endpoint] = code except Exception: @@ -57,19 +59,21 @@ def _model_check(): if 200 not in responses.values(): # pull model (blocking operation) and serve # TODO: point to the engine storage folder instead of default location - connection = args.get('ollama_serve_url', OllamaHandler.DEFAULT_SERVE_URL) - requests.post(connection + '/api/pull', json={'name': args['model_name']}) + connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) + requests.post(connection + "/api/pull", json={"name": args["model_name"]}) # try one last time responses = _model_check() if 200 not in responses.values(): - raise Exception(f"Ollama model `{args['model_name']}` is not working correctly. Please try pulling this model manually, check it works correctly and try again.") # noqa + raise Exception( + f"Ollama model `{args['model_name']}` is not working correctly. Please try pulling this model manually, check it works correctly and try again." + ) # noqa supported_modes = {k: True if v == 200 else False for k, v in responses.items()} # check if a mode has been provided and if it is valid runnable_modes = [mode for mode, supported in supported_modes.items() if supported] - if 'mode' in args: - if args['mode'] not in runnable_modes: + if "mode" in args: + if args["mode"] not in runnable_modes: raise Exception(f"Mode `{args['mode']}` is not supported by the model `{args['model_name']}`.") # if a mode has not been provided, check if the model supports only one mode @@ -77,11 +81,11 @@ def _model_check(): # if it supports multiple modes, set the default mode to 'generate' else: if len(runnable_modes) == 1: - args['mode'] = runnable_modes[0] + args["mode"] = runnable_modes[0] else: - args['mode'] = 'generate' + args["mode"] = "generate" - self.model_storage.json_set('args', args) + self.model_storage.json_set("args", args) def predict(self, df: pd.DataFrame, args: Optional[Dict] = None) -> pd.DataFrame: """ @@ -93,50 +97,73 @@ def predict(self, df: pd.DataFrame, args: Optional[Dict] = None) -> pd.DataFrame pd.DataFrame: The DataFrame containing row-wise text completions. """ # setup - pred_args = args.get('predict_params', {}) - args = self.model_storage.json_get('args') - model_name, target_col = args['model_name'], args['target'] - prompt_template = pred_args.get('prompt_template', - args.get('prompt_template', 'Answer the following question: {{{{text}}}}')) + pred_args = args.get("predict_params", {}) + args = self.model_storage.json_get("args") + model_name, target_col = args["model_name"], args["target"] + + # Auto-detect column if template is missing + # If user provided a specific template + user_template = pred_args.get("prompt_template", args.get("prompt_template")) + + # OR If no template and 'text' column is missing, then auto-detect + if user_template is None and "text" not in df.columns and len(df.columns) == 1: + col_name = df.columns[0] + # Create a template dynamically + prompt_template = "Answer the following question: {{{{" + col_name + "}}}}" + else: + # Fallback: Use user template OR default to 'text' (Old behavior) + prompt_template = user_template if user_template else "Answer the following question: {{{{text}}}}" # prepare prompts prompts, empty_prompt_ids = get_completed_prompts(prompt_template, df) - df['__mdb_prompt'] = prompts + df["__mdb_prompt"] = prompts # setup endpoint - endpoint = args.get('mode', 'generate') + endpoint = args.get("mode", "generate") # call llm completions = [] for i, row in df.iterrows(): if i not in empty_prompt_ids: - connection = args.get('ollama_serve_url', OllamaHandler.DEFAULT_SERVE_URL) + temperature = pred_args.get("temperature", args.get("temperature")) + + # Options dictionary + options = {} + if temperature is not None: + try: + options["temperature"] = float(temperature) + except ValueError: + pass + + # Calling API with the new options + connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) raw_output = requests.post( - connection + f'/api/{endpoint}', + connection + f"/api/{endpoint}", json={ - 'model': model_name, - 'prompt': row['__mdb_prompt'], - } + "model": model_name, + "prompt": row["__mdb_prompt"], + "options": options, # options passed here + }, ) - lines = raw_output.content.decode().split('\n') # stream of output tokens + lines = raw_output.content.decode().split("\n") # stream of output tokens values = [] for line in lines: - if line != '': + if line != "": info = json.loads(line) - if 'response' in info: - token = info['response'] + if "response" in info: + token = info["response"] values.append(token) - elif 'embedding' in info: - embedding = info['embedding'] + elif "embedding" in info: + embedding = info["embedding"] values.append(embedding) - if endpoint == 'embeddings': + if endpoint == "embeddings": completions.append(values) else: - completions.append(''.join(values)) + completions.append("".join(values)) else: - completions.append('') + completions.append("") # consolidate output data = pd.DataFrame(completions) @@ -144,28 +171,32 @@ def predict(self, df: pd.DataFrame, args: Optional[Dict] = None) -> pd.DataFrame return data def describe(self, attribute: Optional[str] = None) -> pd.DataFrame: - args = self.model_storage.json_get('args') - model_name, target_col = args['model_name'], args['target'] - prompt_template = args.get('prompt_template', 'Answer the following question: {{{{text}}}}') + args = self.model_storage.json_get("args") + model_name, target_col = args["model_name"], args["target"] + prompt_template = args.get("prompt_template", "Answer the following question: {{{{text}}}}") if attribute == "features": - return pd.DataFrame([[target_col, prompt_template]], columns=['target_column', 'mindsdb_prompt_template']) + return pd.DataFrame([[target_col, prompt_template]], columns=["target_column", "mindsdb_prompt_template"]) # get model info else: - connection = args.get('ollama_serve_url', OllamaHandler.DEFAULT_SERVE_URL) - model_info = requests.post(connection + '/api/show', json={'name': model_name}).json() - return pd.DataFrame([[ - model_name, - model_info.get('license', 'N/A'), - model_info.get('modelfile', 'N/A'), - model_info.get('parameters', 'N/A'), - model_info.get('template', 'N/A'), - ]], + connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) + model_info = requests.post(connection + "/api/show", json={"name": model_name}).json() + return pd.DataFrame( + [ + [ + model_name, + model_info.get("license", "N/A"), + model_info.get("modelfile", "N/A"), + model_info.get("parameters", "N/A"), + model_info.get("template", "N/A"), + ] + ], columns=[ - 'model_type', - 'license', - 'modelfile', - 'parameters', - 'ollama_base_template', - ]) + "model_type", + "license", + "modelfile", + "parameters", + "ollama_base_template", + ], + ) diff --git a/mindsdb/integrations/handlers/ollama_handler/tests/test_ollama_handler.py b/mindsdb/integrations/handlers/ollama_handler/tests/test_ollama_handler.py new file mode 100644 index 00000000000..b06caaae4e6 --- /dev/null +++ b/mindsdb/integrations/handlers/ollama_handler/tests/test_ollama_handler.py @@ -0,0 +1,45 @@ +import unittest +from unittest.mock import patch, Mock +import pandas as pd +from mindsdb.integrations.handlers.ollama_handler.ollama_handler import OllamaHandler + + +class TestOllamaHandler(unittest.TestCase): + def setUp(self): + # Mock the storage to return valid model configuration + mock_storage = Mock() + mock_storage.json_get.return_value = { + "model_name": "tinyllama", + "target": "response", + "ollama_serve_url": "http://localhost:11434", + } + + # Initialize handler with mocked storage + self.handler = OllamaHandler(name="test_ollama", model_storage=mock_storage, engine_storage={}) + + @patch("mindsdb.integrations.handlers.ollama_handler.ollama_handler.requests.post") + def test_temperature_passing(self, mock_post): + """ + Test that the temperature parameter is correctly extracted from args + and passed to the Ollama API options. + """ + # Setup mock response + mock_response = Mock() + mock_response.content = b'{"response": "Test response"}' + mock_post.return_value = mock_response + + # Create input dataframe + df = pd.DataFrame({"text": ["Hello"]}) + + # Execute prediction with temperature argument + self.handler.predict(df, args={"predict_params": {"temperature": 0.5}}) + + # Verify API call payload + call_args = mock_post.call_args[1]["json"] + + self.assertIn("options", call_args) + self.assertEqual(call_args["options"]["temperature"], 0.5) + + +if __name__ == "__main__": + unittest.main() diff --git a/mindsdb/integrations/handlers/openai_handler/__init__.py b/mindsdb/integrations/handlers/openai_handler/__init__.py index de3d33563f3..fc37a6aa911 100644 --- a/mindsdb/integrations/handlers/openai_handler/__init__.py +++ b/mindsdb/integrations/handlers/openai_handler/__init__.py @@ -1,22 +1,34 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE +from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE from .__about__ import __version__ as version, __description__ as description from .creation_args import creation_args from .model_using_args import model_using_args + try: from .openai_handler import OpenAIHandler as Handler + import_error = None except Exception as e: Handler = None import_error = e -title = 'OpenAI' -name = 'openai' +title = "OpenAI" +name = "openai" type = HANDLER_TYPE.ML -icon_path = 'icon.svg' +icon_path = "icon.svg" permanent = False +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path', 'creation_args', 'model_using_args' + "Handler", + "version", + "name", + "type", + "title", + "description", + "import_error", + "icon_path", + "creation_args", + "model_using_args", + "support_level", ] diff --git a/mindsdb/integrations/handlers/openbb_handler/README.md b/mindsdb/integrations/handlers/openbb_handler/README.md deleted file mode 100644 index d9a7c397c4f..00000000000 --- a/mindsdb/integrations/handlers/openbb_handler/README.md +++ /dev/null @@ -1,113 +0,0 @@ -# OpenBB Handler - -> The OpenBB platform (where this is built on) is not yet official released, so this documentation will need to be updated over the next couple of weeks. - -[OpenBB](https://openbb.co) is a leading open source investment research software platform that provides access to high-quality financial market data. Its mission is to make investment research effective, powerful, and accessible to everyone. - -This handler integrates with the [OpenBB Platform](https://my.openbb.co/app/sdk), and provides access to 500+ financial data endpoints from over 90+ different data providers. The open source platform can be found [on GitHub](https://github.com/OpenBB-finance/OpenBBTerminal). - -The data available can be found [here](https://docs.openbb.co/sdk/reference). - -## Pre-requisites - -In order to be able to access OpenBB, you will need to sign-up to [OpenBB Hub](https://my.openbb.co). - -Once you are in, you'll want to go to the [Platform API Keys](https://my.openbb.co/app/sdk/api-keys) which allows you to set the API keys from the data providers you are interested in getting data from. - -After that you'll need to get your Personal Access Token (PAT) in [here](https://my.openbb.co/app/sdk/pat). The PAT is what permits OpenBB to know what API keys to use once you hit endpoint from the data providers of interest. - -## Connect to the OpenBB Platform - -We start by creating a database to connect to the OpenBB Platform. - -```sql -CREATE DATABASE obb_db -WITH ENGINE = "openbb", -PARAMETERS = { - "PAT": "YOUR PERSONAL ACCESS TOKEN FROM OPENBB HUB" - }; -``` - -## Select Data - -In order to support all data available through OpenBB's ecosystem ([reference](https://docs.openbb.co/sdk/reference)) we have a single DB called `obb_db.openbb_fetcher` and then **ALWAYS** need to provide the `cmd` argument which selects the data endpoint the user is interested in. - -Then, based on the request sent by the user, you may need to specify other parameters, which is done through `AND` operator. - -These additional parameters need to have the exact same naming as in [OpenBB documentation](https://docs.openbb.co/sdk/reference). In addition, they need to be surrounded by double quotes AND internally they need to be represented by the same convention that the OpenBB API would be expecting. - -## Examples - -### **Example 1**: obb.crypto.price.historical - -Reference: [https://docs.openbb.co/platform/reference/crypto/price/historical](https://docs.openbb.co/platform/reference/crypto/price/historical) - ->>Note: Make Sure to add API Key corresponding to provider to [OpenBB Dashboard](https://my.openbb.co/app/platform/credentials) - -MindsDB will provide an abstraction in full SQL for commands so that they look like virtual tables: - -```sql -SELECT * -FROM obb_db.crypto_price_historical -WHERE symbol = 'BCTUSD' - AND start_date = '2023-09-01' - AND provider='fmp'; -``` - -You can also call the command like this: - -```sql -SELECT * -FROM obb_db.openbb_fetcher -WHERE cmd = "obb.crypto.price.historical" - AND symbol = "'BCTUSD'" - AND start_date = "'2023-09-01'" - AND provider = "'fmp'"; -``` - -is converted into: - -```python -obb.crypto.price.historical(symbol = 'BCTUSD', start_date = '2023-09-01', provider = 'fmp') -``` -OpenBB Results - - -### **Example 2**: obb.economy.cpi - -Reference: [https://docs.openbb.co/sdk/reference/economy/cpi](https://docs.openbb.co/sdk/reference/economy/cpi) - -```sql -SELECT * -FROM obb_db.openbb_fetcher -WHERE cmd = "obb.economy.cpi" - AND country = "'india,israel'"; -``` - -is converted into: - -```python -obb.economy.cpi(country = 'india,israel') -``` - -OpenBB Results - -## Enhance data access through OpenBB extensions - -When install the OpenBB platform, you'll get by default access to core data that OpenBB official supports. However, there's a broad range of additional data that can be utilized - for that all you need to do is to install OpenBB extensions and the python library will automatically recognize where they belong. - -Thus, you can go to the [requirements.txt](requirements.txt) file and add the extension of your choice. - -E.g. if you want to access data from yfinance, all you need to do is add `openbb-yfinance` to that file, and now you can re-run the _Example 1_ with: - -```sql - -SELECT * -FROM obb_db.equity_price_quote -where - symbol = 'RELIANCE.NS' - AND provider = "yfinance"; - -``` - -which will allow you to access financial data from stocks from India (which wasn't possible before). diff --git a/mindsdb/integrations/handlers/openbb_handler/__about__.py b/mindsdb/integrations/handlers/openbb_handler/__about__.py deleted file mode 100644 index 961709db282..00000000000 --- a/mindsdb/integrations/handlers/openbb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB OpenBB handler" -__package_name__ = "mindsdb_openbb_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for the OpenBB Platform" -__author__ = "Didier Rodrigues Lopes" -__github__ = "https://github.com/openbb-finance/openbbterminal" -__pypi__ = "https://pypi.org/project/openbb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - OpenBB" diff --git a/mindsdb/integrations/handlers/openbb_handler/__init__.py b/mindsdb/integrations/handlers/openbb_handler/__init__.py deleted file mode 100644 index b4e52f00e90..00000000000 --- a/mindsdb/integrations/handlers/openbb_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .openbb_handler import OpenBBHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "OpenBB" -name = "openbb" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/openbb_handler/icon.svg b/mindsdb/integrations/handlers/openbb_handler/icon.svg deleted file mode 100644 index cd28a91b381..00000000000 --- a/mindsdb/integrations/handlers/openbb_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/openbb_handler/openbb_handler.py b/mindsdb/integrations/handlers/openbb_handler/openbb_handler.py deleted file mode 100644 index 284dbbc1910..00000000000 --- a/mindsdb/integrations/handlers/openbb_handler/openbb_handler.py +++ /dev/null @@ -1,119 +0,0 @@ -from functools import reduce -from openbb_core.app.static.app_factory import create_app - -from mindsdb.integrations.handlers.openbb_handler.openbb_tables import create_table_class -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.utilities import log -from mindsdb.integrations.handlers.openbb_handler.openbb_tables import OpenBBtable - -logger = log.getLogger(__name__) - - -class OpenBBHandler(APIHandler): - """A class for handling connections and interactions with the OpenBB Platform. - - Attributes: - PAT (str): OpenBB's personal access token. Sign up here: https://my.openbb.co - is_connected (bool): Whether or not the user is connected to their OpenBB account. - - """ - - def __init__(self, name: str = None, **kwargs): - """Registers all API tables and prepares the handler for an API connection. - - Args: - name: (str): The handler name to use - """ - super().__init__(name) - self.PAT = None - - args = kwargs.get("connection_data", {}) - if "PAT" in args: - self.PAT = args["PAT"] - - self.is_connected = False - - # Initialize OpenBB - # pylint: disable=import-outside-toplevel - from openbb.package.__extensions__ import Extensions - self.obb = create_app(Extensions) - - for cmd in list(self.obb.coverage.command_model.keys()): - - openbb_params = self.obb.coverage.command_model[cmd]["openbb"]["QueryParams"] - openbb_data = self.obb.coverage.command_model[cmd]["openbb"]["Data"] - - # Creates the default data retrieval function for the given command - # e.g. obb.equity.price.historical, obb.equity.fa.income - # Note: Even though openbb_params just contains the standard fields that are - # common across vendors users are able to select any of the fields from the vendor - # as well. However, some of them might have no effect on the data if the vendor - # doesn't support it. Regardless, the endpoint won't crash. - table_class = create_table_class( - params_metadata=openbb_params, - response_metadata=openbb_data, - obb_function=reduce(getattr, cmd[1:].split('.'), self.obb), - func_docs=f"https://docs.openbb.co/platform/reference/{cmd[1:].replace('.', '/')}" - ) - self._register_table(cmd.replace('.', '_')[1:], table_class(self)) - - # Creates the data retrieval function for each provider - # e.g. obb.equity.price.historical_polygon, obb.equity.price.historical_intrinio - for provider in list(self.obb.coverage.command_model[cmd].keys()): - - # Skip the openbb provider since we already created it and it will look like obb.equity.price.historical - if provider == "openbb": - continue - - provider_extra_params = self.obb.coverage.command_model[cmd][provider]["QueryParams"] - combined_params = provider_extra_params.copy() # create a copy to avoid modifying the original - combined_params["fields"] = {**openbb_params["fields"], **provider_extra_params["fields"]} # merge the fields - - provider_extra_data = self.obb.coverage.command_model[cmd][provider]["Data"] - combined_data = provider_extra_data.copy() # create a copy to avoid modifying the original - combined_data["fields"] = {**openbb_data["fields"], **provider_extra_data["fields"]} # merge the fields - - table_class = create_table_class( - params_metadata=combined_params, - response_metadata=combined_data, - obb_function=reduce(getattr, cmd[1:].split('.'), self.obb), - func_docs=f"https://docs.openbb.co/platform/reference/{cmd[1:].replace('.', '/')}", - provider=provider - ) - self._register_table(f"{cmd.replace('.', '_')[1:]}_{provider}", table_class(self)) - - obb_table = OpenBBtable(self) - self._register_table("openbb_fetcher", obb_table) - - def connect(self) -> bool: - """Connects with OpenBB account through personal access token (PAT). - - Returns none. - """ - self.is_connected = False - self.obb.account.login(pat=self.PAT) - - # Check if PAT utilized is valid - # if obb.user.profile.active: - self.is_connected = True - return True - - def check_connection(self) -> StatusResponse: - """Checks connection to OpenBB accounting by checking the validity of the PAT. - - Returns StatusResponse indicating whether or not the handler is connected. - """ - - response = StatusResponse(False) - - try: - if self.connect(): - response.success = True - - except Exception as e: - logger.error(f"Error connecting to OpenBB Platform: {e}!") - response.error_message = e - - self.is_connected = response.success - return response diff --git a/mindsdb/integrations/handlers/openbb_handler/openbb_tables.py b/mindsdb/integrations/handlers/openbb_handler/openbb_tables.py deleted file mode 100644 index 69cf5631ecc..00000000000 --- a/mindsdb/integrations/handlers/openbb_handler/openbb_tables.py +++ /dev/null @@ -1,337 +0,0 @@ -from mindsdb.api.executor.utilities.sql import query_df -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.date_utils import parse_local_date -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, project_dataframe, filter_dataframe -from mindsdb.integrations.utilities.sql_utils import sort_dataframe -from mindsdb.utilities import log - -from typing import Dict, List, Union -from pydantic import ValidationError - -import pandas as pd - -logger = log.getLogger(__name__) - - -class OpenBBtable(APITable): - def _get_params_from_conditions(self, conditions: List) -> Dict: - """Gets aggregate trade data API params from SQL WHERE conditions. - - Returns params to use for Binance API call to klines. - - Args: - conditions (List): List of individual SQL WHERE conditions. - """ - params: dict = {} - # generic interpreter for conditions - # since these are all equality conditions due to OpenBB Platform's API - # then we can just use the first arg as the key and the second as the value - for op, arg1, arg2 in conditions: - if op != "=": - raise NotImplementedError - params[arg1] = arg2 - - return params - - def _process_cols_names(self, cols: list) -> list: - new_cols = [] - for element in cols: - # If the element is a tuple, we want to merge the elements together - if isinstance(element, tuple): - # If there's more than one element we want to merge them together - if len(element) > 1: - # Prevents the case where there's a multi column index and the index is a date - # in that instance we will have ('date', '') and this avoids having a column named 'date_' - new_element = "_".join(map(str, element)).rstrip("_") - new_cols.append(new_element) - else: - new_cols.append(element[0]) - else: - new_cols.append(element) - return new_cols - - def select(self, query: ast.Select) -> pd.DataFrame: - """Selects data from the OpenBB Platform and returns it as a pandas DataFrame. - - Returns dataframe representing the OpenBB data. - - Args: - query (ast.Select): Given SQL SELECT query - """ - conditions = extract_comparison_conditions(query.where) - params = self._get_params_from_conditions(conditions) - - try: - if params is None: - logger.error("At least cmd needs to be added!") - raise Exception("At least cmd needs to be added!") - - # Get the OpenBB command to get the data from - cmd = params.pop("cmd") - - # Ensure that the cmd provided is a valid OpenBB command - available_cmds = [f"obb{cmd}" for cmd in list(self.handler.obb.coverage.commands.keys())] - if cmd not in available_cmds: - logger.error(f"The command provided is not supported by OpenBB! Choose one of the following: {', '.join(available_cmds)}") - raise Exception(f"The command provided is not supported by OpenBB! Choose one of the following: {', '.join(available_cmds)}") - - args = "" - # If there are parameters create arguments as a string - if params: - for arg, val in params.items(): - args += f"{arg}={val}," - - # Remove the additional ',' added at the end - if args: - args = args[:-1] - - # Recreate the OpenBB command with the arguments - openbb_cmd = f"self.handler.{cmd}({args})" - - # Execute the OpenBB command and return the OBBject - openbb_object = eval(openbb_cmd) - - # Transform the OBBject into a pandas DataFrame - data = openbb_object.to_df() - - # Check if index is a datetime, if it is we want that as a column - if isinstance(data.index, pd.DatetimeIndex): - data.reset_index(inplace=True) - - # Process column names - data.columns = self._process_cols_names(data.columns) - - except Exception as e: - logger.error(f"Error accessing data from OpenBB: {e}!") - raise Exception(f"Error accessing data from OpenBB: {e}!") - - return data - - -def create_table_class( - params_metadata, - response_metadata, - obb_function, - func_docs="", - provider=None -): - """Creates a table class for the given OpenBB Platform function.""" - mandatory_fields = [key for key in params_metadata['fields'].keys() if params_metadata['fields'][key].is_required() is True] - response_columns = list(response_metadata['fields'].keys()) - - class AnyTable(APITable): - def _get_params_from_conditions(self, conditions: List) -> Dict: - """Gets aggregate trade data API params from SQL WHERE conditions. - - Returns params to use for Binance API call to klines. - - Args: - conditions (List): List of individual SQL WHERE conditions. - """ - params: dict = {} - # generic interpreter for conditions - # since these are all equality conditions due to OpenBB Platform's API - # then we can just use the first arg as the key and the second as the value - for op, arg1, arg2 in conditions: - if op == "=": - params[arg1] = arg2 - - return params - - def select(self, query: ast.Select) -> pd.DataFrame: - """Selects data from the OpenBB Platform and returns it as a pandas DataFrame. - - Returns dataframe representing the OpenBB data. - - Args: - query (ast.Select): Given SQL SELECT query - """ - conditions = extract_comparison_conditions(query.where) - arg_params = self._get_params_from_conditions(conditions=conditions) - - params = {} - if provider is not None: - params['provider'] = provider - - filters = [] - mandatory_args_set = {key: False for key in mandatory_fields} - columns_to_add = {} - strict_filter = arg_params.get('strict_filter', False) - - for op, arg1, arg2 in conditions: - if op == 'or': - raise NotImplementedError('OR is not supported') - - if arg1 in mandatory_fields: - mandatory_args_set[arg1] = True - - if ('start_' + arg1 in params_metadata['fields'] and arg1 in response_columns and arg2 is not None): - - if response_metadata['fields'][arg1].annotation == 'datetime': - date = parse_local_date(arg2) - interval = arg_params.get('interval', '1d') - - if op == '>': - params['start_' + arg1] = date.strftime('%Y-%m-%d') - elif op == '<': - params['end_' + arg1] = date.strftime('%Y-%m-%d') - elif op == '>=': - date = date - pd.Timedelta(interval) - params['start_' + arg1] = date.strftime('%Y-%m-%d') - elif op == '<=': - date = date + pd.Timedelta(interval) - params['end_' + arg1] = date.strftime('%Y-%m-%d') - elif op == '=': - date = date - pd.Timedelta(interval) - params['start_' + arg1] = date.strftime('%Y-%m-%d') - date = date + pd.Timedelta(interval) - params['end_' + arg1] = date.strftime('%Y-%m-%d') - - elif arg1 in params_metadata['fields'] or not strict_filter: - if op == '=': - params[arg1] = arg2 - columns_to_add[arg1] = arg2 - - filters.append([op, arg1, arg2]) - - if not all(mandatory_args_set.values()): - missing_args = ", ".join([k for k, v in mandatory_args_set.items() if v is False]) - text = f"You must specify the following arguments in the WHERE statement: {missing_args}\n" - - # Create docstring for the current function - text += "\nDocstring:" - for param in params_metadata['fields']: - field = params_metadata['fields'][param] - if getattr(field.annotation, '__origin__', None) is Union: - annotation = f"Union[{', '.join(arg.__name__ for arg in field.annotation.__args__)}]" - else: - annotation = field.annotation.__name__ - text += f"\n * {param}{'' if field.is_required() else ' (optional)'}: {annotation}\n{field.description}" - - text += f"\n\nFor more information check {func_docs}" - - raise NotImplementedError(text) - - try: - # Handle limit keyword correctly since it can't be parsed as a WHERE arg (i.e. WHERE limit = 50) - if query.limit is not None and 'limit' in params_metadata['fields']: - params['limit'] = query.limit.value - obbject = obb_function(**params) - - # Extract data in dataframe format - result = obbject.to_df() - - if result is None: - raise Exception(f"For more information check {func_docs}.") - - # Check if index is a datetime, if it is we want that as a column - if isinstance(result.index, pd.DatetimeIndex): - result.reset_index(inplace=True) - - if query.order_by: - result = sort_dataframe(result, query.order_by) - - if query.limit is not None: - result = result.head(query.limit.value) - - if result is None: - raise Exception(f"For more information check {func_docs}.") - - for key in columns_to_add: - result[key] = params[key] - - # filter targets - result = filter_dataframe(result, filters) - - if result is None: - raise Exception(f"For more information check {func_docs}.") - - columns = self.get_columns() - - columns += [col for col in result.columns if col not in columns] - - for full_target in query.targets: - if isinstance(full_target, ast.Star): - continue - if isinstance(full_target, ast.Identifier): - target = full_target.parts[-1].lower() - elif isinstance(full_target, ast.Function): - target = full_target.args[0].parts[-1].lower() - else: - # Could be a window function or other operation we can't handle. Defer to DuckDB. - return query_df(result, query) - if target not in columns: - raise ValueError(f"Unknown column '{target}' in 'field list'") - - # project targets - try: - result = project_dataframe(result, query.targets, columns) - except NotImplementedError: - # Target contains a function that we need DuckDB to resolve. - return query_df(result, query) - return result - - except AttributeError as e: - logger.info(f'Encountered error while executing OpenBB select: {str(e)}') - - # Create docstring for the current function - text = "Docstring:" - for param in params_metadata['fields']: - field = params_metadata['fields'][param] - if getattr(field.annotation, '__origin__', None) is Union: - annotation = f"Union[{', '.join(arg.__name__ for arg in field.annotation.__args__)}]" - else: - annotation = field.annotation.__name__ - text += f"\n * {param}{'' if field.is_required() else ' (optional)'}: {annotation}\n{field.description}" - - text += f"\n\nFor more information check {func_docs}" - - raise Exception(f"{str(e)}\n\n{text}.") from e - - except ValidationError as e: - logger.info(f'Encountered error while executing OpenBB select: {str(e)}') - - # Create docstring for the current function - text = "Docstring:" - for param in params_metadata['fields']: - field = params_metadata['fields'][param] - if getattr(field.annotation, '__origin__', None) is Union: - annotation = f"Union[{', '.join(arg.__name__ for arg in field.annotation.__args__)}]" - else: - annotation = field.annotation.__name__ - text += f"\n * {param}{'' if field.is_required() else ' (optional)'}: {annotation}\n{field.description}" - - text += f"\n\nFor more information check {func_docs}" - - raise Exception(f"{str(e)}\n\n{text}.") from e - - except Exception as e: - logger.info(f'Encountered error while executing OpenBB select: {str(e)}') - - # TODO: This one doesn't work because it's taken care of from MindsDB side - if "Table not found" in str(e): - raise Exception(f"{str(e)}\n\nCheck if the method exists here: {func_docs}.\n\n - If it doesn't you may need to look for the parent module to check whether there's a typo in the naming.\n- If it does you may need to install a new extension to the OpenBB Platform, and you can see what is available at https://my.openbb.co/app/platform/extensions.") from e - - if "Missing credential" in str(e): - raise Exception(f"{str(e)}\n\nGo to https://my.openbb.co/app/platform/api-keys to set this API key, for free.") from e - - # Catch all other errors - # Create docstring for the current function - text = "Docstring:" - for param in params_metadata['fields']: - field = params_metadata['fields'][param] - if getattr(field.annotation, '__origin__', None) is Union: - annotation = f"Union[{', '.join(arg.__name__ for arg in field.annotation.__args__)}]" - else: - annotation = field.annotation.__name__ - text += f"\n * {param}{'' if field.is_required() else ' (optional)'}: {annotation}\n{field.description}" - - text += f"\n\nFor more information check {func_docs}" - - raise Exception(f"{str(e)}\n\n{text}.") from e - - def get_columns(self): - return response_columns - - return AnyTable diff --git a/mindsdb/integrations/handlers/openbb_handler/requirements.txt b/mindsdb/integrations/handlers/openbb_handler/requirements.txt deleted file mode 100644 index e4e9ee98dd0..00000000000 --- a/mindsdb/integrations/handlers/openbb_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -openbb==4.3.1 -openbb-core==1.3.1 diff --git a/mindsdb/integrations/handlers/opengauss_handler/__about__.py b/mindsdb/integrations/handlers/opengauss_handler/__about__.py deleted file mode 100644 index 8f69a5e7dd8..00000000000 --- a/mindsdb/integrations/handlers/opengauss_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB openGauss handler' -__package_name__ = 'mindsdb_opengauss_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for openGauss" -__author__ = 'Nelson-He' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/opengauss_handler/__init__.py b/mindsdb/integrations/handlers/opengauss_handler/__init__.py deleted file mode 100644 index 2e77833ee66..00000000000 --- a/mindsdb/integrations/handlers/opengauss_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .connection_args import connection_args, connection_args_example -try: - from .opengauss_handler import OpenGaussHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'openGauss' -name = 'opengauss' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/opengauss_handler/connection_args.py b/mindsdb/integrations/handlers/opengauss_handler/connection_args.py deleted file mode 100644 index f9a6ee03770..00000000000 --- a/mindsdb/integrations/handlers/opengauss_handler/connection_args.py +++ /dev/null @@ -1,36 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the openGauss server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the openGauss server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the openGauss server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the openGauss server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the openGauss server. Must be an integer.' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=5432, - user='mindsdb', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/opengauss_handler/icon.svg b/mindsdb/integrations/handlers/opengauss_handler/icon.svg deleted file mode 100644 index 438afc264cd..00000000000 --- a/mindsdb/integrations/handlers/opengauss_handler/icon.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/opengauss_handler/opengauss_handler.py b/mindsdb/integrations/handlers/opengauss_handler/opengauss_handler.py deleted file mode 100644 index 069a86d6aba..00000000000 --- a/mindsdb/integrations/handlers/opengauss_handler/opengauss_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler - - -class OpenGaussHandler(PostgresHandler): - """ - This handler handles connection and execution of the openGauss statements. - """ - name = 'opengauss' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/opengauss_handler/tests/__init__.py b/mindsdb/integrations/handlers/opengauss_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py b/mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py deleted file mode 100644 index 38bccf83f67..00000000000 --- a/mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py +++ /dev/null @@ -1,44 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.opengauss_handler.opengauss_handler import OpenGaussHandler - - -class OpenGaussHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "localhost", - "port": "5432", - "user": "mindsdb", - "password": "mindsdb", - "database": "test", - "ssl": False - } - cls.handler = OpenGaussHandler('test_opengauss_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - assert self.handler.check_connection() - - def test_2_native_query_show_dbs(self): - dbs = self.handler.native_query("SHOW DATABASES;") - assert isinstance(dbs, list) - - def test_3_get_tables(self): - tbls = self.handler.get_tables() - assert isinstance(tbls, list) - - def test_5_create_table(self): - try: - self.handler.native_query("CREATE TABLE test_opengauss (test_col INTEGER)") - except Exception: - pass - - def test_6_describe_table(self): - described = self.handler.get_columns("dt_test") - assert isinstance(described, list) - - def test_7_select_query(self): - query = "SELECT * FROM dt_test WHERE 'id'='a'" - self.handler.native_query(query) diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/README.md b/mindsdb/integrations/handlers/openstreetmap_handler/README.md deleted file mode 100644 index a995d1b0630..00000000000 --- a/mindsdb/integrations/handlers/openstreetmap_handler/README.md +++ /dev/null @@ -1,119 +0,0 @@ -# OpenStreetMap Handler - -OpenStreetMap handler for MindsDB provides interfaces to connect to OpenStreetMap via APIs and pull map data into MindsDB. - ---- - -## Table of Contents - -- [OpenStreetMap Handler](#openstreetmap-handler) - - [Table of Contents](#table-of-contents) - - [About OpenStreetMap](#about-openstreetmap) - - [OpenStreetMap Handler Implementation](#openstreetmap-handler-implementation) - - [OpenStreetMap Handler Initialization](#openstreetmap-handler-initialization) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About OpenStreetMap - -OpenStreetMap is a map of the world, created by people like you and free to use under an open license. -
-https://www.openstreetmap.org/about - -## OpenStreetMap Handler Implementation - -This handler was implemented using [python_overpy](https://github.com/DinoTools/python-overpy), the Overpass API wrapper for Python. - -## Implemented Features - -- [x] OpenStreetMap Nodes Table for a given Area - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection -- [x] OpenStreetMap Ways Table for a given Area - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection -- [x] OpenStreetMap Relations Table for a given Area - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -## TODO - -- [ ] Support for more OpenStreetMap tables -- [ ] Support for more OpenStreetMap queries -- [ ] Support INSERT, UPDATE and DELETE for Nodes, Ways and Node table -- [ ] Support INSERT, UPDATE and DELETE for Ways table -- [ ] Support INSERT, UPDATE and DELETE for Relations table - -## Example Usage - -~~~~sql -CREATE DATABASE openstreetmap_datasource -WITH -ENGINE='openstreetmap', -parameters={}; -~~~~ - -Use the established connection to query your database: - -~~~~ - -```sql -SELECT * FROM openstreetmap_datasource.nodes WHERE id = 1; -``` - -```sql -SELECT * FROM openstreetmap_datasource.nodes WHERE area = 'your_area_value'; -``` - -```sql -SELECT * FROM openstreetmap_datasource.nodes ORDER BY id DESC LIMIT 10; -``` - -```sql -SELECT id, latitude, longitude FROM openstreetmap_datasource.nodes LIMIT 10; -``` - -```sql -SELECT * FROM openstreetmap_datasource.ways LIMIT 10; -``` - -```sql -SELECT * FROM openstreetmap_datasource.ways WHERE id = 1; -``` - -```sql -SELECT * FROM openstreetmap_datasource.ways ORDER BY id DESC LIMIT 10; -``` - -```sql -SELECT id, nodes, tags FROM openstreetmap_datasource.ways LIMIT 10; -``` - -```sql -SELECT * FROM openstreetmap_datasource.relations LIMIT 10; -``` - -```sql -SELECT * FROM openstreetmap_datasource.relations WHERE id = 1; -``` - -```sql -SELECT * FROM openstreetmap_datasource.relations ORDER BY id DESC LIMIT 10; -``` - -```sql -SELECT id, members, tags FROM openstreetmap_datasource.relations LIMIT 10; -``` - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/__about__.py b/mindsdb/integrations/handlers/openstreetmap_handler/__about__.py deleted file mode 100644 index 14dc16337d3..00000000000 --- a/mindsdb/integrations/handlers/openstreetmap_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB OpenStreetMap handler" -__package_name__ = "mindsdb_openstreetmap_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for OpenStreetMap" -__author__ = "Saurabh Kumar Singh" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023- mindsdb" diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/__init__.py b/mindsdb/integrations/handlers/openstreetmap_handler/__init__.py deleted file mode 100644 index ed8327a22dd..00000000000 --- a/mindsdb/integrations/handlers/openstreetmap_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .openstreetmap_handler import OpenStreetMapHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "OpenStreetMap" -name = "openstreetmap" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/icon.svg b/mindsdb/integrations/handlers/openstreetmap_handler/icon.svg deleted file mode 100644 index 4ed0c7687e9..00000000000 --- a/mindsdb/integrations/handlers/openstreetmap_handler/icon.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/openstreetmap_handler.py b/mindsdb/integrations/handlers/openstreetmap_handler/openstreetmap_handler.py deleted file mode 100644 index b5b3cd8ac2c..00000000000 --- a/mindsdb/integrations/handlers/openstreetmap_handler/openstreetmap_handler.py +++ /dev/null @@ -1,90 +0,0 @@ -import overpy - -from mindsdb.integrations.handlers.openstreetmap_handler.openstreetmap_tables import (OpenStreetMapNodeTable, - OpenStreetMapWayTable, OpenStreetMapRelationTable) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class OpenStreetMapHandler(APIHandler): - """The OpenStreetMap handler implementation.""" - - def __init__(self, name: str, **kwargs): - """Registers all API tables and prepares the handler for an API connection. - - Args: - name: (str): The handler name to use - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - nodes_data = OpenStreetMapNodeTable(self) - self._register_table("nodes", nodes_data) - - ways_data = OpenStreetMapWayTable(self) - self._register_table("ways", ways_data) - - relations_data = OpenStreetMapRelationTable(self) - self._register_table("relations", relations_data) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - - Returns: - StatusResponse: connection object - """ - if self.is_connected is True: - return self.connection - - api_session = overpy.Overpass() - - self.connection = api_session - - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - - try: - api_session = self.connect() - if api_session is not None: - response.success = True - except Exception as e: - logger.error('Error connecting to OpenStreetMap!') - response.error_message = str(e) - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> Response: - """Execute a native query on the handler. - - Args: - query (str): The query to execute. - - Returns: - Response: The response from the query. - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/openstreetmap_tables.py b/mindsdb/integrations/handlers/openstreetmap_handler/openstreetmap_tables.py deleted file mode 100644 index 73b71646f61..00000000000 --- a/mindsdb/integrations/handlers/openstreetmap_handler/openstreetmap_tables.py +++ /dev/null @@ -1,243 +0,0 @@ -import pandas as pd -from typing import Text, List, Dict - -from mindsdb_sql_parser import ast -from mindsdb.integrations.libs.api_handler import APITable - -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor - - -class OpenStreetMapNodeTable(APITable): - """The OpenStreetMap Nodes Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the OpenStreetMap API endpoint. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - OpenStreetMap data matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - where_conditions = extract_comparison_conditions(query.where) - - if query.limit: - result_limit = query.limit.value - else: - result_limit = 20 - - nodes_df = pd.json_normalize(self.get_nodes(where_conditions=where_conditions, limit=result_limit)) - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = nodes_df.columns - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - order_by_conditions = {} - if query.order_by and len(query.order_by) > 0: - order_by_conditions["columns"] = [] - order_by_conditions["ascending"] = [] - - for an_order in query.order_by: - if an_order.field.parts[0] == 'nodes': - if an_order.field.parts[1] in nodes_df.columns: - order_by_conditions["columns"].append(an_order.field.parts[1]) - - if an_order.direction == "ASC": - order_by_conditions["ascending"].append(True) - else: - order_by_conditions["ascending"].append(False) - else: - raise ValueError( - f"Order by unknown column {an_order.field.parts[1]}" - ) - - select_statement_executor = SELECTQueryExecutor( - nodes_df, - selected_columns, - [], - order_by_conditions - ) - nodes_df = select_statement_executor.execute_query() - - return nodes_df - - def get_nodes(self, **kwargs) -> List[Dict]: - where_conditions = kwargs.get('where_conditions', None) - - area, tags = None, {} - min_lat, min_lon, max_lat, max_lon = None, None, None, None - if where_conditions: - for condition in where_conditions: - if condition[1] == 'area': - area = condition[2] - - elif condition[1] == 'min_lat': - min_lat = condition[2] - - elif condition[1] == 'min_lon': - min_lon = condition[2] - - elif condition[1] == 'max_lat': - max_lat = condition[2] - - elif condition[1] == 'max_lon': - max_lon = condition[2] - - else: - tags[condition[1]] = condition[2] - - result = self.execute_osm_node_query( - tags=tags, - area=area, - min_lat=min_lat, - min_lon=min_lon, - max_lat=max_lat, - max_lon=max_lon, - limit=kwargs.get('limit', None) - ) - - nodes = [] - for node in result.nodes: - node_dict = { - "id": node.id, - "lat": node.lat, - "lon": node.lon, - "tags": node.tags - } - nodes.append(node_dict) - return nodes - - def execute_osm_node_query(self, tags, area=None, min_lat=None, min_lon=None, max_lat=None, max_lon=None, limit=None): - query_template = """ - [out:json]; - {area_clause} - node{area_node_clause}{tags_clause}{bbox}; - out {limit}; - """ - - tags_clause = "" - if tags: - for tag_key, tag_value in tags.items(): - tags_clause += '["{}"="{}"]'.format(tag_key, tag_value) - - area_clause, area_node_clause = "", "" - if area: - area_clause = 'area[name="{}"]->.city;\n'.format(area) - area_node_clause = "(area.city)" - - bbox_clause = "" - if min_lat or min_lon or max_lat or max_lon: - bbox_clause = "{},{},{},{}".format(min_lat, min_lon, max_lat, max_lon) - - limit_clause = limit if limit else "" - - query = query_template.format( - area_clause=area_clause, - area_node_clause=area_node_clause, - tags_clause=tags_clause, - bbox=bbox_clause, - limit=limit_clause - ) - - api = self.handler.connect() - - result = api.query(query) - return result - - -class OpenStreetMapWayTable(APITable): - """The OpenStreetMap Ways Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - - select_statement_parser = SELECTQueryParser( - query, - 'ways', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - ways_df = pd.json_normalize(self.get_ways(limit=result_limit)) - - select_statement_executor = SELECTQueryExecutor( - ways_df, - selected_columns, - where_conditions, - order_by_conditions - ) - ways_df = select_statement_executor.execute_query() - - return ways_df - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_ways(limit=1)).columns.tolist() - - def get_ways(self, **kwargs) -> List[Dict]: - - api_session = self.handler.connect() - ways = api_session.query(""" - way - ({{bbox}}); - out; - """, - # bbox=self.connection_data['bbox'] - ) - return [way.to_dict() for way in ways.ways] - - -class OpenStreetMapRelationTable(APITable): - """The OpenStreetMap Relations Table implementation""" - - def select_relations(self, query: ast.Select) -> pd.DataFrame: - - select_statement_parser = SELECTQueryParser( - query, - 'relations', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - relations_df = pd.json_normalize(self.get_relations(limit=result_limit)) - - select_statement_executor = SELECTQueryExecutor( - relations_df, - selected_columns, - where_conditions, - order_by_conditions - ) - relations_df = select_statement_executor.execute_query() - - return relations_df - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_relations(limit=1)).columns.tolist() - - def get_relations(self, **kwargs) -> List[Dict]: - - api_session = self.handler.connect() - relations = api_session.query(""" - relation - ({{bbox}}); - out; - """, - # bbox=self.connection_data['bbox'] - ) - return [relation.to_dict() for relation in relations.relations] diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/requirements.txt b/mindsdb/integrations/handlers/openstreetmap_handler/requirements.txt deleted file mode 100644 index 2f89be59fc7..00000000000 --- a/mindsdb/integrations/handlers/openstreetmap_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -overpy \ No newline at end of file diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/tests/__init__.py b/mindsdb/integrations/handlers/openstreetmap_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/openstreetmap_handler/tests/test_openstreetmap_handler.py b/mindsdb/integrations/handlers/openstreetmap_handler/tests/test_openstreetmap_handler.py deleted file mode 100644 index b0da024d189..00000000000 --- a/mindsdb/integrations/handlers/openstreetmap_handler/tests/test_openstreetmap_handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.openstreetmap_handler.openstreetmap_handler import OpenStreetMapHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class OpenStreetMapHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.handler = OpenStreetMapHandler(name='test_handler', connection_data={}) - - def test_0_connect(self): - assert self.handler.connect() - - def test_1_check_connection(self): - assert self.handler.check_connection() - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_select_query(self): - query = "SELECT * FROM openstreetmap_datasource.nodes WHERE id = 1;" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_native_query(self): - query = "SELECT * FROM openstreetmap_datasource.nodes WHERE area = 'New Delhi';" - response = self.handler.native_query(query) - assert response.type is RESPONSE_TYPE.ERROR - - def test_5_disconnect(self): - assert self.handler.disconnect() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/oracle_handler/oracle_handler.py b/mindsdb/integrations/handlers/oracle_handler/oracle_handler.py index ad9c4cde578..79d4c342ff4 100644 --- a/mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +++ b/mindsdb/integrations/handlers/oracle_handler/oracle_handler.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Text +from typing import Any, Generator import oracledb import pandas as pd @@ -10,9 +10,15 @@ HandlerStatusResponse as StatusResponse, HandlerResponse as Response, RESPONSE_TYPE, + TableResponse, + OkResponse, + ErrorResponse, + DataHandlerResponse, ) from mindsdb.utilities import log from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender +from mindsdb.utilities.config import config as mindsdb_config +from mindsdb.utilities.types.column import Column import mindsdb.utilities.profiler as profiler from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE @@ -80,43 +86,43 @@ def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE: return MYSQL_DATA_TYPE.VARCHAR -def _make_table_response(result: list[tuple[Any]], cursor: Cursor) -> Response: - """Build response from result and cursor. +def _get_colums(cursor: Cursor) -> list[Column]: + """Get columns from cursor. Args: - result (list[tuple[Any]]): result of the query. - cursor (oracledb.Cursor): cursor object. + cursor (psycopg.Cursor): cursor object. Returns: - Response: response object. + List of columns """ - description: list[tuple[Any]] = cursor.description - mysql_types: list[MYSQL_DATA_TYPE] = [] - for column in description: + columns = [] + for column in cursor.description: + column_name = column[0] db_type = column[1] precision = column[4] scale = column[5] + mysql_type = None if db_type is oracledb.DB_TYPE_JSON: - mysql_types.append(MYSQL_DATA_TYPE.JSON) + mysql_type = MYSQL_DATA_TYPE.JSON elif db_type is oracledb.DB_TYPE_VECTOR: - mysql_types.append(MYSQL_DATA_TYPE.VECTOR) + mysql_type = MYSQL_DATA_TYPE.VECTOR elif db_type is oracledb.DB_TYPE_NUMBER: if scale != 0: - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) + mysql_type = MYSQL_DATA_TYPE.FLOAT else: # python max int is 19 digits, oracle can return more if precision > 18: - mysql_types.append(MYSQL_DATA_TYPE.DECIMAL) + mysql_type = MYSQL_DATA_TYPE.DECIMAL else: - mysql_types.append(MYSQL_DATA_TYPE.INT) + mysql_type = MYSQL_DATA_TYPE.INT elif db_type is oracledb.DB_TYPE_BINARY_FLOAT: - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) + mysql_type = MYSQL_DATA_TYPE.FLOAT elif db_type is oracledb.DB_TYPE_BINARY_DOUBLE: - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) + mysql_type = MYSQL_DATA_TYPE.FLOAT elif db_type is oracledb.DB_TYPE_BINARY_INTEGER: - mysql_types.append(MYSQL_DATA_TYPE.INT) + mysql_type = MYSQL_DATA_TYPE.INT elif db_type is oracledb.DB_TYPE_BOOLEAN: - mysql_types.append(MYSQL_DATA_TYPE.BOOLEAN) + mysql_type = MYSQL_DATA_TYPE.BOOLEAN elif db_type in ( oracledb.DB_TYPE_CHAR, oracledb.DB_TYPE_NCHAR, @@ -125,22 +131,35 @@ def _make_table_response(result: list[tuple[Any]], cursor: Cursor) -> Response: oracledb.DB_TYPE_VARCHAR, oracledb.DB_TYPE_LONG_NVARCHAR, ): - mysql_types.append(MYSQL_DATA_TYPE.TEXT) + mysql_type = MYSQL_DATA_TYPE.TEXT elif db_type in (oracledb.DB_TYPE_RAW, oracledb.DB_TYPE_LONG_RAW): - mysql_types.append(MYSQL_DATA_TYPE.BINARY) + mysql_type = MYSQL_DATA_TYPE.BINARY elif db_type is oracledb.DB_TYPE_DATE: - mysql_types.append(MYSQL_DATA_TYPE.DATE) + mysql_type = MYSQL_DATA_TYPE.DATE elif db_type is oracledb.DB_TYPE_TIMESTAMP: - mysql_types.append(MYSQL_DATA_TYPE.TIMESTAMP) + mysql_type = MYSQL_DATA_TYPE.TIMESTAMP else: # fallback - mysql_types.append(MYSQL_DATA_TYPE.TEXT) + mysql_type = MYSQL_DATA_TYPE.TEXT + + columns.append(Column(name=column_name, type=mysql_type)) + return columns + + +def _make_df(result: list[tuple[Any]], columns: list[Column]) -> pd.DataFrame: + """Make pandas DataFrame from result and columns. + + Args: + result (list[tuple[Any]]): result of the query. + columns (list[Column]): list of columns. - # region cast int and bool to nullable types + Returns: + pd.DataFrame: pandas DataFrame. + """ serieses = [] - for i, mysql_type in enumerate(mysql_types): + for i, column in enumerate(columns): expected_dtype = None - if mysql_type in ( + if column.type in ( MYSQL_DATA_TYPE.SMALLINT, MYSQL_DATA_TYPE.INT, MYSQL_DATA_TYPE.MEDIUMINT, @@ -148,13 +167,11 @@ def _make_table_response(result: list[tuple[Any]], cursor: Cursor) -> Response: MYSQL_DATA_TYPE.TINYINT, ): expected_dtype = "Int64" - elif mysql_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): + elif column.type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): expected_dtype = "boolean" - serieses.append(pd.Series([row[i] for row in result], dtype=expected_dtype, name=description[i][0])) + serieses.append(pd.Series([row[i] for row in result], dtype=expected_dtype, name=column.name)) df = pd.concat(serieses, axis=1, copy=False) - # endregion - - return Response(RESPONSE_TYPE.TABLE, data_frame=df, mysql_types=mysql_types) + return df class OracleHandler(MetaDatabaseHandler): @@ -163,14 +180,15 @@ class OracleHandler(MetaDatabaseHandler): """ name = "oracle" + stream_response = True - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs) -> None: + def __init__(self, name: str, connection_data: dict | None, **kwargs) -> None: """ Initializes the handler. Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to OracleDB. + name (str): The name of the handler instance. + connection_data (dict | None): The connection data required to connect to OracleDB. kwargs: Arbitrary keyword arguments. """ super().__init__(name) @@ -304,78 +322,99 @@ def check_connection(self) -> StatusResponse: return response - @profiler.profile() - def native_query(self, query: Text) -> Response: - """ - Executes a SQL query on the Oracle database and returns the result. + def native_query(self, query: str, stream: bool = True, **kwargs) -> TableResponse | OkResponse | ErrorResponse: + """Executes a SQL query on the Oracle database and returns the result. Args: - query (Text): The SQL query to be executed. + query (str): The SQL query to be executed. + stream (bool): Whether to execute the query on the server side (streaming). + **kwargs: Additional keyword arguments. Returns: - Response: A response object containing the result of the query or an error message. + TableResponse | OkResponse | ErrorResponse: A response object containing the result of the query or an error message. """ - need_to_close = self.is_connected is False + if stream is False: + response = self._execute_fetchall(query, **kwargs) + else: + generator = self._execute_fetchmany(query, **kwargs) + try: + response: TableResponse = next(generator) + response.data_generator = generator + except StopIteration as e: + response = e.value + if isinstance(response, DataHandlerResponse) is False: + raise + return response + def _execute_fetchmany(self, query: str) -> Generator[pd.DataFrame, None, OkResponse | ErrorResponse]: connection = self.connect() - with connection.cursor() as cur: + with connection.cursor() as cursor: try: - cur.execute(query) - if cur.description is None: - response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount) - else: - result = cur.fetchall() - response = _make_table_response(result, cur) - connection.commit() - except DatabaseError as database_error: - logger.error(f"Error running query: {query} on Oracle, {database_error}!") - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(database_error), - ) - connection.rollback() + # Configure cursor for optimal server-side streaming + fetch_size = mindsdb_config["data_stream"]["fetch_size"] + cursor.arraysize = fetch_size - except Exception as unknown_error: - logger.error(f"Unknwon error running query: {query} on Oracle, {unknown_error}!") - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(unknown_error), - ) - connection.rollback() + cursor.execute(query) - if need_to_close is True: - self.disconnect() - return response + if cursor.description is None: + connection.commit() + return OkResponse(affected_rows=cursor.rowcount) - def query_stream(self, query: ASTNode, fetch_size: int = 1000): - """ - Executes a SQL query represented by an ASTNode and retrieves the data in a streaming fashion. + columns = _get_colums(cursor) + yield TableResponse(affected_rows=cursor.rowcount, columns=columns) + # Stream data in batches + while result := cursor.fetchmany(cursor.arraysize): + yield _make_df(result, columns) + connection.commit() + except Exception as e: + return self._handle_query_exception(e, query, connection) + + def _execute_fetchall(self, query: str) -> DataHandlerResponse: + """Executes a SQL query and fetches all results at once (client-side). Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - fetch_size (int): The number of rows to fetch in each batch. - Yields: - pd.DataFrame: A DataFrame containing a batch of rows from the query result. - Response: In case of an error, yields a Response object with the error details. - """ - query_str = SqlalchemyRender("oracle").get_string(query, with_failback=True) - need_to_close = self.is_connected is False + query (str): The SQL query to be executed. + Returns: + TableResponse | OkResponse | ErrorResponse: A response object containing the result of the query or an error message. + """ connection = self.connect() - with connection.cursor() as cur: + with connection.cursor() as cursor: try: - cur.execute(query_str) - while True: - result = cur.fetchmany(fetch_size) - if not result: - break - df = pd.DataFrame(result, columns=[col[0] for col in cur.description]) - yield df + cursor.execute(query) + if cursor.description is None: + response = OkResponse(affected_rows=cursor.rowcount) + else: + # Fetch all results at once + result = cursor.fetchall() + columns = _get_colums(cursor) + df = _make_df(result, columns) + response = TableResponse(data=df, affected_rows=cursor.rowcount, columns=columns) connection.commit() - finally: - connect - if need_to_close is True: - self.disconnect() + except Exception as e: + response = self._handle_query_exception(e, query, connection) + + return response + + def _handle_query_exception(self, e: Exception, query: str, connection) -> ErrorResponse: + """Handle query execution errors with appropriate logging and rollback. + + Args: + e: The exception that was raised + query: The SQL query that failed + connection: The database connection to rollback + + Returns: + ErrorResponse with appropriate error details + """ + if isinstance(e, DatabaseError): + logger.error(f"Error running query: {query} on Oracle, {e}!") + connection.rollback() + return ErrorResponse(error_code=0, error_message=str(e)) + + logger.error(f"Unknown error running query: {query} on Oracle, {e}!") + connection.rollback() + return ErrorResponse(error_code=0, error_message=str(e)) def insert(self, table_name: str, df: pd.DataFrame) -> Response: """ @@ -454,12 +493,12 @@ def get_tables(self) -> Response: """ return self.native_query(query) - def get_columns(self, table_name: Text) -> Response: + def get_columns(self, table_name: str) -> Response: """ Retrieves column details for a specified table in the Oracle database. Args: - table_name (Text): The name of the table for which to retrieve column information. + table_name (str): The name of the table for which to retrieve column information. Returns: Response: A response object containing the column details, formatted as per the `Response` class. @@ -485,11 +524,11 @@ def get_columns(self, table_name: Text) -> Response: ORDER BY TABLE_NAME, COLUMN_ID """ result = self.native_query(query) - if result.resp_type is RESPONSE_TYPE.TABLE: + if result.type is RESPONSE_TYPE.TABLE: result.to_columns_table_response(map_type_fn=_map_type) return result - def meta_get_tables(self, table_names: Optional[List[str]]) -> Response: + def meta_get_tables(self, table_names: list[str] | None) -> Response: """ Retrieves metadata about all non-system tables and views accessible to the current user. @@ -524,11 +563,11 @@ def meta_get_tables(self, table_names: Optional[List[str]]) -> Response: result = self.native_query(query) return result - def meta_get_columns(self, table_names: Optional[List[str]]) -> Response: + def meta_get_columns(self, table_names: list[str] | None) -> Response: """Retrieves metadata about the columns of specified tables accessible to the current user. Args: - table_names (list[str]): A list of table names for which to retrieve column metadata. + table_names (list[str] | None): A list of table names for which to retrieve column metadata. Returns: Response: A response object containing column metadata. @@ -564,11 +603,11 @@ def meta_get_columns(self, table_names: Optional[List[str]]) -> Response: result = self.native_query(query) return result - def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> Response: + def meta_get_column_statistics(self, table_names: list[str] | None) -> Response: """Retrieves statistics about the columns of specified tables accessible to the current user. Args: - table_names (list[str]): A list of table names for which to retrieve column statistics. + table_names (list[str] | None): A list of table names for which to retrieve column statistics. Returns: Response: A response object containing column statistics. @@ -623,12 +662,12 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> Respon result = self.native_query(query) - if result.resp_type is RESPONSE_TYPE.TABLE and result.data_frame is not None: + if result.type is RESPONSE_TYPE.TABLE and result.data_frame is not None: df = result.data_frame def extract_min_max( histogram_str: str, - ) -> tuple[Optional[float], Optional[float]]: + ) -> tuple[float | None, float | None]: if histogram_str and str(histogram_str).lower() not in ["nan", "none"]: values = str(histogram_str).split(",") if values: @@ -643,12 +682,12 @@ def extract_min_max( df.drop(columns=["HISTOGRAM_BOUNDS"], inplace=True) return result - def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> Response: + def meta_get_primary_keys(self, table_names: list[str] | None) -> Response: """ Retrieves the primary keys for the specified tables accessible to the current user. Args: - table_names (list[str]): A list of table names for which to retrieve primary keys. + table_names (list[str] | None): A list of table names for which to retrieve primary keys. Returns: Response: A response object containing primary key information. @@ -681,12 +720,12 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> Response: result = self.native_query(query) return result - def meta_get_foreign_keys(self, table_names: Optional[List[str]]) -> Response: + def meta_get_foreign_keys(self, table_names: list[str] | None) -> Response: """ Retrieves the foreign keys for the specified tables accessible to the current user. Args: - table_names (list[str]): A list of table names for which to retrieve foreign keys. + table_names (list[str] | None): A list of table names for which to retrieve foreign keys. Returns: Response: A response object containing foreign key information. diff --git a/mindsdb/integrations/handlers/orioledb_handler/README.md b/mindsdb/integrations/handlers/orioledb_handler/README.md deleted file mode 100644 index 277e430568f..00000000000 --- a/mindsdb/integrations/handlers/orioledb_handler/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# OrioleDB Handler - -This is the implementation of the OrioleDB handler for MindsDB. - -## OrioleDB -OrioleDB is a new storage engine for PostgreSQL, bringing a modern approach to database capacity, capabilities and performance to the world's most-loved database platform. - -OrioleDB consists of an extension, building on the innovative table access method framework and other standard Postgres extension interfaces. By extending and enhancing the current table access methods, OrioleDB opens the door to a future of more powerful storage models that are optimized for cloud and modern hardware architectures. -## Implementation - -This handler was implemented by extending postres connector. - -The required arguments to establish a connection are: - -* `host`: the host name of the OrioleDB connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name - -## Usage - -In order to make use of this handler and connect to a OrioleDB server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE orioledb_data -WITH ENGINE = "orioledb", -PARAMETERS = { - "user": "root", - "password": "root", - "host": "hostname", - "port": 5432, - "database": "postgres" -} -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM orioledb_data.loveU LIMIT 10; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/orioledb_handler/__about__.py b/mindsdb/integrations/handlers/orioledb_handler/__about__.py deleted file mode 100644 index f83acbe8666..00000000000 --- a/mindsdb/integrations/handlers/orioledb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB OrioleDB handler' -__package_name__ = 'mindsdb_orioledb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for OrioleDB" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/orioledb_handler/__init__.py b/mindsdb/integrations/handlers/orioledb_handler/__init__.py deleted file mode 100644 index efc41da72c5..00000000000 --- a/mindsdb/integrations/handlers/orioledb_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .orioledb_handler import OrioleDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'OrioleDB' -name = 'orioledb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path', 'connection_args', 'connection_args_example' -] diff --git a/mindsdb/integrations/handlers/orioledb_handler/connection_args.py b/mindsdb/integrations/handlers/orioledb_handler/connection_args.py deleted file mode 100644 index d0f19bcd74f..00000000000 --- a/mindsdb/integrations/handlers/orioledb_handler/connection_args.py +++ /dev/null @@ -1,36 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the OrioleDB server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the OrioleDB server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the OrioleDB server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the OrioleDB server. ' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the OrioleDB server. Must be an integer.' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=5432, - user='postgres', - password='', - database='database' -) diff --git a/mindsdb/integrations/handlers/orioledb_handler/icon.svg b/mindsdb/integrations/handlers/orioledb_handler/icon.svg deleted file mode 100644 index 1f185c9c27a..00000000000 --- a/mindsdb/integrations/handlers/orioledb_handler/icon.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/orioledb_handler/orioledb_handler.py b/mindsdb/integrations/handlers/orioledb_handler/orioledb_handler.py deleted file mode 100644 index 85a5179d756..00000000000 --- a/mindsdb/integrations/handlers/orioledb_handler/orioledb_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler - - -class OrioleDBHandler(PostgresHandler): - """ - This handler handles connection and execution of the OrioleDB statements. - """ - name = 'orioledb' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/orioledb_handler/tests/__init__.py b/mindsdb/integrations/handlers/orioledb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py b/mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py deleted file mode 100644 index 221cc16dc87..00000000000 --- a/mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.orioledb_handler.orioledb_handler import OrioleDBHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE - - -class OrioleDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 5432, - "user": "postgres", - "password": "1234", - "database": "postgres", - } - } - cls.handler = OrioleDBHandler('test_orioledb_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_create_table(self): - query = "CREATE Table IF NOT EXISTS Lover(name varchar(101));" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_3_insert(self): - query = "INSERT INTO LOVER VALUES('Shiv Shakti');" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_4_native_query_select(self): - query = "SELECT * FROM LOVER;" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('LOVER') - - query = "DROP Table IF EXISTS Lover;" - self.handler.query(query) - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/palm_handler/README.md b/mindsdb/integrations/handlers/palm_handler/README.md deleted file mode 100644 index 658fd7f8051..00000000000 --- a/mindsdb/integrations/handlers/palm_handler/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# PaLM API Handler - -This is the integration of PaLM2 API for MindsDB. - -## PaLM API - -[PaLM API](https://developers.generativeai.google/products/palm) is used to build generative AI applications for use cases like content generation, dialog agents, summarization, classification, and more - - -## Implementation - -This handler uses `google-generativeai` library to connect to the PaLM API. - -The required arguments to establish a connection are: - -* `api_key`: the api key for authenticate with the PaLM API -either -* `question_column`: the name of the column in the dataset to be used as input -or -* `prompt_template`: the prompt template to be used as placeholder - - -## Usage - -In order to make use of this handler and query the PaLM API, the following syntax can be used: - -### Question Answering prompt - -You can create a model by providing a name for table of the question and answers to query later. - -```sql --- you might already have a database -CREATE PROJECT palm_dev; - -CREATE ML_ENGINE palm_engine -FROM palm -USING - palm_api_key = 'palm_api_key'; - -CREATE MODEL palm_dev.model_name -PREDICT answer -USING - engine="palm_engine", - mode = "default", - question_column = "question"; -``` - -Now, this model could be used to query as a prompt to the PaLM as follows: - -```sql -SELECT - question, - answer -FROM - palm_dev.model_name -WHERE - question = "What is mindsdb?"; -``` - -### Placeholder/Template prompt - -You can create a model with a placeholder/template prompt to query later with the given values for the placeholder keys. - -```sql -CREATE MODEL palm_dev.model_name -PREDICT answer -USING - engine="palm_engine", - mode = "default", - prompt_template = "list some facts about {{ thing }}"; -``` - -Now, this model can be used by providing the values of the keys in the `prompt_template` as follows: - -```sql -SELECT - * -FROM - palm_dev.model_name -WHERE - thing = "mindsdb"; -``` - -### Embeddings - -You can create a model that generates the emebeddings of the given text. - -```sql -CREATE MODEL palm_dev.model_name -PREDICT answer -USING - engine="palm_engine", - mode = "embedding", - question_column = "question"; -``` - -Then, this model can be queried to get the embeddings for the given text as follows: - -```sql -SELECT - * -FROM - palm_dev.model_name -WHERE - question = "What is mindsdb?"; -``` - -### User Input and Prompt - -You can create a model for a specific prompt and specifically ask the user for input with a given context. - -```sql -CREATE MODEL palm_dev.model_name -PREDICT answer -USING - engine = 'palm_engine', - prompt = 'tell some joke about programming', - user_column = 'user_input'; -``` - -Then, this model can be queried to get the answer as follows: - -```sql -SELECT - * -FROM - palm_dev.model_name -WHERE - user_input = 'python'; -``` - -## Features Implemented - -- [x] Question Answering -- [x] Placeholder/Template -- [x] Embeddings -- [x] User Input Prompts - -## TODOs - -- [ ] Conversational chat prompts -- [ ] Moderation of Prompts diff --git a/mindsdb/integrations/handlers/palm_handler/__about__.py b/mindsdb/integrations/handlers/palm_handler/__about__.py deleted file mode 100644 index 14df2148f89..00000000000 --- a/mindsdb/integrations/handlers/palm_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB PaLM handler' -__package_name__ = 'mindsdb_palm_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for PaLM" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/palm_handler/__init__.py b/mindsdb/integrations/handlers/palm_handler/__init__.py deleted file mode 100644 index 7f777b33124..00000000000 --- a/mindsdb/integrations/handlers/palm_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .palm_handler import PalmHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Google PaLM' -name = 'palm' -type = HANDLER_TYPE.ML -icon_path = 'icon.svg' -permanent = False - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/palm_handler/icon.svg b/mindsdb/integrations/handlers/palm_handler/icon.svg deleted file mode 100644 index 70664681d47..00000000000 --- a/mindsdb/integrations/handlers/palm_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/palm_handler/palm_handler.py b/mindsdb/integrations/handlers/palm_handler/palm_handler.py deleted file mode 100644 index 178f57c62ed..00000000000 --- a/mindsdb/integrations/handlers/palm_handler/palm_handler.py +++ /dev/null @@ -1,446 +0,0 @@ -import textwrap -from pydantic import BaseModel, Extra - -import google.generativeai as palm -import numpy as np -import pandas as pd - -from mindsdb.utilities.hooks import before_palm_query, after_palm_query -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.integrations.libs.llm.utils import get_completed_prompts - -from mindsdb.integrations.utilities.handler_utils import get_api_key - -CHAT_MODELS = ( - "models/chat-bison-001", - "models/embedding-gecko-001", - "models/text-bison-001", -) - -logger = log.getLogger(__name__) - - -class PalmHandlerArgs(BaseModel): - target: str = None - model_name: str = "models/chat-bison-001" - mode: str = "default" - predict_params: dict = None - input_text: str = None - ft_api_info: dict = None - ft_result_stats: dict = None - runtime: str = None - max_output_tokens: int = 64 - temperature: float = 0.0 - api_key: str = None - palm_api_key: str = None - - question_column: str = None - answer_column: str = None - context_column: str = None - prompt_template: str = None - prompt: str = None - user_column: str = None - assistant_column: str = None - - class Config: - # for all args that are not expected, raise an error - extra = Extra.forbid - - -class PalmHandler(BaseMLEngine): - name = "palm" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.generative = True - self.model_name = "models/chat-bison-001" - self.model_name = ( - "default" # can also be 'conversational' or 'conversational-full' - ) - self.supported_modes = [ - "default", - "conversational", - "conversational-full", - "embedding", - ] - self.rate_limit = 60 # requests per minute - self.max_batch_size = 20 - self.default_max_output_tokens = 64 - self.chat_completion_models = CHAT_MODELS - - @staticmethod - def create_validation(target, args=None, **kwargs): - if "using" not in args: - raise Exception( - "palm engine requires a USING clause! Refer to its documentation for more details." - ) - else: - args = args["using"] - - if ( - len(set(args.keys()) & {"question_column", "prompt_template", "prompt"}) - == 0 - ): - raise Exception( - "One of `question_column` or `prompt_template` is required for this engine." - ) - - # TODO: add example_column for conversational mode - keys_collection = [ - ["prompt_template"], - ["question_column", "context_column"], - ["prompt", "user_column", "assistant_column"], - ] - for keys in keys_collection: - if keys[0] in args and any( - x[0] in args for x in keys_collection if x != keys - ): - raise Exception( - textwrap.dedent( - """\ - Please provide one of - 1) a `prompt_template` - 2) a `question_column` and an optional `context_column` - 3) a `prompt' and 'user_column' and 'assistant_column` - """ - ) - ) - - def create(self, target, args=None, **kwargs): - args = args["using"] - args_model = PalmHandlerArgs(**args) - - args_model.target = target - api_key = get_api_key("palm", args["using"], self.engine_storage, strict=False) - - # Set palm api key - palm.configure(api_key=api_key) - - available_models = [m.name for m in palm.list_models()] - - if not args_model.model_name: - args_model.model_name = self.model_name - elif args_model.model_name not in available_models: - raise Exception(f"Invalid model name. Please use one of {available_models}") - - if not args_model.mode: - args_model.mode = self.model_name - elif args_model.mode not in self.supported_modes: - raise Exception( - f"Invalid operation mode. Please use one of {self.supported_modes}" - ) - - self.model_storage.json_set("args", args_model.model_dump()) - - def predict(self, df, args=None): - """ - If there is a prompt template, we use it. Otherwise, we use the concatenation of `context_column` (optional) and `question_column` to ask for a completion. - """ # noqa - # TODO: support for edits, embeddings and moderation - - pred_args = args["predict_params"] if args else {} - args_model = PalmHandlerArgs(**self.model_storage.json_get("args")) - df = df.reset_index(drop=True) - - if pred_args.get("mode"): - if pred_args["mode"] in self.supported_modes: - args_model.mode = pred_args["mode"] - else: - raise Exception( - f"Invalid operation mode. Please use one of {self.supported_modes}." - ) # noqa - - if pred_args.get("prompt_template", False): - base_template = pred_args[ - "prompt_template" - ] # override with predict-time template if available - elif args_model.prompt_template: - base_template = args_model.prompt_template - else: - base_template = None - - # Embedding Mode - if args_model.mode == "embedding": - api_args = { - "model": pred_args.get("model_name", "models/embedding-gecko-001") - } - model_name = "models/embedding-gecko-001" - if args_model.question_column: - prompts = list(df[args_model.question_column].apply(lambda x: str(x))) - empty_prompt_ids = np.where( - df[[args_model.question_column]].isna().all(axis=1).values - )[0] - else: - raise Exception("Embedding mode needs a question_column") - - # Chat or normal completion mode - else: - if ( - args_model.question_column - and args_model.question_column not in df.columns - ): - raise Exception( - f"This model expects a question to answer in the '{args_model.question_column}' column." - ) - - if ( - args_model.context_column - and args_model.context_column not in df.columns - ): - raise Exception( - f"This model expects context in the '{args_model.context_column}' column." - ) - - # api argument validation - model_name = args_model.model_name - api_args = { - "max_output_tokens": pred_args.get( - "max_output_tokens", - args_model.max_output_tokens, - ), - "temperature": min( - 1.0, - max(0.0, pred_args.get("temperature", args_model.temperature)), - ), - "top_p": pred_args.get("top_p", None), - "candidate_count": pred_args.get("candidate_count", None), - "stop_sequences": pred_args.get("stop_sequences", None), - } - - if ( - args_model.mode != "default" - and model_name not in self.chat_completion_models - ): - raise Exception( - f"Conversational modes are only available for the following models: {', '.join(self.chat_completion_models)}" - ) # noqa - - if args_model.prompt_template: - prompts, empty_prompt_ids = get_completed_prompts( - base_template, df - ) - if len(prompts) == 0: - raise Exception("No prompts found") - - elif args_model.context_column: - empty_prompt_ids = np.where( - df[[args_model.context_column, args_model.question_column]] - .isna() - .all(axis=1) - .values - )[0] - contexts = list(df[args_model.context_column].apply(lambda x: str(x))) - questions = list(df[args_model.question_column].apply(lambda x: str(x))) - prompts = [ - f"Context: {c}\nQuestion: {q}\nAnswer: " - for c, q in zip(contexts, questions) - ] - api_args["context"] = "".join(contexts) - - elif args_model.prompt: - empty_prompt_ids = [] - prompts = list(df[args_model.user_column]) - if len(prompts) == 0: - raise Exception("No prompts found") - else: - empty_prompt_ids = np.where( - df[[args_model.question_column]].isna().all(axis=1).values - )[0] - prompts = list(df[args_model.question_column].apply(lambda x: str(x))) - - # remove prompts without signal from completion queue - prompts = [j for i, j in enumerate(prompts) if i not in empty_prompt_ids] - - api_key = get_api_key("palm", args["using"], self.engine_storage, strict=False) - api_args = { - k: v for k, v in api_args.items() if v is not None - } # filter out non-specified api args - completion = self._completion( - model_name, prompts, api_key, api_args, args_model, df - ) - - # add null completion for empty prompts - for i in sorted(empty_prompt_ids): - completion.insert(i, None) - - pred_df = pd.DataFrame(completion, columns=[args_model.target]) - - return pred_df - - def _completion(self, model_name, prompts, api_key, api_args, args_model, df): - """ - Handles completion for an arbitrary amount of rows. - Additionally, single completion calls are done with exponential backoff to guarantee all prompts are processed, - because even with previous checks the tokens-per-minute limit may apply. - """ - - def _submit_completion(model_name, prompts, api_key, api_args, args_model, df): - kwargs = { - "model": model_name, - } - - # configure the PaLM SDK with the provided API KEY - palm.configure(api_key=api_key) - - if model_name == "models/embedding-gecko-001": - prompts = "".join(prompts) - return _submit_embedding_completion(kwargs, prompts, api_args) - elif model_name == args_model.model_name: - return _submit_chat_completion( - kwargs, - prompts, - api_args, - df, - mode=args_model.mode, - ) - else: - prompts = "".join(prompts) - return _submit_normal_completion(kwargs, prompts, api_args) - - def _log_api_call(params, response): - after_palm_query(params, response) - - params2 = params.copy() - params2.pop("palm_api_key", None) - params2.pop("user", None) - logger.debug(f">>>palm call: {params2}:\n{response}") - - def _submit_normal_completion(kwargs, prompts, api_args): - def _tidy(comp): - tidy_comps = [] - if comp.candidates and len(comp.candidates) == 0: - return ["No completions found"] - for c in comp.candidates: - if "output" in c: - tidy_comps.append(c["output"].strip("\n").strip("")) - return tidy_comps - - kwargs["prompt"] = prompts - kwargs = {**kwargs, **api_args} - - before_palm_query(kwargs) - - # call the palm sdk with text-bison-001 model - resp = _tidy(palm.generate_text(**kwargs)) - _log_api_call(kwargs, resp) - return resp - - def _submit_embedding_completion(kwargs, prompts, api_args): - def _tidy(comp): - tidy_comps = [] - if "embedding" not in comp: - return [f"No completion found, err {comp}"] - for c in comp["embedding"]: - tidy_comps.append([c]) - return tidy_comps - - kwargs = {} - kwargs["model"] = api_args["model"] - kwargs["text"] = prompts - - before_palm_query(kwargs) - - # call the palm sdk with embedding-gecko-001 model - resp = _tidy(palm.generate_embeddings(**kwargs)) - _log_api_call(kwargs, resp) - return resp - - def _submit_chat_completion( - kwargs, prompts, api_args, df, mode="conversational" - ): - def _tidy(comp): - tidy_comps = [] - if comp.candidates and len(comp.candidates) == 0: - return ["No completions found"] - - for c in comp.candidates: - if "content" in c: - tidy_comps.append(c["content"].strip("\n").strip("")) - if "output" in c: - tidy_comps.append(c["output"].strip("\n").strip("")) - return tidy_comps - - completions = [] - if mode != "conversational": - initial_prompt = { - "author": "system", - "content": "You are a helpful assistant. Your task is to continue the chat.", - } # noqa - else: - # get prompt from model - prompt = "".join(prompts) - initial_prompt = {"author": "system", "content": prompt} # noqa - kwargs["messages"] = [initial_prompt] - - last_completion_content = None - - for pidx in range(len(prompts)): - if mode == "conversational": - kwargs["messages"].append( - {"author": "user", "content": prompts[pidx]} - ) - - if mode == "conversational-full" or ( - mode == "conversational" and pidx == len(prompts) - 1 - ): - pkwargs = {**kwargs, **api_args} - pkwargs["candidate_count"] = 3 - pkwargs.pop("max_output_tokens") - before_palm_query(kwargs) - - # call the palm sdk with chat-bison-001 model - resp = _tidy(palm.chat(**pkwargs)) - - _log_api_call(pkwargs, resp) - - completions.extend(resp) - elif mode == "default": - pkwargs = {**kwargs, **api_args} - - pkwargs["model"] = "models/text-bison-001" - pkwargs["prompt"] = prompts[pidx] - before_palm_query(kwargs) - if pkwargs["prompt"] == "": - return ["No prompt provided"] - - # call the palm sdk with text-bison-001 model - resp = _tidy(palm.generate_text(**pkwargs)) - _log_api_call(pkwargs, resp) - - completions.extend(resp) - else: - # in "normal" conversational mode, we request completions only for the last row - last_completion_content = None - if args_model.answer_column in df.columns: - # insert completion if provided, which saves redundant API calls - completions.extend([df.iloc[pidx][args_model.answer_column]]) - else: - completions.extend([""]) - - if args_model.answer_column in df.columns: - kwargs["messages"].append( - { - "author": "assistant", - "content": df.iloc[pidx][args_model.answer_column], - } - ) - elif last_completion_content: - # interleave assistant responses with user input - kwargs["messages"].append( - {"author": "assistant", "content": last_completion_content[0]} - ) - - return completions - - try: - completion = _submit_completion( - model_name, prompts, api_key, api_args, args_model, df - ) - return completion - except Exception as e: - completion = [] - logger.exception(e) - completion.extend({"error": str(e)}) - - return completion diff --git a/mindsdb/integrations/handlers/palm_handler/requirements.txt b/mindsdb/integrations/handlers/palm_handler/requirements.txt deleted file mode 100644 index 17e8ced1b48..00000000000 --- a/mindsdb/integrations/handlers/palm_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -google-generativeai >= 0.1.0 diff --git a/mindsdb/integrations/handlers/paypal_handler/README.md b/mindsdb/integrations/handlers/paypal_handler/README.md deleted file mode 100644 index 3bfadb199e0..00000000000 --- a/mindsdb/integrations/handlers/paypal_handler/README.md +++ /dev/null @@ -1,160 +0,0 @@ -# PayPal Handler - -PayPal handler for MindsDB provides interfaces to connect to PayPal via APIs and pull data into MindsDB. - ---- - -## Table of Contents - -- [PayPal Handler](#paypal-handler) - - [Table of Contents](#table-of-contents) - - [About PayPal](#about-paypal) - - [PayPal Handler Implementation](#paypal-handler-implementation) - - [PayPal Handler Initialization](#paypal-handler-initialization) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About PayPal - -PayPal is an online payment system that makes paying for things online and sending and receiving money safe and secure. -
-https://www.bankrate.com/finance/credit-cards/guide-to-using-paypal/ - -## PayPal Handler Implementation - -This handler was implemented using [PayPal-Python-SDK](https://github.com/paypal/PayPal-Python-SDK), the Python SDK for PayPal RESTful APIs. - -## PayPal Handler Initialization - -The PayPal handler is initialized with the following parameters: - -- `mode`: The mode of the PayPal API. Can be `sandbox` or `live`. -- `client_id`: The client ID of the PayPal API. -- `client_secret`: The client secret of the PayPal API. - -## Implemented Features - -- [x] PayPal Payments Table for a given account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -- [x] PayPal Invoices Table for a given account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -- [x] PayPal Subscriptions table for a given account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - - -## TODO - -- [ ] Support INSERT, UPDATE and DELETE for the Payments table -- [ ] PayPal Orders table -- [ ] PayPal Payouts table -- [ ] Many more - -## Example Usage - -The first step is to create a database with the new `paypal` engine by passing in the required parameters: - -~~~~sql -CREATE DATABASE paypal_datasource -WITH ENGINE = 'paypal', -PARAMETERS = { - "mode": "sandbox", - "client_id": "EBWKjlELKMYqRNQ6sYvFo64FtaRLRR5BdHEESmha49TM", - "client_secret": "EO422dn3gQLgDbuwqTjzrFgFtaRLRR5BdHEESmha49TM" -}; -~~~~ - -Use the established connection to query your database: - -Query Payments_table: -~~~~sql -SELECT * FROM paypal_datasource.payments -~~~~ - -Query Invoices_table: -~~~~sql -SELECT * FROM paypal_datasource.invoices -~~~~ - -Query Subscriptions_table: -~~~~sql -SELECT * FROM paypal_datasource.subscriptions -~~~~ - -Run more advanced queries: - -`Payments_table` -~~~~sql -SELECT intent, cart -FROM paypal_datasource.payments -WHERE state = 'approved' -ORDER BY id -LIMIT 5 -~~~~ - -`Invoices_table` - -Query Invoices with specific columns: - -~~~~sql -SELECT invoice_number, total_amount, status FROM paypal_datasource.invoices -~~~~ - -Query Invoices with conditions and ordering: - -~~~~sql -SELECT invoice_number, total_amount -FROM paypal_datasource.invoices -WHERE status = 'PAID' -ORDER BY total_amount DESC -LIMIT 10 -~~~~ - -`Subscriptions_table` -Query Subscriptions with specific columns: - -~~~~sql -SELECT id, name FROM paypal_datasource.subscriptions -~~~~ - -Query Subscriptions with conditions and ordering: - -~~~~sql -SELECT id , state, name -FROM paypal_datasource.subscriptions -WHERE state ="CREATED" -LIMIT 5 -~~~~ - - -`Orders_table` -Query Orders with specific columns: - -~~~~sql -SELECT id, name FROM paypal_datasource.orders -~~~~ - -Query Orders with conditions and ordering: - -~~~~sql -SELECT id , state, amount -FROM paypal_datasource.orders -WHERE ids = ("{id1}","{id2}") -ORDER BY total_amount DESC -~~~~ diff --git a/mindsdb/integrations/handlers/paypal_handler/__about__.py b/mindsdb/integrations/handlers/paypal_handler/__about__.py deleted file mode 100644 index 0a0b3c42c6c..00000000000 --- a/mindsdb/integrations/handlers/paypal_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB PayPal handler" -__package_name__ = "mindsdb_paypal_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for PayPal" -__author__ = "Minura Punchihewa" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/paypal_handler/__init__.py b/mindsdb/integrations/handlers/paypal_handler/__init__.py deleted file mode 100644 index af5148887b1..00000000000 --- a/mindsdb/integrations/handlers/paypal_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .paypal_handler import PayPalHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "PayPal" -name = "paypal" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/paypal_handler/icon.svg b/mindsdb/integrations/handlers/paypal_handler/icon.svg deleted file mode 100644 index 3a4dcb7f744..00000000000 --- a/mindsdb/integrations/handlers/paypal_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/paypal_handler/paypal_handler.py b/mindsdb/integrations/handlers/paypal_handler/paypal_handler.py deleted file mode 100644 index 519be2dd205..00000000000 --- a/mindsdb/integrations/handlers/paypal_handler/paypal_handler.py +++ /dev/null @@ -1,145 +0,0 @@ -import paypalrestsdk - -from mindsdb.integrations.handlers.paypal_handler.paypal_tables import ( - InvoicesTable, - PaymentsTable, - SubscriptionsTable, - OrdersTable, - PayoutsTable -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE -from mindsdb_sql_parser import parse_sql -from collections import OrderedDict - -logger = log.getLogger(__name__) - - -class PayPalHandler(APIHandler): - """ - The PayPal handler implementation. - """ - - name = 'paypal' - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - payments_data = PaymentsTable(self) - self._register_table("payments", payments_data) - - invoices_data = InvoicesTable(self) - self._register_table("invoices", invoices_data) - - subscriptions_data = SubscriptionsTable(self) - self._register_table("subscriptions", subscriptions_data) - - orders_data = OrdersTable(self) - self._register_table("orders", orders_data) - - payouts_data = PayoutsTable(self) - self._register_table("payouts", payouts_data) - - def connect(self): - """ - Set up the connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - if self.is_connected is True: - return self.connection - - self.connection = paypalrestsdk.Api( - { - "mode": self.connection_data['mode'], - "client_id": self.connection_data['client_id'], - "client_secret": self.connection_data['client_secret'], - } - ) - - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - - try: - connection = self.connect() - connection.get_access_token() - response.success = True - except Exception as e: - logger.error('Error connecting to PayPal!') - response.error_message = str(e) - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) - - -connection_args = OrderedDict( - mode={ - "type": ARG_TYPE.STR, - "description": "Environment mode of the app", - "required": True, - "label": "MODE", - }, - client_id={ - "type": ARG_TYPE.PWD, - "description": "Client id of the App", - "required": True, - "label": "Client ID", - }, - client_secret={ - "type": ARG_TYPE.STR, - "description": "Client secret of the App", - "required": True, - "label": "Client Secret", - }, -) - -connection_args_example = OrderedDict( - mode="sandbox", - client_id="xxxx-xxxx-xxxx-xxxx", - client_secret="", -) diff --git a/mindsdb/integrations/handlers/paypal_handler/paypal_tables.py b/mindsdb/integrations/handlers/paypal_handler/paypal_tables.py deleted file mode 100644 index 14ce78f9932..00000000000 --- a/mindsdb/integrations/handlers/paypal_handler/paypal_tables.py +++ /dev/null @@ -1,284 +0,0 @@ -import paypalrestsdk -import pandas as pd -from typing import Text, List, Dict - -from mindsdb_sql_parser import ast -from mindsdb.integrations.libs.api_handler import APITable - -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor - - -class PaymentsTable(APITable): - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls PayPal Payments data. - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - Returns - ------- - pd.DataFrame - PayPal Payments matching the query - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'payments', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - payments_df = pd.json_normalize(self.get_payments(count=result_limit)) - select_statement_executor = SELECTQueryExecutor( - payments_df, - selected_columns, - where_conditions, - order_by_conditions - ) - payments_df = select_statement_executor.execute_query() - - return payments_df - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_payments(count=1)).columns.tolist() - - def get_payments(self, **kwargs) -> List[Dict]: - connection = self.handler.connect() - payments = paypalrestsdk.Payment.all(kwargs, api=connection) - return [payment.to_dict() for payment in payments['payments']] - - -class InvoicesTable(APITable): - - def select(self, query: ast.Select) -> pd.DataFrame: - select_statement_parser = SELECTQueryParser( - query, - 'invoices', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - invoices_df = pd.json_normalize(self.get_invoices(count=result_limit)) - select_statement_executor = SELECTQueryExecutor( - invoices_df, - selected_columns, - where_conditions, - order_by_conditions - ) - invoices_df = select_statement_executor.execute_query() - - return invoices_df - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_invoices(count=1)).columns.tolist() - - def get_invoices(self, **kwargs) -> List[Dict]: - connection = self.handler.connect() - invoices = paypalrestsdk.Invoice.all(kwargs, api=connection) - return [invoice.to_dict() for invoice in invoices['invoices']] - - -class SubscriptionsTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - select_statement_parser = SELECTQueryParser( - query, - 'subscriptions', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - subscriptions_df = pd.json_normalize(self.get_subscriptions(count=result_limit)) - select_statement_executor = SELECTQueryExecutor( - subscriptions_df, - selected_columns, - where_conditions, - order_by_conditions - ) - subscriptions_df = select_statement_executor.execute_query() - return subscriptions_df - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_subscriptions(count=1)).columns.tolist() - - def get_subscriptions(self, **kwargs) -> List[Dict]: - connection = self.handler.connect() - subscriptions = paypalrestsdk.BillingPlan.all(kwargs, api=connection) - return [subscription.to_dict() for subscription in subscriptions['plans']] - - -class OrdersTable(APITable): - """The PayPal Orders Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls PayPal Orders data. - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - Returns - ------- - pd.DataFrame - PayPal Orders matching the query - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - select_statement_parser = SELECTQueryParser( - query, - 'orders', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - id = None - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'id': - if op == '=': - id = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for 'ids' column") - elif arg1 in ['state', 'amount', 'create_time', 'update_time', 'links', 'pending_reason', 'parent_payment']: - subset_where_conditions.append([op, arg1, arg2]) - - if not id: - raise NotImplementedError("id column is required for this table") - - orders_df = pd.json_normalize(self.get_orders(id)) - select_statement_executor = SELECTQueryExecutor( - orders_df, - selected_columns, - subset_where_conditions, - order_by_conditions - ) - orders_df = select_statement_executor.execute_query() - return orders_df - - def get_columns(self) -> List[Text]: - return ["id", - "status", - "intent", - "purchase_units", - "links", - "create_time"] - - # restore this or similar header list for API 2.0 refactor - # restore this list when restore paypalsdk api, and retired the request call - # return ["id", - # "status", - # "intent", - # "gross_total_amount.value", - # "gross_total_amount.currency", - # "purchase_units", - # "metadata.supplementary_data", - # "redirect_urls.return_url", - # "redirect_urls.cancel_url", - # "links", - # "create_time"] - - def get_orders(self, id) -> List[Dict]: - # we can use the paypalrestsdk api to get the order if they refactor their code - connection = self.handler.connect() - endpoint = f"v2/checkout/orders/{id}" - order = connection.get(endpoint) - if not order: - raise ValueError("Could not get order, check order id") - return order - - -class PayoutsTable(APITable): - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls PayPal payouts data. - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - Returns - ------- - pd.DataFrame - PayPal payouts matching the query - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'payouts', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - payout_batch_id = "" - - for a_where in where_conditions: - if a_where[1] == "payout_batch_id": - if a_where[0] != "=": - raise ValueError("Unsupported where operation for state") - - payout_batch_id = a_where[2] - if not payout_batch_id: - raise NotImplementedError("payout_batch_id column is required for this table") - - payouts_data = self.get_payout(payout_batch_id) # Get the data - payouts_df = pd.DataFrame(payouts_data) # Create a DataFrame - - select_statement_executor = SELECTQueryExecutor( - payouts_df, - selected_columns, - where_conditions, - order_by_conditions - ) - - payouts_df = select_statement_executor.execute_query() - - return payouts_df - - def get_columns(self) -> List[Text]: - return [ - "payout_batch_id", - "batch_status", - "time_created", - "time_completed", - "sender_batch_id", - "email_subject", - "email_message", - "funding_source", - "amount_currency", - "amount_value", - "fees_currency", - "fees_value", - ] - - def get_payout(self, payout_batch_id: str) -> List[Dict]: - connection = self.handler.connect() - endpoint = f"v1/payments/payouts/{payout_batch_id}" - payout = connection.get(endpoint) - - payout_data = { - "payout_batch_id": payout['batch_header']['payout_batch_id'], - "batch_status": payout['batch_header']['batch_status'], - "time_created": payout['batch_header']['time_created'], - "time_completed": payout['batch_header']['time_completed'], - "sender_batch_id": payout['batch_header']['sender_batch_header']['sender_batch_id'], - "email_subject": payout['batch_header']['sender_batch_header']['email_subject'], - "email_message": payout['batch_header']['sender_batch_header']['email_message'], - "funding_source": payout['batch_header']['funding_source'], - "amount_currency": payout['batch_header']['amount']['currency'], - "amount_value": payout['batch_header']['amount']['value'], - "fees_currency": payout['batch_header']['fees']['currency'], - "fees_value": payout['batch_header']['fees']['value'], - } - - return [payout_data] diff --git a/mindsdb/integrations/handlers/paypal_handler/requirements.txt b/mindsdb/integrations/handlers/paypal_handler/requirements.txt deleted file mode 100644 index c7b77ada9e5..00000000000 --- a/mindsdb/integrations/handlers/paypal_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -paypalrestsdk \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py b/mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py index ab9aac0b340..5d24c3b5578 100644 --- a/mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +++ b/mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py @@ -2,7 +2,7 @@ import json import hashlib from collections import OrderedDict -from typing import Dict, List, Literal, Tuple +from typing import List, Literal, Tuple from urllib.parse import urlparse import pandas as pd @@ -32,7 +32,6 @@ from mindsdb.integrations.libs.vectordatabase_handler import ( FilterCondition, VectorStoreHandler, - DistanceFunction, TableField, FilterOperator, ) @@ -131,7 +130,7 @@ def query(self, query: ASTNode) -> Response: if isinstance(query, DropTables): query.tables = [self._check_table(table.parts[-1]) for table in query.tables] query_str, params = self.renderer.get_exec_params(query, with_failback=True) - return self.native_query(query_str, params, no_restrict=True) + return self.native_query(query_str, params, no_restrict=True, stream=False) return super().query(query) def native_query(self, query, params=None, no_restrict=False) -> Response: @@ -146,7 +145,7 @@ def native_query(self, query, params=None, no_restrict=False) -> Response: return super().native_query(query, params=params) def raw_query(self, query, params=None) -> Response: - resp = super().native_query(query, params) + resp = super().native_query(query, params, stream=False) if resp.resp_type == RESPONSE_TYPE.ERROR: raise RuntimeError(resp.error_message) if resp.resp_type == RESPONSE_TYPE.TABLE: @@ -527,118 +526,6 @@ def keyword_select( return result - def hybrid_search( - self, - table_name: str, - embeddings: List[float], - query: str = None, - metadata: Dict[str, str] = None, - distance_function=DistanceFunction.COSINE_DISTANCE, - **kwargs, - ) -> pd.DataFrame: - """ - Executes a hybrid search, combining semantic search and one or both of keyword/metadata search. - - For insight on the query construction, see: https://docs.pgvecto.rs/use-case/hybrid-search.html#advanced-search-merge-the-results-of-full-text-search-and-vector-search. - - Args: - table_name(str): Name of underlying table containing content, embeddings, & metadata - embeddings(List[float]): Embedding vector to perform semantic search against - query(str): User query to convert into keywords for keyword search - metadata(Dict[str, str]): Metadata filters to filter content rows against - distance_function(DistanceFunction): Distance function used to compare embeddings vectors for semantic search - - Kwargs: - id_column_name(str): Name of ID column in underlying table - content_column_name(str): Name of column containing document content in underlying table - embeddings_column_name(str): Name of column containing embeddings vectors in underlying table - metadata_column_name(str): Name of column containing metadata key-value pairs in underlying table - - Returns: - df(pd.DataFrame): Hybrid search result, sorted by hybrid search rank - """ - if query is None and metadata is None: - raise ValueError( - "Must provide at least one of: query for keyword search, or metadata filters. For only embeddings search, use normal search instead." - ) - - id_column_name = kwargs.get("id_column_name", "id") - content_column_name = kwargs.get("content_column_name", "content") - embeddings_column_name = kwargs.get("embeddings_column_name", "embeddings") - metadata_column_name = kwargs.get("metadata_column_name", "metadata") - # Filter by given metadata for semantic search & full text search CTEs, if present. - where_clause = " WHERE " - if metadata is None: - where_clause = "" - metadata = {} - for i, (k, v) in enumerate(metadata.items()): - where_clause += f"{metadata_column_name}->>'{k}' = '{v}'" - if i < len(metadata.items()) - 1: - where_clause += " AND " - - # See https://docs.pgvecto.rs/use-case/hybrid-search.html#advanced-search-merge-the-results-of-full-text-search-and-vector-search. - # - # We can break down the below query as follows: - # - # Start with a CTE (Common Table Expression) called semantic_search (https://www.postgresql.org/docs/current/queries-with.html). - # This expression calculates rank by the defined distance function, which measures the distance between the - # embeddings column and the given embeddings vector. Results are ordered by this rank. - # - # Next, define another CTE called full_text_search if we are doing keyword search. - # This calculates rank using the built-in ts_rank function (https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING). - # We convert the content column to a ts_vector and match rows for the given tsquery in the content column. Results are ordered by this ts_rank. - # - # For both of these CTEs, we filter by any given metadata fields. - # - # See https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-DOCUMENTS for to_tsvector - # See https://www.postgresql.org/docs/current/functions-textsearch.html#FUNCTIONS-TEXTSEARCH for tsquery syntax - # - # Finally, we use a FULL OUTER JOIN to SELECT from both CTEs defined above. - # The COALESCE function is used to handle cases where one CTE has null values. - # - # Or, if we are only doing metadata search, we leave out the JOIN & full text search CTEs. - # - # We calculate the final "hybrid" rank by summing the reciprocals of the ranks from each individual CTE. - semantic_search_cte = f"""WITH semantic_search AS ( - SELECT {id_column_name}, {content_column_name}, {embeddings_column_name}, - RANK () OVER (ORDER BY {embeddings_column_name} {distance_function.value} '{str(embeddings)}') AS rank - FROM "{table_name}" {where_clause} - ORDER BY {embeddings_column_name} {distance_function.value} '{str(embeddings)}'::vector - )""" - - full_text_search_cte = "" - if query is not None: - ts_vector_clause = ( - f"WHERE to_tsvector('english', {content_column_name}) @@ plainto_tsquery('english', '{query}')" - ) - if metadata: - ts_vector_clause = ( - f"AND to_tsvector('english', {content_column_name}) @@ plainto_tsquery('english', '{query}')" - ) - full_text_search_cte = f""", - full_text_search AS ( - SELECT {id_column_name}, {content_column_name}, {embeddings_column_name}, - RANK () OVER (ORDER BY ts_rank(to_tsvector('english', {content_column_name}), plainto_tsquery('english', '{query}')) DESC) AS rank - FROM "{table_name}" {where_clause} - {ts_vector_clause} - ORDER BY ts_rank(to_tsvector('english', {content_column_name}), plainto_tsquery('english', '{query}')) DESC - )""" - - hybrid_select = """ - SELECT * FROM semantic_search""" - if query is not None: - hybrid_select = f""" - SELECT - COALESCE(semantic_search.{id_column_name}, full_text_search.{id_column_name}) AS id, - COALESCE(semantic_search.{content_column_name}, full_text_search.{content_column_name}) AS content, - COALESCE(semantic_search.{embeddings_column_name}, full_text_search.{embeddings_column_name}) AS embeddings, - COALESCE(1.0 / (1 + semantic_search.rank), 0.0) + COALESCE(1.0 / (1 + full_text_search.rank), 0.0) AS rank - FROM semantic_search FULL OUTER JOIN full_text_search USING ({id_column_name}) ORDER BY rank DESC; - """ - - full_search_query = f"{semantic_search_cte}{full_text_search_cte}{hybrid_select}" - return self.raw_query(full_search_query) - def create_table(self, table_name: str): """Create a table with a vector column.""" diff --git a/mindsdb/integrations/handlers/pgvector_handler/requirements.txt b/mindsdb/integrations/handlers/pgvector_handler/requirements.txt index 1047dcfb1f5..92ec66f21ce 100644 --- a/mindsdb/integrations/handlers/pgvector_handler/requirements.txt +++ b/mindsdb/integrations/handlers/pgvector_handler/requirements.txt @@ -1 +1 @@ -pgvector==0.3.6 \ No newline at end of file +pgvector==0.3.6 diff --git a/mindsdb/integrations/handlers/pgvector_handler/tests/test_pgvector_handler.py b/mindsdb/integrations/handlers/pgvector_handler/tests/test_pgvector_handler.py new file mode 100644 index 00000000000..178e1bdeb00 --- /dev/null +++ b/mindsdb/integrations/handlers/pgvector_handler/tests/test_pgvector_handler.py @@ -0,0 +1,88 @@ +import os +import psycopg2 +import pytest + +from mindsdb.integrations.handlers.pgvector_handler.pgvector_handler import PgVectorHandler + + +TEST_DB_NAME = os.environ.get("MDB_TEST_PGVECTOR_DATABASE", "pgvector_handler_test_db") +# Should match table name in data/pgvector/seed.sql +TEST_TABLE_NAME = "items" +# Should match column names in data/pgvector/seed.sql +COLUMN_NAMES = ["id", "content", "embeddings", "metadata"] + +HANDLER_KWARGS = { + "connection_data": { + "host": os.environ.get("MDB_TEST_PGVECTOR_HOST", "127.0.0.1"), + "port": os.environ.get("MDB_TEST_PGVECTOR_PORT", "5432"), + "user": os.environ.get("MDB_TEST_PGVECTOR_USER", "postgres"), + "password": os.environ.get("MDB_TEST_PGVECTOR_PASSWORD", "supersecret"), + "database": TEST_DB_NAME, + } +} + + +def init_db(): + """Seed the test DB with some data""" + conn_info = HANDLER_KWARGS["connection_data"].copy() + conn_info["database"] = "postgres" + db = psycopg2.connect(**conn_info) + db.autocommit = True + cursor = db.cursor() + + try: + cursor.execute(f"DROP DATABASE IF EXISTS {TEST_DB_NAME};") + db.commit() + + # Create the test database + cursor.execute(f"CREATE DATABASE {TEST_DB_NAME};") + db.commit() + + # Reconnect to the new database + conn_info["database"] = TEST_DB_NAME + db = psycopg2.connect(**conn_info) + db.autocommit = True + cursor = db.cursor() + + # Seed the database with data + curr_dir = os.path.dirname(os.path.realpath(__file__)) + seed_sql_path = os.path.join(curr_dir, "data", "pgvector", "seed.sql") + with open(seed_sql_path, "r") as sql_seed_file: + cursor.execute(sql_seed_file.read()) + db.commit() + + finally: + # Close the cursor and the connection + cursor.close() + db.close() + + +@pytest.fixture(scope="module") +def handler(): + init_db() + handler = PgVectorHandler("test_handler", **HANDLER_KWARGS) + yield handler + + +@pytest.mark.skipif( + os.environ.get("MDB_TEST_PGVECTOR_HOST") is None, reason="MDB_TEST_PGVECTOR_HOST environment variable not set" +) +class TestPgvectorConnection: + def test_connect(self, handler): + handler.connect() + assert handler.is_connected, "connection error" + + def test_check_connection(self, handler): + res = handler.check_connection() + assert res.success, res.error_message + + +@pytest.mark.skipif( + os.environ.get("MDB_TEST_PGVECTOR_HOST") is None, reason="MDB_TEST_PGVECTOR_HOST environment variable not set" +) +class TestPgvectorQuery: + def test_select(self, handler): + result = handler.select(TEST_TABLE_NAME) + assert not result.empty + for col in COLUMN_NAMES: + assert col in result.columns diff --git a/mindsdb/integrations/handlers/phoenix_handler/README.md b/mindsdb/integrations/handlers/phoenix_handler/README.md deleted file mode 100644 index f2ac6cd0654..00000000000 --- a/mindsdb/integrations/handlers/phoenix_handler/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# Apache Phoenix Handler - -This is the implementation of the Apache Phoenix handler for MindsDB. - -## Apache Phoenix -Apache Phoenix takes your SQL query, compiles it into a series of HBase scans, and orchestrates the running of those scans to produce regular JDBC result sets. -
-https://phoenix.apache.org/ - -It is a SQL skin over HBase delivered as a client-embedded JDBC driver targeting low latency queries over HBase data. - -## Implementation -This handler was implemented using the `phoenixdb` and `pyphoenix` libraries, the Python APIs for accessing the Phoenix SQL database using the remote query server introduced in Phoenix 4.4. - -The required arguments to establish a connection are, -* `url`: the URL to the Phoenix Query Server - -## Usage -In order to make use of this handler and connect to an Apache Phoenix Query Server in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE phoenix_datasource -WITH ENGINE = 'phoenix', -PARAMETERS = { - "url": "http://127.0.0.1:8765", - "autocommit": True -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM phoenix_datasource.example_tbl -~~~~ - -## Quickstart -To quickly spin up Apache Phoenix + HBase in Docker, the following repository can be used, -https://github.com/mrauhu/apache-hbase-phoenix - -Close this repository locally, -~~~~bash -git clone https://github.com/mrauhu/apache-hbase-phoenix.git -~~~~ - -Run the containers in the background, -~~~~bash -docker-compose up -d -~~~~ - -Install MindsDB on your local Python environment, -~~~~bash -pip install mindsdb -~~~~ - -Launch the MindsDB SQL Editor, -~~~~bash -python -m mindsdb -~~~~ - -Execute the following commands to create a data source and query the system table `SYSTEM.CATALOG` table as explained under the Usage section, - -~~~~sql -CREATE DATABASE phoenix_datasource -WITH ENGINE = 'phoenix', -PARAMETERS = { - "url": "http://127.0.0.1:8765", - "autocommit": True -}; - -SELECT * -FROM phoenix_datasource.SYSTEM.CATALOG -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/phoenix_handler/__about__.py b/mindsdb/integrations/handlers/phoenix_handler/__about__.py deleted file mode 100644 index 52ed27ef67f..00000000000 --- a/mindsdb/integrations/handlers/phoenix_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Apache Phoenix handler' -__package_name__ = 'mindsdb_phoenix_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Apache Phoenix" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/phoenix_handler/__init__.py b/mindsdb/integrations/handlers/phoenix_handler/__init__.py deleted file mode 100644 index e3b9941dab1..00000000000 --- a/mindsdb/integrations/handlers/phoenix_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .phoenix_handler import PhoenixHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Apache Phoenix' -name = 'phoenix' -type = HANDLER_TYPE.DATA -icon_path = 'icon.png' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/phoenix_handler/connection_args.py b/mindsdb/integrations/handlers/phoenix_handler/connection_args.py deleted file mode 100644 index bd75db5211c..00000000000 --- a/mindsdb/integrations/handlers/phoenix_handler/connection_args.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - url={ - 'type': ARG_TYPE.STR, - 'description': 'The URL to the Phoenix Query Server.' - }, - max_retries={ - 'type': ARG_TYPE.INT, - 'description': 'The maximum number of retries in case there is a connection error.' - }, - autocommit={ - 'type': ARG_TYPE.BOOL, - 'description': 'The flag for switching the connection to autocommit mode.' - }, - auth={ - 'type': ARG_TYPE.STR, - 'description': 'An authentication configuration object as expected by the underlying python_requests and python_requests_gssapi library.' - }, - authentication={ - 'type': ARG_TYPE.STR, - 'description': 'An alternative way to specify the authentication mechanism that mimics the semantics of the JDBC drirver.' - }, - avatica_user={ - 'type': ARG_TYPE.STR, - 'description': 'The username for BASIC or DIGEST authentication. Use in conjunction with the authentication option.' - }, - avatica_password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password for BASIC or DIGEST authentication. Use in conjunction with the authentication option.', - 'secret': True - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'If authentication is BASIC or DIGEST then alias for avatica_user. If authentication is NONE or SPNEGO then alias for do_as' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'If authentication is BASIC or DIGEST then alias for avatica_password.', - 'secret': True - } -) - -connection_args_example = OrderedDict( - url='http://127.0.0.1:8765', - autocommit=True -) diff --git a/mindsdb/integrations/handlers/phoenix_handler/icon.png b/mindsdb/integrations/handlers/phoenix_handler/icon.png deleted file mode 100644 index 090b17d7ac1..00000000000 Binary files a/mindsdb/integrations/handlers/phoenix_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py b/mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py deleted file mode 100644 index 0090a4beb6d..00000000000 --- a/mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py +++ /dev/null @@ -1,230 +0,0 @@ -from typing import Optional - -import pandas as pd -import phoenixdb - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from pyphoenix.sqlalchemy_phoenix import PhoenixDialect - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class PhoenixHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Apache Phoenix statements. - """ - - name = 'phoenix' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'phoenix' - - optional_parameters = ['max_retries', 'autocommit', 'auth', 'authentication', 'avatica_user', 'avatica_password', 'user', 'password'] - for parameter in optional_parameters: - if parameter not in connection_data: - connection_data[parameter] = None - - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - self.connection = phoenixdb.connect( - url=self.connection_data['url'], - max_retries=self.connection_data['max_retries'], - autocommit=self.connection_data['autocommit'], - auth=self.connection_data['auth'], - authentication=self.connection_data['authentication'], - avatica_user=self.connection_data['avatica_user'], - avatica_password=self.connection_data['avatica_password'], - user=self.connection_data['user'], - password=self.connection_data['password'] - ) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ Close any existing connections - - Should switch self.is_connected. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to the Phoenix Query Server, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cursor.description] - ) - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f'Error running query: {query} on the Phoenix Query Server!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - - renderer = SqlalchemyRender(PhoenixDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = """ - SELECT DISTINCT TABLE_NAME, TABLE_SCHEM FROM SYSTEM.CATALOG - """ - result = self.native_query(query) - df = result.data_frame - df = df[df['TABLE_SCHEM'] != 'SYSTEM'] - df = df.drop('TABLE_SCHEM', axis=1) - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - query = f"SELECT * from {table_name} LIMIT 5" - cursor.execute(query) - cursor.fetchall() - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - [(x[0], x[1]) for x in cursor.description], - columns=['column_name', 'data_type'] - ) - ) - - except Exception as e: - logger.error(f'Error running query: {query} on the Phoenix Query Server!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response diff --git a/mindsdb/integrations/handlers/phoenix_handler/requirements.txt b/mindsdb/integrations/handlers/phoenix_handler/requirements.txt deleted file mode 100644 index 7d8fd10bbc0..00000000000 --- a/mindsdb/integrations/handlers/phoenix_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pyphoenix -phoenixdb diff --git a/mindsdb/integrations/handlers/phoenix_handler/tests/__init__.py b/mindsdb/integrations/handlers/phoenix_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/phoenix_handler/tests/test_phoenix_handler.py b/mindsdb/integrations/handlers/phoenix_handler/tests/test_phoenix_handler.py deleted file mode 100644 index a0ae7cef4b9..00000000000 --- a/mindsdb/integrations/handlers/phoenix_handler/tests/test_phoenix_handler.py +++ /dev/null @@ -1,33 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.phoenix_handler.phoenix_handler import PhoenixHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class PhoenixHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "url": 'http://127.0.0.1:8765', - "autocommit": True - } - cls.handler = PhoenixHandler('test_phoenix_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM USERS" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('USERS') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/pinecone_handler/README.md b/mindsdb/integrations/handlers/pinecone_handler/README.md deleted file mode 100644 index 2fcadbd4b6c..00000000000 --- a/mindsdb/integrations/handlers/pinecone_handler/README.md +++ /dev/null @@ -1,106 +0,0 @@ -# Pinecone Handler - -This is the implementation of the Pinecone for MindsDB. - -## Pinecone - -Pinecone is a vector database which is fully-managed, developer-friendly, and easily scalable. - -## Implementation - -This handler uses `pinecone-client` python library connect to a pinecone environment. - -The required arguments to establish a connection are: - -* `api_key`: the API key that can be found in your pinecone account - -These optional arguments are used with `CREATE TABLE` statements: - -* `dimension`: dimensions of the vectors to be stored in the index (default=8) -* `metric`: distance metric to be used for similarity search (default='cosine') -* `spec`: the spec of the index to be created. This is a dictionary that can contain the following keys: - * `cloud`: the cloud provider to use (default='aws') - * `region`: the region to use (default='us-east-1') - - -Only the creation of serverless indexes is supported at the moment when running `CREATE TABLE` statements. - - -## Limitations - -- [ ] `DROP TABLE` support -- [ ] Support for [namespaces](https://docs.pinecone.io/docs/namespaces) -- [ ] Display score/distance -- [ ] Support for creating/reading sparse values -- [ ] `content` column is not supported since it does not exist in Pinecone - -## Usage - -In order to make use of this handler and connect to an environment, use the following syntax: - -```sql -CREATE DATABASE pinecone_dev -WITH ENGINE = "pinecone", -PARAMETERS = { - "api_key": "..." -}; -``` - -You can query pinecone indexes (`temp` in the following examples) based on `id` or `search_vector`, but not both: - -```sql -SELECT * from pinecone_dev.temp -WHERE id = "abc" -LIMIT 1 -``` - -```sql -SELECT * from pinecone_dev.temp -WHERE search_vector = "[1,2,3,4,5,6,7,8]" -``` - -If you are using subqueries, make sure that the result is only a single row since the use of multiple search vectors is not allowed - -```sql -SELECT * from pinecone_database.temp -WHERE search_vector = ( - SELECT embeddings FROM sqlitetesterdb.test WHERE id = 10 -) -``` - -Optionally, you can filter based on metadata too: - -```sql -SELECT * from pinecone_dev.temp -WHERE id = "abc" AND metadata.hello < 100 -``` - -You can delete records using `id` or `metadata` like so: - -```sql -DELETE FROM pinecone_dev.temp -WHERE id = "abc" -``` - -Note that deletion through metadata is not supported in starter tier - -```sql -DELETE FROM pinecone_dev.temp -WHERE metadata.tbd = true -``` - -You can insert data into a new collection like so: - -```sql -CREATE TABLE pinecone_dev.temp ( -SELECT * FROM mysql_demo_db.temp LIMIT 10); -``` - -To update records, you can use insert statement. When there is a conflicting ID in pinecone index, the record is updated with new values. It might take a bit to see it reflected. - -```sql -INSERT INTO pinecone_test.testtable (id,content,metadata,embeddings) -VALUES ( - 'id1', 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]' -); -``` diff --git a/mindsdb/integrations/handlers/pinecone_handler/__about__.py b/mindsdb/integrations/handlers/pinecone_handler/__about__.py deleted file mode 100644 index 8f4ed58a0b9..00000000000 --- a/mindsdb/integrations/handlers/pinecone_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Pinecone handler" -__package_name__ = "mindsdb_pinecone_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Pinecone" -__author__ = "Aditya Azad" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/pinecone_handler/__init__.py b/mindsdb/integrations/handlers/pinecone_handler/__init__.py deleted file mode 100644 index 9d27a9ae6d0..00000000000 --- a/mindsdb/integrations/handlers/pinecone_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .pinecone_handler import PineconeHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Pinecone" -name = "pinecone" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/pinecone_handler/connection_args.py b/mindsdb/integrations/handlers/pinecone_handler/connection_args.py deleted file mode 100644 index e76ec96211b..00000000000 --- a/mindsdb/integrations/handlers/pinecone_handler/connection_args.py +++ /dev/null @@ -1,53 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.STR, - "description": "The API key that can be found in your pinecone account", - "required": True, - "secret": True - }, - environment={ - "type": ARG_TYPE.STR, - "description": "The environment name corresponding to the `api_key`", - "required": True, - }, - dimension={ - "type": ARG_TYPE.INT, - "description": "dimensions of the vectors to be stored in the index (default=8)", - "required": False, - }, - metric={ - "type": ARG_TYPE.STR, - "description": "distance metric to be used for similarity search (default='cosine')", - "required": False, - }, - pods={ - "type": ARG_TYPE.INT, - "description": "number of pods for the index to use, including replicas (default=1)", - "required": False, - }, - replicas={ - "type": ARG_TYPE.INT, - "description": "the number of replicas. replicas duplicate your index. they provide higher availability and throughput (default=1)", - "required": False, - }, - pod_type={ - "type": ARG_TYPE.STR, - "description": "the type of pod to use, refer to pinecone documentation (default='p1')", - "required": False, - }, -) - -connection_args_example = OrderedDict( - api_key="00000000-0000-0000-0000-000000000000", - environment="gcp-starter", - dimension=8, - metric="cosine", - pods=1, - replicas=1, - pod_type='p1', -) diff --git a/mindsdb/integrations/handlers/pinecone_handler/icon.svg b/mindsdb/integrations/handlers/pinecone_handler/icon.svg deleted file mode 100644 index 98821204ee4..00000000000 --- a/mindsdb/integrations/handlers/pinecone_handler/icon.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py b/mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py deleted file mode 100644 index 9d4706b5968..00000000000 --- a/mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +++ /dev/null @@ -1,355 +0,0 @@ -import ast -from typing import List, Optional - -import numpy as np -from pinecone import Pinecone, ServerlessSpec -from pinecone.core.openapi.shared.exceptions import NotFoundException, PineconeApiException -import pandas as pd - -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, - TableField, - VectorStoreHandler, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -DEFAULT_CREATE_TABLE_PARAMS = { - "dimension": 8, - "metric": "cosine", - "spec": { - "cloud": "aws", - "region": "us-east-1" - } -} -MAX_FETCH_LIMIT = 10000 -UPSERT_BATCH_SIZE = 99 # API reccomendation - - -class PineconeHandler(VectorStoreHandler): - """This handler handles connection and execution of the Pinecone statements.""" - - name = "pinecone" - - def __init__(self, name: str, connection_data: dict, **kwargs): - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def _get_index_handle(self, index_name): - """Returns handler to index specified by `index_name`""" - connection = self.connect() - index = connection.Index(index_name) - try: - index.describe_index_stats() - except Exception: - index = None - return index - - def _get_pinecone_operator(self, operator: FilterOperator) -> str: - """Convert FilterOperator to an operator that pinecone's query language can undersand""" - mapping = { - FilterOperator.EQUAL: "$eq", - FilterOperator.NOT_EQUAL: "$ne", - FilterOperator.GREATER_THAN: "$gt", - FilterOperator.GREATER_THAN_OR_EQUAL: "$gte", - FilterOperator.LESS_THAN: "$lt", - FilterOperator.LESS_THAN_OR_EQUAL: "$lte", - FilterOperator.IN: "$in", - FilterOperator.NOT_IN: "$nin", - } - if operator not in mapping: - raise Exception(f"Operator {operator} is not supported by Pinecone!") - return mapping[operator] - - def _translate_metadata_condition(self, conditions: List[FilterCondition]) -> Optional[dict]: - """ - Translate a list of FilterCondition objects a dict that can be used by pinecone. - E.g., - [ - FilterCondition( - column="metadata.created_at", - op=FilterOperator.LESS_THAN, - value="2020-01-01", - ), - FilterCondition( - column="metadata.created_at", - op=FilterOperator.GREATER_THAN, - value="2019-01-01", - ) - ] - --> - { - "$and": [ - {"created_at": {"$lt": "2020-01-01"}}, - {"created_at": {"$gt": "2019-01-01"}} - ] - } - """ - # we ignore all non-metadata conditions - if conditions is None: - return None - metadata_conditions = [ - condition - for condition in conditions - if condition.column.startswith(TableField.METADATA.value) - ] - if len(metadata_conditions) == 0: - return None - - # we translate each metadata condition into a dict - pinecone_conditions = [] - for condition in metadata_conditions: - metadata_key = condition.column.split(".")[-1] - pinecone_conditions.append( - { - metadata_key: { - self._get_pinecone_operator(condition.op): condition.value - } - } - ) - - # we combine all metadata conditions into a single dict - metadata_condition = ( - {"$and": pinecone_conditions} - if len(pinecone_conditions) > 1 - else pinecone_conditions[0] - ) - return metadata_condition - - def _matches_to_dicts(self, matches: List): - """Converts the custom pinecone response type to a list of python dict""" - return [match.to_dict() for match in matches] - - def connect(self): - """Connect to a pinecone database.""" - if self.is_connected is True: - return self.connection - - if 'api_key' not in self.connection_data: - raise ValueError('Required parameter (api_key) must be provided.') - - try: - self.connection = Pinecone(api_key=self.connection_data['api_key']) - return self.connection - except Exception as e: - logger.error(f"Error connecting to Pinecone client, {e}!") - self.is_connected = False - - def disconnect(self): - """Close the pinecone connection.""" - if self.is_connected is False: - return - self.connection = None - self.is_connected = False - - def check_connection(self): - """Check the connection to pinecone.""" - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - connection.list_indexes() - response.success = True - except Exception as e: - logger.error(f"Error connecting to pinecone , {e}!") - response.error_message = str(e) - - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def get_tables(self) -> HandlerResponse: - """Get the list of indexes in the pinecone database.""" - connection = self.connect() - indexes = connection.list_indexes() - df = pd.DataFrame( - columns=["table_name"], - data=[index['name'] for index in indexes], - ) - return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=df) - - def create_table(self, table_name: str, if_not_exists=True): - """Create an index with the given name in the Pinecone database.""" - connection = self.connect() - - # TODO: Should other parameters be supported? Pod indexes? - # TODO: Should there be a better way to provide these parameters rather than when establishing the connection? - create_table_params = {} - for key, val in DEFAULT_CREATE_TABLE_PARAMS.items(): - if key in self.connection_data: - create_table_params[key] = self.connection_data[key] - else: - create_table_params[key] = val - - create_table_params["spec"] = ServerlessSpec(**create_table_params["spec"]) - - try: - connection.create_index(name=table_name, **create_table_params) - except PineconeApiException as pinecone_error: - if pinecone_error.status == 409 and if_not_exists: - return - raise Exception(f"Error creating index '{table_name}': {pinecone_error}") - - def insert(self, table_name: str, data: pd.DataFrame): - """Insert data into pinecone index passed in through `table_name` parameter.""" - index = self._get_index_handle(table_name) - if index is None: - raise Exception(f"Error getting index '{table_name}', are you sure the name is correct?") - - data.rename(columns={ - TableField.ID.value: "id", - TableField.EMBEDDINGS.value: "values"}, - inplace=True) - - columns = ["id", "values"] - - if TableField.METADATA.value in data.columns: - data.rename(columns={TableField.METADATA.value: "metadata"}, inplace=True) - # fill None and NaN values with empty dict - if data['metadata'].isnull().any(): - data['metadata'] = data['metadata'].apply(lambda x: {} if x is None or (isinstance(x, float) and np.isnan(x)) else x) - columns.append("metadata") - - data = data[columns] - - # convert the embeddings to lists if they are strings - data["values"] = data["values"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x) - - for chunk in (data[pos:pos + UPSERT_BATCH_SIZE] for pos in range(0, len(data), UPSERT_BATCH_SIZE)): - chunk = chunk.to_dict(orient="records") - index.upsert(vectors=chunk) - - def drop_table(self, table_name: str, if_exists=True): - """Delete an index passed in through `table_name` from the pinecone .""" - connection = self.connect() - try: - connection.delete_index(table_name) - except NotFoundException: - if if_exists: - return - raise Exception(f"Error deleting index '{table_name}', are you sure the name is correct?") - - def delete(self, table_name: str, conditions: List[FilterCondition] = None): - """Delete records in pinecone index `table_name` based on ids or based on metadata conditions.""" - filters = self._translate_metadata_condition(conditions) - ids = [ - condition.value - for condition in conditions - if condition.column == TableField.ID.value - ] or None - if filters is None and ids is None: - raise Exception("Delete query must have either id condition or metadata condition!") - index = self._get_index_handle(table_name) - if index is None: - raise Exception(f"Error getting index '{table_name}', are you sure the name is correct?") - - if filters is None: - index.delete(ids=ids) - else: - index.delete(filter=filters) - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ): - """Run query on pinecone index named `table_name` and get results.""" - # TODO: Add support for namespaces. - index = self._get_index_handle(table_name) - if index is None: - raise Exception(f"Error getting index '{table_name}', are you sure the name is correct?") - - query = { - "include_values": True, - "include_metadata": True - } - - # check for metadata filter - metadata_filters = self._translate_metadata_condition(conditions) - if metadata_filters is not None: - query["filter"] = metadata_filters - - # check for vector and id filters - vector_filters = [] - id_filters = [] - - if conditions: - for condition in conditions: - if condition.column == TableField.SEARCH_VECTOR.value: - vector_filters.append(condition.value) - elif condition.column == TableField.ID.value: - id_filters.append(condition.value) - - if vector_filters: - if len(vector_filters) > 1: - raise Exception("You cannot have multiple search_vectors in query") - - query["vector"] = vector_filters[0] - # For subqueries, the vector filter is a list of list of strings - if isinstance(query["vector"], list) and isinstance(query["vector"][0], str): - if len(query["vector"]) > 1: - raise Exception("You cannot have multiple search_vectors in query") - - try: - query["vector"] = ast.literal_eval(query["vector"][0]) - except Exception as e: - raise Exception(f"Cannot parse the search vector '{query['vector']}'into a list: {e}") - - if id_filters: - if len(id_filters) > 1: - raise Exception("You cannot have multiple IDs in query") - - query["id"] = id_filters[0] - - if not vector_filters and not id_filters: - raise Exception("You must provide either a search_vector or an ID in the query") - - # check for limit - if limit is not None: - query["top_k"] = limit - else: - query["top_k"] = MAX_FETCH_LIMIT - - # exec query - try: - result = index.query(**query) - except Exception as e: - raise Exception(f"Error running SELECT query on '{table_name}': {e}") - - # convert to dataframe - df_columns = { - "id": TableField.ID.value, - "metadata": TableField.METADATA.value, - "values": TableField.EMBEDDINGS.value, - } - results_df = pd.DataFrame.from_records(self._matches_to_dicts(result["matches"])) - if bool(len(results_df.columns)): - results_df.rename(columns=df_columns, inplace=True) - else: - results_df = pd.DataFrame(columns=list(df_columns.values())) - results_df[TableField.CONTENT.value] = "" - return results_df[columns] - - def get_columns(self, table_name: str) -> HandlerResponse: - return super().get_columns(table_name) diff --git a/mindsdb/integrations/handlers/pinecone_handler/requirements.txt b/mindsdb/integrations/handlers/pinecone_handler/requirements.txt deleted file mode 100644 index e1260389554..00000000000 --- a/mindsdb/integrations/handlers/pinecone_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pinecone-client==5.0.1 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pinecone_handler/tests/__init__.py b/mindsdb/integrations/handlers/pinecone_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/pinot_handler/README.md b/mindsdb/integrations/handlers/pinot_handler/README.md deleted file mode 100644 index 35fff52b4fc..00000000000 --- a/mindsdb/integrations/handlers/pinot_handler/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# Apache Pinot Handler - -This is the implementation of the Apache Pinot handler for MindsDB. - -## Apache Pinot -Apache Pinot is a real-time distributed OLAP database designed for low-latency query execution even at extremely high throughput. Apache Pinot can ingest directly from streaming sources like Apache Kafka and make events available for querying immediately. -
-https://www.startree.ai/what-is-apache-pinot -## Implementation -This handler was implemented using the `pinotdb` library, the Python DB-API and SQLAlchemy dialect for Pinot. - -The required arguments to establish a connection are, -* `host`: the host name or IP address of the Apache Pinot cluster -* `broker_port`: the port that the Broker of the Apache Pinot cluster is running on -* `controller_port`: the port that the Controller of the Apache Pinot cluster is running on -* `path`: the query path - -## Usage -In order to make use of this handler and connect to a Apache Pinot cluster in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE pinot_datasource -WITH -engine='pinot', -parameters={ - "host":"localhost", - "broker_port": 8000, - "controller_port": 9000, - "path": "/query/sql", - "scheme": "http" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM pinot_datasource.example_tbl -~~~~ - -## Quickstart -To quickly spin up Apache Pinot in Docker, run the following command, -~~~~bash -docker run --name pinot-quickstart -p 2123:2123 -p 9000:9000 -p 8000:8000 -d apachepinot/pinot:latest QuickStart -type batch -~~~~ - -Install MindsDB on your local Python environment, -~~~~bash -pip install mindsdb -~~~~ - -Launch the MindsDB SQL Editor, -~~~~bash -python -m mindsdb -~~~~ - -Execute the following commands to create a data source and query the `baseballStats` table as explained under the Usage section, - -~~~~sql -CREATE DATABASE pinot_datasource -WITH -engine='pinot', -parameters={ - "host":"localhost", - "broker_port": 8000, - "controller_port": 9000, - "path": "/query/sql", - "scheme": "http" -}; - -SELECT * FROM pinot_datasource.baseballStats -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pinot_handler/__about__.py b/mindsdb/integrations/handlers/pinot_handler/__about__.py deleted file mode 100644 index b646121d1e6..00000000000 --- a/mindsdb/integrations/handlers/pinot_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Apache Pinot handler' -__package_name__ = 'mindsdb_pinot_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Apache Pinot" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/pinot_handler/__init__.py b/mindsdb/integrations/handlers/pinot_handler/__init__.py deleted file mode 100644 index 8efd5597686..00000000000 --- a/mindsdb/integrations/handlers/pinot_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .pinot_handler import PinotHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Apache Pinot' -name = 'pinot' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/pinot_handler/connection_args.py b/mindsdb/integrations/handlers/pinot_handler/connection_args.py deleted file mode 100644 index 4778e74df25..00000000000 --- a/mindsdb/integrations/handlers/pinot_handler/connection_args.py +++ /dev/null @@ -1,49 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Apache Pinot cluster.' - }, - broker_port={ - 'type': ARG_TYPE.INT, - 'description': 'The port that the Broker of the Apache Pinot cluster is running on.' - }, - controller_port={ - 'type': ARG_TYPE.INT, - 'description': 'The port that the Controller of the Apache Pinot cluster is running on.' - }, - path={ - 'type': ARG_TYPE.STR, - 'description': 'The query path.' - }, - scheme={ - 'type': ARG_TYPE.STR, - 'description': 'The URI schema. This parameter is optional and the default will be https.' - }, - username={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Apache Pinot cluster. This parameter is optional.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password used to authenticate with the Apache Pinot cluster. This parameter is optional.', - 'secret': True - }, - verify_ssl={ - 'type': ARG_TYPE.STR, - 'description': 'The flag for whether SSL certificates should be verified or not. This parameter is optional and ' - 'if specified, it should be either True or False' - }, -) - -connection_args_example = OrderedDict( - host='localhost', - broker_port=8000, - controller_port=9000, - path='/query/sql', - scheme='http' -) diff --git a/mindsdb/integrations/handlers/pinot_handler/icon.svg b/mindsdb/integrations/handlers/pinot_handler/icon.svg deleted file mode 100644 index 2b027fe0182..00000000000 --- a/mindsdb/integrations/handlers/pinot_handler/icon.svg +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pinot_handler/pinot_handler.py b/mindsdb/integrations/handlers/pinot_handler/pinot_handler.py deleted file mode 100644 index ad777015a53..00000000000 --- a/mindsdb/integrations/handlers/pinot_handler/pinot_handler.py +++ /dev/null @@ -1,224 +0,0 @@ -from typing import Optional - -import pandas as pd -import pinotdb -import requests -from requests.exceptions import InvalidSchema -import json - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from pinotdb.sqlalchemy import PinotDialect - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class PinotHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Apache Pinot statements. - """ - - name = 'pinot' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = 'pinot' - - optional_parameters = ['username', 'password'] - for parameter in optional_parameters: - if parameter not in connection_data: - connection_data[parameter] = None - - if 'verify_ssl' not in connection_data: - connection_data['verify_ssl'] = 'False' - - if 'scheme' not in connection_data: - connection_data['scheme'] = 'http' - - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - self.connection = pinotdb.connect( - host=self.connection_data['host'], - port=self.connection_data['broker_port'], - path=self.connection_data['path'], - scheme=self.connection_data['scheme'], - username=self.connection_data['username'], - password=self.connection_data['password'], - verify_ssl=json.loads(self.connection_data['verify_ssl'].lower()) - ) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ Close any existing connections - - Should switch self.is_connected. - """ - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Pinot, {e}!') - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cursor.description] - ) - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f'Error running query: {query} on Pinot!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - renderer = SqlalchemyRender(PinotDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - api_url = f"{self.connection_data['host']}:{self.connection_data['controller_port']}/tables" - try: - result = requests.get(api_url) - except InvalidSchema: - api_url = f"{self.connection_data['scheme']}://{api_url}" - result = requests.get(api_url) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - json.loads(result.content)['tables'], - columns=['table_name'] - ) - ) - - return response - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - api_url = f"{self.connection_data['host']}:{self.connection_data['controller_port']}/tables/{table_name}/schema" - try: - result = requests.get(api_url) - except InvalidSchema: - api_url = f"{self.connection_data['scheme']}://{api_url}" - result = requests.get(api_url) - - df = pd.DataFrame(json.loads(result.content)['dimensionFieldSpecs']) - df = df.rename(columns={'name': 'column_name', 'dataType': 'data_type'}) - - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - - return response diff --git a/mindsdb/integrations/handlers/pinot_handler/requirements.txt b/mindsdb/integrations/handlers/pinot_handler/requirements.txt deleted file mode 100644 index 0254cfbcca8..00000000000 --- a/mindsdb/integrations/handlers/pinot_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pinotdb \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pinot_handler/tests/__init__.py b/mindsdb/integrations/handlers/pinot_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/pinot_handler/tests/test_pinot_handler.py b/mindsdb/integrations/handlers/pinot_handler/tests/test_pinot_handler.py deleted file mode 100644 index b98ffcd61ee..00000000000 --- a/mindsdb/integrations/handlers/pinot_handler/tests/test_pinot_handler.py +++ /dev/null @@ -1,36 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.pinot_handler.pinot_handler import PinotHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class PinotHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "localhost", - "broker_port": 8000, - "controller_port": 9000, - "path": "/query/sql", - "scheme": "http" - } - cls.handler = PinotHandler('test_pinot_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM baseballStats" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns('baseballStats') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/pirateweather_handler/README.md b/mindsdb/integrations/handlers/pirateweather_handler/README.md deleted file mode 100644 index 57b9af74630..00000000000 --- a/mindsdb/integrations/handlers/pirateweather_handler/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# PirateWeather Handler - -The PirateWeather handler allows you to query historical weather data from [PirateWeather](http://pirateweather.net/). - -## PirateWeather Handler Setup - -The PirateWeather handler is initialized with the following parameters: - -- `api_key`: your PirateWeather API key to use for authentication - -Read about creating a PirateWeather API key [here](http://pirateweather.net/en/latest/). - -Provided Tables - -- `hourly` - historical hourly weather data for a given location. Columns: - - `localtime` - - `icon` - - `summary` - - `precipAccumulation` - - `precipType` - - `temperature` - - `apparentTemperature` - - `dewPoint` - - `pressure` - - `windSpeed` - - `windBearing` - - `cloudCover` - - `latitude` - - `longitude` - - `timezone` - - `offset` -- `daily` - historical daily weather data for a given location. Columns: - - `localtime` - - `icon` - - `summary` - - `precipAccumulation` - - `precipType` - - `temperature` - - `apparentTemperature` - - `dewPoint` - - `pressure` - - `windSpeed` - - `windBearing` - - `cloudCover` - - `latitude` - - `longitude` - - `timezone` - - `offset` - -See [here](http://pirateweather.net/en/latest/API/#time-machine-request) for more information. - -Both tables support the following parameters: - -* `latitude` - latitude of the location. Required. -* `longitude` - longitude of the location. Required. -* `time` - Date for which to fetch historical data. Optional, defaults to the current date. -* `units` - Units to use for temperature and wind speed. Optional, defaults to `us` (Imperial units). Other options are: - * `ca`: SI, with Wind Speed and Wind Gust in kilometres per hour. - * `uk`: SI, with Wind Speed and Wind Gust in miles per hour and visibility are in miles. - * `us`: Imperial units - * `si`: SI units - -## Example Usage - -The first step is to create a database with the new `pirateweather` engine. - -~~~~sql -CREATE -DATABASE pirateweather -WITH ENGINE = 'pirateweather', -PARAMETERS = { - "api_key": "your_api_key" -}; -~~~~ - -Use the established connection to query your database: - -~~~~sql -SELECT * -FROM pirateweather.hourly -WHERE latitude = 51.507351 - AND longitude = -0.127758 - AND time ="1672578052" -~~~~ - -~~~~sql -SELECT * -FROM pirateweather.daily -WHERE latitude = 51.507351 - AND longitude = -0.127758 - AND time ="1672578052" -~~~~ - -You can further query the returned data as usual: - -~~~~sql -SELECT * -FROM pirateweather.daily -WHERE latitude = 51.507351 - AND longitude = -0.127758 - AND time ="1672578052" - AND temperature - > 50 -ORDER BY temperature DESC - LIMIT 10 -~~~~ - - diff --git a/mindsdb/integrations/handlers/pirateweather_handler/__about__.py b/mindsdb/integrations/handlers/pirateweather_handler/__about__.py deleted file mode 100644 index 59f457f0032..00000000000 --- a/mindsdb/integrations/handlers/pirateweather_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB PirateWeather API handler' -__package_name__ = 'mindsdb_pirateweather_api_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the PriateWeather API" -__author__ = 'Patrick Sean Klein' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/pirateweather_handler/__init__.py b/mindsdb/integrations/handlers/pirateweather_handler/__init__.py deleted file mode 100644 index 589e15b0798..00000000000 --- a/mindsdb/integrations/handlers/pirateweather_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .pirateweather_handler import PirateWeatherAPIHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Pirate Weather" -name = "pirateweather" -type = HANDLER_TYPE.DATA -icon_path = "icon.png" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/pirateweather_handler/connection_args.py b/mindsdb/integrations/handlers/pirateweather_handler/connection_args.py deleted file mode 100644 index b89cc6689dc..00000000000 --- a/mindsdb/integrations/handlers/pirateweather_handler/connection_args.py +++ /dev/null @@ -1,14 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.STR, - "description": "Your PirateWeather API key.", - "secret": True - } -) - -connection_args_example = OrderedDict(api_key="knlsndlknslk") diff --git a/mindsdb/integrations/handlers/pirateweather_handler/icon.png b/mindsdb/integrations/handlers/pirateweather_handler/icon.png deleted file mode 100644 index a285f5f66c7..00000000000 Binary files a/mindsdb/integrations/handlers/pirateweather_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/pirateweather_handler/pirateweather_handler.py b/mindsdb/integrations/handlers/pirateweather_handler/pirateweather_handler.py deleted file mode 100644 index a602f92b92c..00000000000 --- a/mindsdb/integrations/handlers/pirateweather_handler/pirateweather_handler.py +++ /dev/null @@ -1,236 +0,0 @@ -import os -from typing import Any - -import pandas as pd -import requests -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser import ast - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb.integrations.libs.response import HandlerResponse, HandlerStatusResponse -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.utilities.config import Config - - -class PirateWeatherAPIBaseTable(APITable): - allowed_select_keys = {} - columns = [] - table_name = "" - - def __init__(self, handler: "PirateWeatherAPIHandler"): - super().__init__(handler) - - def select(self, query: ast.Select) -> pd.DataFrame: - """Select data from the collected weather data and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - conditions = extract_comparison_conditions(query.where) - - params = {} - for op, arg1, arg2 in conditions: - if arg1 in self.allowed_select_keys: - if op == "=": - params[arg1] = arg2 - else: - raise NotImplementedError(f"Operator for argument {arg1} is not supported: {op}") - - if "latitude" not in params or "longitude" not in params: - raise ValueError("Latitude and longitude are required") - - result = self.handler.call_application_api(method_name=self.table_name, params=params) - - # Reparse the query and run through SELECTQueryExecutor - query_parser = SELECTQueryParser(query, table=self.table_name, columns=self.get_columns()) - selected_columns, where_conditions, order_by_conditions, result_limit = query_parser.parse_query() - - # Remove request parameters from where conditions - where_conditions = [c for c in where_conditions if c[1] not in self.allowed_select_keys] - - query_executor = SELECTQueryExecutor(result, selected_columns, where_conditions, order_by_conditions, - result_limit) - - return query_executor.execute_query() - - def get_columns(self) -> list: - return self.columns - - -class PiratePirateWeatherAPIHourlyTable(PirateWeatherAPIBaseTable): - allowed_select_keys = { - "latitude", - "longitude", - "time", - "units" - } - columns = [ - "localtime", - "icon", - "summary", - "precipAccumulation", - "precipType", - "temperature", - "apparentTemperature", - "dewPoint", - "pressure", - "windSpeed", - "windBearing", - "cloudCover", - "latitude", - "longitude", - "timezone", - "offset" - ] - table_name = "hourly" - - -class PiratePirateWeatherAPIDailyTable(PirateWeatherAPIBaseTable): - allowed_select_keys = { - "latitude", - "longitude", - "time", - "units" - } - - columns = [ - "localtime", - "icon", - "summary", - "sunriseTime", - "sunsetTime", - "moonPhase", - "precipAccumulation", - "precipType", - "temperatureHigh", - "temperatureHighTime", - "temperatureLow", - "temperatureLowTime", - "apparentTemperatureHigh", - "apparentTemperatureHighTime", - "apparentTemperatureLow", - "apparentTemperatureLowTime", - "dewPoint", - "pressure", - "windSpeed", - "windBearing", - "cloudCover", - "temperatureMin", - "temperatureMinTime", - "temperatureMax", - "temperatureMaxTime", - "apparentTemperatureMin", - "apparentTemperatureMinTime", - "apparentTemperatureMax", - "apparentTemperatureMaxTime", - "latitude", - "longitude", - "timezone", - "offset" - ] - table_name = "daily" - - -class PirateWeatherAPIHandler(APIHandler): - query_string_template = "https://timemachine.pirateweather.net/forecast/{api_key}/{latitude},{longitude}" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - self._tables = {} - - args = kwargs.get("connection_data", {}) - handler_config = Config().get("weather_handler", {}) - - connection_args = {} - - for k in ["api_key"]: - if k in args: - connection_args[k] = args[k] - elif f"WEATHER_{k.upper()}" in os.environ: - connection_args[k] = os.environ[f"WEATHER_{k.upper()}"] - elif k in handler_config: - connection_args[k] = handler_config[k] - - self._api_key = connection_args["api_key"] - - # Register tables - self._register_table("hourly", PiratePirateWeatherAPIHourlyTable(self)) - self._register_table("daily", PiratePirateWeatherAPIDailyTable(self)) - - def _register_table(self, table_name: str, table_class: Any): - self._tables[table_name] = table_class - - def connect(self) -> HandlerStatusResponse: - return HandlerStatusResponse(success=True) - - def check_connection(self) -> HandlerStatusResponse: - response = HandlerStatusResponse(False) - - try: - self.call_application_api(method_name="daily", params=dict(latitude=51.507351, - longitude=-0.127758, - time="1672578052")) - response.success = True - - except Exception as e: - response.error_message = str(e) - - return response - - def native_query(self, query: Any): - ast = parse_sql(query) - table = str(ast.from_table) - data = self._tables[table].select(ast) - return HandlerResponse(RESPONSE_TYPE.TABLE, data_frame=data) - - def call_application_api( - self, method_name: str = None, params: dict = None - ) -> pd.DataFrame: - # This will implement api base on the native query - # By processing native query to convert it to api callable parameters - if method_name not in ["hourly", "daily"]: - raise NotImplementedError(f"Method {method_name} is not implemented") - - if "latitude" not in params or "longitude" not in params: - raise ValueError("Latitude and longitude are required") - - opt_params = { - "exclude": "currently,minutely,alerts,hourly,daily".replace("," + method_name, ""), - "units": params.get("units"), - } - - # Build the query - query = self.query_string_template.format( - api_key=self._api_key, - latitude=params["latitude"], - longitude=params["longitude"] - ) - if "time" in params: - query += f",{params['time']}" - # Add optional parameters - query += "?" + "&".join([f"{k}={v}" for k, v in opt_params.items() if v]) - - # Call the API - response = requests.get(query) - response.raise_for_status() - - # Parse the response - data = response.json() - if method_name not in data: - raise ValueError(f"API response did not contain {method_name} data. Check your API key. Got response: {data}") - - # Convert to dataframe - df = pd.DataFrame(data[method_name]["data"]).assign( - latitude=params["latitude"], - longitude=params["longitude"], - timezone=data["timezone"], - offset=data["offset"] - ) - df["localtime"] = pd.to_datetime(df["time"], utc=True, unit="s").dt.tz_convert(data["timezone"]) - df.drop(columns="time", inplace=True) - return df diff --git a/mindsdb/integrations/handlers/plaid_handler/README.md b/mindsdb/integrations/handlers/plaid_handler/README.md deleted file mode 100644 index 2ec8b642738..00000000000 --- a/mindsdb/integrations/handlers/plaid_handler/README.md +++ /dev/null @@ -1,92 +0,0 @@ -# Automate Financial Data Processing with MindsDB and Plaid - -Are you tired of manually processing financial data? With MindsDB and Plaid, you can easily automate this tedious task. - -First, connect your Plaid account by following these steps. Once you have your client ID, secret, and public key, create a database in MindsDB: - -> Get client_id, secret and plaid_env from [here](https://dashboard.plaid.com/team/keys) -> and access_token can be generated with help of docs [here](https://plaid.com/docs/api/tokens/#itempublic_tokenexchange) - -```sql -CREATE DATABASE my_plaid -WITH - ENGINE = 'plaid', - PARAMETERS = { - "client_id": "YOUR_CLIENT_ID", - "secret": "YOUR_SECRET", - "access_token": "YOUR_PUBLIC_KEY", - "plaid_env": "ENV" - }; -``` - - -This creates a database called my_plaid. This database comes with a table called **transactions** and **balance** which we can use to search for and analyze transactions - - -Analyzing transactions in SQL - -Let's get a list of transactions for a specific account: - - -```sql -SELECT - id, merchant_name, authorized_date, amount ,payment_channel -FROM my_plaid.transactions -WHERE - start_date = '2022-01-01' - AND end_date = '2023-04-11' -LIMIT 20; -``` - -# Native Queries -Plaid integration also supports native queries, which allows you to call any function available in the Plaid API: - - -This will retrieve the latest transactions for the given account -```sql -SELECT * FROM my_plaid ( - get_transactions( - start_date = '2022-01-01', - end_date = '2022-02-01' - ) -); -``` -# Building a Machine Learning Model - -Now that we have our data, let's build a machine learning model that can predict future expenses. We will be using MindsDB's built-in regression engine to create this model. - -```sql -CREATE MODEL mindsdb.expense_prediction -FROM my_plaid - ( SELECT merchant_name, date, amount - FROM transactions - WHERE start_date='2023-01-01' - AND end_date='2023-04-11'; ) -PREDICT amount -ORDER BY date -GROUP BY merchant_name -WINDOW 25 -HORIZON 15 -USING ENGINE = 'statsforecast'; -``` -This creates a virtual table called expense_prediction. We can use this table to make predictions on future transactions: - - -```sql -SELECT expense_prediction.amount as predicted_amount -FROM mindsdb.expense_prediction -WHERE - merchant_name= 'UBER' - AND date ='2022-03-01'; -``` - -``` -+-----------------+ -|predicted_amount | -+-----------------+ -| 25.0 | -+-----------------+ -``` -# Schedule a Job - -Finally, we can automate this process by scheduling a job that runs our machine learning model on new transactions every day. No more manual data processing! diff --git a/mindsdb/integrations/handlers/plaid_handler/__about__.py b/mindsdb/integrations/handlers/plaid_handler/__about__.py deleted file mode 100644 index 148f82041bb..00000000000 --- a/mindsdb/integrations/handlers/plaid_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Plaid handler' -__package_name__ = 'mindsdb_plaid_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Plaid" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/plaid_handler/__init__.py b/mindsdb/integrations/handlers/plaid_handler/__init__.py deleted file mode 100644 index 24bef3317f8..00000000000 --- a/mindsdb/integrations/handlers/plaid_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .plaid_handler import ( - PlaidHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Plaid' -name = 'plaid' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/plaid_handler/icon.svg b/mindsdb/integrations/handlers/plaid_handler/icon.svg deleted file mode 100644 index ff87533ae40..00000000000 --- a/mindsdb/integrations/handlers/plaid_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/plaid_handler/plaid_handler.py b/mindsdb/integrations/handlers/plaid_handler/plaid_handler.py deleted file mode 100644 index 258669f98db..00000000000 --- a/mindsdb/integrations/handlers/plaid_handler/plaid_handler.py +++ /dev/null @@ -1,225 +0,0 @@ -import pandas as pd -from mindsdb.utilities import log -from mindsdb.integrations.libs.api_handler import APIHandler, FuncParser -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -from datetime import datetime -import plaid -from plaid.api import plaid_api -from plaid.model.accounts_balance_get_request import AccountsBalanceGetRequest -from plaid.model.accounts_balance_get_request_options import AccountsBalanceGetRequestOptions -from plaid.model.transactions_get_request import TransactionsGetRequest -from plaid.model.transactions_get_request_options import TransactionsGetRequestOptions -from .plaid_tables import BalanceTable, TransactionTable -from .utils import parse_transaction - - -PLAID_ENV = { - 'production': plaid.Environment.Production, - 'development': plaid.Environment.Development, - 'sandbox': plaid.Environment.Sandbox, -} - -logger = log.getLogger(__name__) - - -class PlaidHandler(APIHandler): - '''A class for handling connections and interactions with the Plaid API. - - Attributes: - plaid_env (str): Enviroment used by user [ 'sandbox'(default) OR 'development' OR 'production' ]. - client_id (str): Your Plaid API client_id. - secret (str): Your Plaid API secret - access_token (str): The access token for the Plaid account. - ''' - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - args = kwargs.get('connection_data', {}) - - self.plaid_config = plaid.Configuration( - host=PLAID_ENV[args.get('plaid_env', 'sandbox')], - api_key={ - 'clientId': args.get('client_id'), - 'secret': args.get('secret') - } - ) - - self.access_token = args.get('access_token') - - self.api = None - self.is_connected = False - - balance = BalanceTable(self) - transactions = TransactionTable(self) - self._register_table('balance', balance) - self._register_table('transactions', transactions) - - def connect(self): - '''Authenticate with the Plaid API using the API keys and secrets stored in the `plaid_env`, `client_id`, `secret` , and `access_token` attributes.''' # noqa - - if self.is_connected is True: - return self.api - - api_client = plaid.ApiClient(self.plaid_config) - self.api = plaid_api.PlaidApi(api_client) - self.is_connected = True - return self.api - - def check_connection(self) -> StatusResponse: - '''It evaluates if the connection with Plaid API is alive and healthy. - Returns: - HandlerStatusResponse - ''' - - response = StatusResponse(False) - - try: - api = self.connect() - api.accounts_balance_get(AccountsBalanceGetRequest( - access_token=self.access_token) - ) - response.success = True - - except Exception as e: - response.error_message = f'Error connecting to Plaid api: {e}. ' - logger.error(response.error_message) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query_string: str = None): - '''It parses any native statement string and acts upon it (for example, raw syntax commands). - Args: - query (Any): query in native format (str for sql databases, - dict for mongo, api's json etc) - Returns: - HandlerResponse - ''' - - method_name, params = FuncParser().from_string(query_string) - df = self.call_plaid_api(method_name, params) - return Response( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - - def call_plaid_api(self, method_name: str = None, params: dict = {}): - '''Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind of query: SELECT, INSERT, DELETE, etc - Returns: - DataFrame - ''' - - result = pd.DataFrame() - if method_name == 'get_balance': - result = self.get_balance(params=params) - result = BalanceTable(self).filter_columns(result=result) - - elif method_name == 'get_transactions': - result = self.get_transactions(params=params) - result = TransactionTable(self).filter_columns(result=result) - - return result - - def get_balance(self, params=None): - '''Filters data from Plaid API's balance endpoint and returns a DataFrame with the required information. - - Args: - params (dict, optional): A dictionary of options to be passed to the Plaid API. - - Returns: - pandas.DataFrame: A DataFrame containing the filtered data. - ''' - - self.connect() - if params.get('last_updated_datetime') is not None: - options = AccountsBalanceGetRequestOptions( - min_last_updated_datetime=datetime.strptime( - params.get('last_updated_datetime') - ) - ) - - response = self.api.accounts_balance_get( - AccountsBalanceGetRequest( - access_token=self.access_token, - options=options - ) - ) - else: - response = self.api.accounts_balance_get( - AccountsBalanceGetRequest(access_token=self.access_token) - ) - - messages = [] - for obj in response['accounts']: - message_dict = {} - for i in obj.to_dict().keys(): - if i.startswith('account_'): - message_dict[i] = obj[i] - elif i == 'balances': - dict_obj = obj[i].to_dict() - for j in dict_obj.keys(): - message_dict[f'balance_{j}'] = dict_obj[j] - else: - message_dict[f'account_{i}'] = obj[i] - messages.append(message_dict) - df = pd.DataFrame(messages) - - return df - - def get_transactions(self, params={}): - ''' - Filters data from Plaid API's transaction endpoint and returns a DataFrame with the required information. - Args: - params (dict, optional): A dictionary of options to be passed to the Plaid API. - - Returns: - pandas.DataFrame: A DataFrame containing the filtered data. - ''' - - self.connect() - if params.get('start_date') and params.get('end_date'): - start_date = datetime.strptime(params.get('start_date'), '%Y-%m-%d').date() - end_date = datetime.strptime(params.get('end_date'), '%Y-%m-%d').date() - else: - raise Exception('start_date and end_date is required in format YYYY-MM-DD ') - - request = TransactionsGetRequest( - access_token=self.access_token, - start_date=start_date, - end_date=end_date, - options=TransactionsGetRequestOptions() - ) - - response = self.api.transactions_get(request) - transactions = parse_transaction(response['transactions']) - - # Manipulate the count and offset parameters to paginate - # transactions and retrieve all available data - while len(transactions) < response['total_transactions']: - request = TransactionsGetRequest( - access_token=self.access_token, - start_date=start_date, - end_date=end_date, - options=TransactionsGetRequestOptions( - offset=len(transactions) - ) - ) - response = self.api.transactions_get(request) - transactions.extend(parse_transaction(response['transactions'])) - - # Converting date column from str - df = pd.DataFrame(transactions) - for i in ['date', 'authorized_date']: - df[i] = pd.to_datetime(df[i]).dt.date - - return df diff --git a/mindsdb/integrations/handlers/plaid_handler/plaid_tables.py b/mindsdb/integrations/handlers/plaid_handler/plaid_tables.py deleted file mode 100644 index 76bef68c052..00000000000 --- a/mindsdb/integrations/handlers/plaid_handler/plaid_tables.py +++ /dev/null @@ -1,157 +0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb_sql_parser import ast - - -class BalanceTable(APITable): - '''A class representing the balance table. - - This class inherits from APITable and provides functionality to select data - from the balance endpoint of the Plaid API and return it as a pandas DataFrame. - - Methods: - select(ast.Select): Select data from the balance table and return it as a pandas DataFrame. - get_columns(): Get the list of column names for the balance table. - - ''' - - def select(self, query: ast.Select): - '''Select data from the balance table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - ''' - - conditions = extract_comparison_conditions(query.where) - params = {} - for i in conditions: - if i[1] == 'last_updated_datetime': - if i[0] == '=': - params[i[1]] = i[2] - else: - raise Exception("Only equals to '=' is Supported with 'last_updated_datetime'") - - result = self.handler.call_plaid_api(method_name='get_balance', params=params) - - self.filter_columns(query=query, result=result) - return result - - def get_columns(self): - '''Get the list of column names for the balance table. - - Returns: - list: A list of column names for the balance table. - - ''' - return [ - 'account_id', - 'account_name', - 'account_mask', - 'account_type', - 'account_subtype', - 'account_official_name', - 'balance_iso_currency_code', - 'balance_unofficial_currency_code', - 'balance_available', - 'balance_current', - 'balance_limit' - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - else: - columns = self.get_columns() - - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - result = result[columns] - - if query is not None and query.limit is not None: - return result.head(query.limit.value) - - return result - - -class TransactionTable(BalanceTable): - '''A class representing the transaction table. - - This class inherits from APITable and provides functionality to select data - from the transactions endpoint of the Plaid API and return it as a pandas DataFrame. - - Methods: - select(ast.Select): Select data from the transaction table and return it as a pandas DataFrame. - get_columns(): Get the list of column names for the transaction table. - - ''' - - def select(self, query: ast.Select): - '''Select data from the transaction table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - ''' - all_conditions = extract_comparison_conditions(query.where) - condition = [] - params = {} - for op, v, c in all_conditions: - - op = '==' if op == '=' else op # converting '=' to '==' - - if (v == 'start_date' or v == 'end_date'): - params[v] = c - - elif v in ['date', 'authorized_date'] or isinstance(c, str): - condition.append(f"({v}{op}'{c}')") - - else: - condition.append(f"({v}{op}{c})") - - merge_condition = ' and '.join(condition) - - result = self.handler.call_plaid_api(method_name='get_transactions', params=params) - if merge_condition != '': - result = result.query(merge_condition) - - result = self.filter_columns(query=query, result=result) - return result - - def get_columns(self): - '''Get the list of column names for the transaction table. - - Returns: - list: A list of column names for the transaction table. - ''' - return [ - 'account_id', - 'transaction_id', - 'amount', - 'iso_currency_code', - 'check_number', - 'date', - 'authorized_date', - 'merchant_name', - 'payment_channel', - 'pending', - ] diff --git a/mindsdb/integrations/handlers/plaid_handler/requirements.txt b/mindsdb/integrations/handlers/plaid_handler/requirements.txt deleted file mode 100644 index 4ffe350c541..00000000000 --- a/mindsdb/integrations/handlers/plaid_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -plaid-python -urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/mindsdb/integrations/handlers/plaid_handler/utils.py b/mindsdb/integrations/handlers/plaid_handler/utils.py deleted file mode 100644 index 0d675ba55b0..00000000000 --- a/mindsdb/integrations/handlers/plaid_handler/utils.py +++ /dev/null @@ -1,7 +0,0 @@ -def parse_transaction(res: list): - parsed = [] - for dic in res: - dic = dic.to_dict() - parsed.append(dic) - - return parsed diff --git a/mindsdb/integrations/handlers/planetscale_handler/README.md b/mindsdb/integrations/handlers/planetscale_handler/README.md deleted file mode 100644 index 1a05e0c6a73..00000000000 --- a/mindsdb/integrations/handlers/planetscale_handler/README.md +++ /dev/null @@ -1,35 +0,0 @@ -## Implementation - -This handler is implemented by extending the `MySQLHandler` that uses `mysql-connector-python` library. -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - - -## Usage - -In order to make use of this handler and connect to the PlanetScale, the following syntax can be used: - -```sql -CREATE DATABASE ps_datasource -WITH - ENGINE = 'planet_scale', - PARAMETERS = { - "host": "127.0.0.1", - "port": 3306, - "database": "mysql", - "user": "root", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM ps_datasource.example_table; -``` diff --git a/mindsdb/integrations/handlers/planetscale_handler/__about__.py b/mindsdb/integrations/handlers/planetscale_handler/__about__.py deleted file mode 100644 index 07d3d62dc97..00000000000 --- a/mindsdb/integrations/handlers/planetscale_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB PlanetScale handler' -__package_name__ = 'mindsdb_planetscale_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for PlanetScale" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/planetscale_handler/__init__.py b/mindsdb/integrations/handlers/planetscale_handler/__init__.py deleted file mode 100644 index f1fc41b167f..00000000000 --- a/mindsdb/integrations/handlers/planetscale_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .planetscale_handler import PlanetScaleHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'PlanetScale' -name = 'planet_scale' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/planetscale_handler/connection_args.py b/mindsdb/integrations/handlers/planetscale_handler/connection_args.py deleted file mode 100644 index 4f9d28cb6cf..00000000000 --- a/mindsdb/integrations/handlers/planetscale_handler/connection_args.py +++ /dev/null @@ -1,36 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Planetscale server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Planetscale server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Planetscale server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Planetscale server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Planetscale server. Must be an integer.' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=4000, - user='root', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/planetscale_handler/icon.svg b/mindsdb/integrations/handlers/planetscale_handler/icon.svg deleted file mode 100644 index 8c740977477..00000000000 --- a/mindsdb/integrations/handlers/planetscale_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py b/mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py deleted file mode 100644 index 811b079b1e3..00000000000 --- a/mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler - - -class PlanetScaleHandler(MySQLHandler): - """ - This handler handles the connection and execution of queries against PlanetScale. - """ - name = 'planet_scale' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/planetscale_handler/requirements.txt b/mindsdb/integrations/handlers/planetscale_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/planetscale_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/portkey_handler/README.md b/mindsdb/integrations/handlers/portkey_handler/README.md deleted file mode 100644 index 887341e5bbd..00000000000 --- a/mindsdb/integrations/handlers/portkey_handler/README.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: Portkey -sidebarTitle: Portkey ---- - -This documentation describes the integration of MindsDB with [Portkey](https://www.portkey.com/), an AI Gateway that allows developers to connect to All the AI models in the world with a single API. -Portkey also brings in observability, caching, and other features that are useful for building production-grade AI applications. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Portkey within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Obtain the Portkey API key required to deploy and use Portkey within MindsDB. Follow the [instructions for obtaining the API key](https://docs.portkey.ai/docs/api-reference/introduction). - -## Setup - -Create an AI engine from the [Portkey handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/portkey_handler). - - -You can pass all the parameters that are supported by Portkey inside the `USING` clause. - - - -```sql -CREATE ML_ENGINE portkey_engine -FROM portkey -USING - portkey_api_key = '{PORTKEY_API_KEY}', - config = '{PORTKEY_CONFIG_ID}'; -``` - -Create a model using `portkey_engine` as an engine. - -You can pass all the parameters supported by Portkey Chat completions here inside the `USING` clause. - - -```sql -CREATE MODEL portkey_model -PREDICT answer -USING - engine = 'portkey_engine', - temperature = 0.2; -
`` - - - -The integrations between Portkey and MindsDB was implemented using [Portkey Python SDK](https://docs.portkey.ai/docs/api-reference/portkey-sdk-client). - - -Query the model to get predictions. - -```sql -SELECT question, answer -FROM portkey_model -WHERE question = 'Where is Stockholm located?'; -``` - -Here is the output: - -```sql -+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -| question | answer | -+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -| Where is Stockholm located? | Stockholm is the capital and largest city of Sweden. It is located on Sweden's south-central east coast, where Lake MΓ€laren meets the Baltic Sea. | -+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -``` - - - -**Next Steps** - -Go to the [Use Cases](https://docs.mindsdb.com/use-cases/overview) section to see more examples. - diff --git a/mindsdb/integrations/handlers/portkey_handler/__about__.py b/mindsdb/integrations/handlers/portkey_handler/__about__.py deleted file mode 100644 index 63afa9dbae4..00000000000 --- a/mindsdb/integrations/handlers/portkey_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Portkey handler' -__package_name__ = 'mindsdb_portkey_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Portkey" -__author__ = 'Naren Gogineni' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2024 - mindsdb' diff --git a/mindsdb/integrations/handlers/portkey_handler/__init__.py b/mindsdb/integrations/handlers/portkey_handler/__init__.py deleted file mode 100644 index 24d0b2029dd..00000000000 --- a/mindsdb/integrations/handlers/portkey_handler/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -from .__about__ import __version__ as version -from .__about__ import __description__ as description -from mindsdb.integrations.libs.const import HANDLER_TYPE -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -try: - from .portkey_handler import PortkeyHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Portkey" -name = "portkey" -type = HANDLER_TYPE.ML -icon_path = 'icon.svg' -permanent = False -__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"] diff --git a/mindsdb/integrations/handlers/portkey_handler/icon.svg b/mindsdb/integrations/handlers/portkey_handler/icon.svg deleted file mode 100644 index 6ea6c96b0d7..00000000000 --- a/mindsdb/integrations/handlers/portkey_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/mindsdb/integrations/handlers/portkey_handler/portkey_handler.py b/mindsdb/integrations/handlers/portkey_handler/portkey_handler.py deleted file mode 100644 index d91a4127f79..00000000000 --- a/mindsdb/integrations/handlers/portkey_handler/portkey_handler.py +++ /dev/null @@ -1,79 +0,0 @@ -from typing import Dict, Optional - -import pandas as pd -from portkey_ai import Portkey - -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.utilities import log - -from mindsdb.integrations.utilities.handler_utils import get_api_key - -logger = log.getLogger(__name__) - -DEFAULT_METADATA = { - "_source": "portkey-mindsdb-integration", -} - - -class PortkeyHandler(BaseMLEngine): - """ - Integration with the Portkey LLM Python Library - """ - - name = "portkey" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.generative = True - - def create( - self, - target: str, - df: Optional[pd.DataFrame] = None, - args: Optional[Dict] = None, - ) -> None: - - if "using" not in args: - raise Exception( - "Portkey engine requires a USING clause! Refer to its documentation for more details." - ) - - self.model_storage.json_set("args", args) - - def predict( - self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None - ) -> None: - - args = self.model_storage.json_get("args") - api_key = get_api_key('portkey', args["using"], self.engine_storage, strict=False) - - self.client = Portkey( - **self.engine_storage.get_connection_args(), - api_key=api_key, - metadata=DEFAULT_METADATA - ) - - result_df = pd.DataFrame() - - result_df["predictions"] = df["question"].apply(self._predict_answer) - - result_df = result_df.rename(columns={"predictions": args["target"]}) - - return result_df - - def _predict_answer(self, text): - """ - connects with portkey messages api to predict the answer for the particular question - - """ - - model_args = self.model_storage.json_get("args") - - message = self.client.chat.completions.create( - **model_args, - messages=[ - {"role": "user", "content": text} - ] - ) - - return message.choices[0].message.content diff --git a/mindsdb/integrations/handlers/portkey_handler/requirements.txt b/mindsdb/integrations/handlers/portkey_handler/requirements.txt deleted file mode 100644 index 3a896f22968..00000000000 --- a/mindsdb/integrations/handlers/portkey_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -portkey-ai>=1.8.2 diff --git a/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py b/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py index 9e8330c19e9..a3456a8e95a 100644 --- a/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +++ b/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py @@ -1,7 +1,7 @@ import time import json import logging -from typing import Optional, Any +from typing import Optional, Any, Generator import pandas as pd from pandas import DataFrame @@ -10,19 +10,25 @@ from psycopg.postgres import TypeInfo, types as pg_types from psycopg.pq import ExecStatus -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender +from mindsdb_sql_parser import parse_sql, Select from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import MetaDatabaseHandler +import mindsdb.utilities.profiler as profiler +from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender +from mindsdb.utilities.types.column import Column from mindsdb.utilities import log +from mindsdb.integrations.libs.base import MetaDatabaseHandler from mindsdb.integrations.libs.response import ( HandlerStatusResponse as StatusResponse, HandlerResponse as Response, RESPONSE_TYPE, + TableResponse, + OkResponse, + ErrorResponse, + DataHandlerResponse, ) -import mindsdb.utilities.profiler as profiler from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE +from mindsdb.utilities.config import config as mindsdb_config logger = log.getLogger(__name__) @@ -70,15 +76,14 @@ def _map_type(internal_type_name: str | None) -> MYSQL_DATA_TYPE: return fallback_type -def _make_table_response(result: list[tuple[Any]], cursor: Cursor) -> Response: - """Build response from result and cursor. +def _get_columns(cursor: Cursor) -> list[Column]: + """Get columns from cursor. Args: - result (list[tuple[Any]]): result of the query. cursor (psycopg.Cursor): cursor object. Returns: - Response: response object. + List of columns """ description: list[PGColumn] = cursor.description mysql_types: list[MYSQL_DATA_TYPE] = [] @@ -108,11 +113,9 @@ def _make_table_response(result: list[tuple[Any]], cursor: Cursor) -> Response: mysql_type = _map_type(regtype) mysql_types.append(mysql_type) - # region cast int and bool to nullable types - serieses = [] - for i, mysql_type in enumerate(mysql_types): - expected_dtype = None - if mysql_type in ( + result = [] + for i, column in enumerate(cursor.description): + if mysql_types[i] in ( MYSQL_DATA_TYPE.SMALLINT, MYSQL_DATA_TYPE.INT, MYSQL_DATA_TYPE.MEDIUMINT, @@ -120,13 +123,30 @@ def _make_table_response(result: list[tuple[Any]], cursor: Cursor) -> Response: MYSQL_DATA_TYPE.TINYINT, ): expected_dtype = "Int64" - elif mysql_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): + elif mysql_types[i] in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): expected_dtype = "boolean" - serieses.append(pd.Series([row[i] for row in result], dtype=expected_dtype, name=description[i].name)) - df = pd.concat(serieses, axis=1, copy=False) - # endregion + else: + expected_dtype = None + result.append( + Column(name=column.name, type=mysql_types[i], original_type=column.type_display, dtype=expected_dtype) + ) + return result - return Response(RESPONSE_TYPE.TABLE, data_frame=df, affected_rows=cursor.rowcount, mysql_types=mysql_types) + +def _make_df(result: list[tuple[Any]], columns: list[Column]) -> pd.DataFrame: + """Make pandas DataFrame from result and columns. + + Args: + result (list[tuple[Any]]): result of the query. + columns (list[Column]): list of columns. + + Returns: + pd.DataFrame: pandas DataFrame. + """ + serieses = [] + for i, column in enumerate(columns): + serieses.append(pd.Series([row[i] for row in result], dtype=column.dtype, name=column.name)) + return pd.concat(serieses, axis=1, copy=False) class PostgresHandler(MetaDatabaseHandler): @@ -135,6 +155,7 @@ class PostgresHandler(MetaDatabaseHandler): """ name = "postgres" + stream_response = True @profiler.profile("init_pg_handler") def __init__(self, name=None, **kwargs): @@ -282,19 +303,47 @@ def _cast_dtypes(self, df: DataFrame, description: list) -> DataFrame: logger.error(f"Error casting column {col.name} to {types_map[pg_type_info.name]}: {e}") df.columns = columns - @profiler.profile() - def native_query(self, query: str, params=None, **kwargs) -> Response: - """ - Executes a SQL query on the PostgreSQL database and returns the result. + def native_query(self, query: str, params=None, stream: bool = True, **kwargs) -> DataHandlerResponse: + """Executes a SQL query on the PostgreSQL database and returns the result. + NOTE: 'INSERT' (and may be some else) queries can not be executed on the server side, + but there are fallbackto client side execution. Args: query (str): The SQL query to be executed. + params (list): The parameters to be passed to the query. + stream (bool): Whether to stream the results of the query. + **kwargs: Additional keyword arguments. Returns: - Response: A response object containing the result of the query or an error message. + DataHandlerResponse: A response object containing the result of the query or an error message. """ - need_to_close = not self.is_connected + if stream is False: + response = self._execute_client_side(query, params, **kwargs) + elif params is not None: + logger.info("Server side cursor does not support 'fetchmany', executing with client side cursor") + response = self._execute_client_side(query, params, **kwargs) + else: + generator = self._execute_server_side(query, **kwargs) + try: + response: TableResponse = next(generator) + response.data_generator = generator + except StopIteration as e: + response = e.value + if isinstance(response, DataHandlerResponse) is False: + raise + return response + def _execute_client_side(self, query: str, params=None, **kwargs) -> TableResponse | OkResponse | ErrorResponse: + """Executes a SQL query on the PostgreSQL database and returns the result. + + Args: + query (str): The SQL query to be executed. + params (list): The parameters to be passed to the query. + **kwargs: Additional keyword arguments. + + Returns: + TableResponse | OkResponse | ErrorResponse: A response object containing the result of the query or an error message. + """ connection = self.connect() with connection.cursor() as cur: try: @@ -303,66 +352,86 @@ def native_query(self, query: str, params=None, **kwargs) -> Response: else: cur.execute(query) if cur.pgresult is None or ExecStatus(cur.pgresult.status) == ExecStatus.COMMAND_OK: - response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount) + response = OkResponse(affected_rows=cur.rowcount) else: result = cur.fetchall() - response = _make_table_response(result, cur) + columns: list[Column] = _get_columns(cur) + response = TableResponse( + affected_rows=cur.rowcount, columns=columns, data=_make_df(result, columns) + ) connection.commit() - except (psycopg.ProgrammingError, psycopg.DataError) as e: - # These is 'expected' exceptions, they should not be treated as mindsdb's errors - # ProgrammingError: table not found or already exists, syntax error, etc - # DataError: division by zero, numeric value out of range, etc. - # https://www.psycopg.org/psycopg3/docs/api/errors.html - log_message = "Database query failed with error, likely due to invalid SQL query" - if logger.isEnabledFor(logging.DEBUG): - log_message += f". Executed query:\n{query}" - logger.info(log_message) - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e), is_expected_error=True) - connection.rollback() except Exception as e: - logger.error(f"Error running query:\n{query}\non {self.database}, {e}") - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e)) - connection.rollback() - - if need_to_close: - self.disconnect() + response = self._handle_query_exception(e, query, connection) return response - def query_stream(self, query: ASTNode, fetch_size: int = 1000): - """ - Executes a SQL query and stream results outside by batches + def _execute_server_side( + self, query: str, **kwargs + ) -> Generator[TableResponse | pd.DataFrame, None, OkResponse | ErrorResponse]: + """Execute a SQL query on the PostgreSQL database and return a generator of data frames. - :param query: An ASTNode representing the SQL query to be executed. - :param fetch_size: size of the batch - :return: generator with query results - """ - query_str, params = self.renderer.get_exec_params(query, with_failback=True) - - need_to_close = not self.is_connected + Args: + query (str): The SQL query to be executed. + params (list): The parameters to be passed to the query. + **kwargs: Additional keyword arguments. + Returns: + Generator[TableResponse | pd.DataFrame, None, OkResponse | ErrorResponse]: Generator of data frames. + """ connection = self.connect() - with connection.cursor() as cur: + with connection.cursor(name=f"mindsdb_{id(self)}") as cursor: try: - if params is not None: - cur.executemany(query_str, params) - else: - cur.execute(query_str) - - if cur.pgresult is not None and ExecStatus(cur.pgresult.status) != ExecStatus.COMMAND_OK: - while True: - result = cur.fetchmany(fetch_size) - if not result: - break - df = DataFrame(result, columns=[x.name for x in cur.description]) - self._cast_dtypes(df, cur.description) - yield df + try: + cursor.execute(query) + except psycopg.errors.SyntaxError as e: + # NOTE: INSERT queries cannot be executed server-side. When they fail, they produce a syntax error + # that always starts with the text below, regardless of the INSERT query format. + lower_e = str(e).lower() + if not lower_e.startswith('syntax error at or near "insert"') and not lower_e.startswith( + 'syntax error at or near "drop"' + ): + raise + connection.rollback() + return self._execute_client_side(query=query) + + if cursor.description is None: + connection.commit() + return OkResponse(affected_rows=cursor.rowcount) + + columns: list[Column] = _get_columns(cursor) + yield TableResponse(affected_rows=cursor.rowcount, columns=columns) + while result := cursor.fetchmany(size=mindsdb_config["data_stream"]["fetch_size"]): + yield _make_df(result, columns) connection.commit() - finally: - connection.rollback() + except Exception as e: + return self._handle_query_exception(e, query, connection) - if need_to_close: - self.disconnect() + def _handle_query_exception(self, e: Exception, query: str, connection) -> ErrorResponse: + """Handle query execution errors with appropriate logging and rollback. + + Args: + e: The exception that was raised + query: The SQL query that failed + connection: The database connection to rollback + + Returns: + ErrorResponse with appropriate error details + """ + if isinstance(e, (psycopg.ProgrammingError, psycopg.DataError)): + # These are 'expected' exceptions, they should not be treated as mindsdb's errors + # ProgrammingError: table not found or already exists, syntax error, etc + # DataError: division by zero, numeric value out of range, etc. + # https://www.psycopg.org/psycopg3/docs/api/errors.html + log_message = "Database query failed with error, likely due to invalid SQL query" + if logger.isEnabledFor(logging.DEBUG): + log_message += f". Executed query:\n{query}" + logger.info(log_message) + connection.rollback() + return ErrorResponse(error_code=0, error_message=str(e), is_expected_error=True) + else: + logger.error(f"Error running query:\n{query}\non {self.database}, {e}") + connection.rollback() + return ErrorResponse(error_code=0, error_message=str(e)) def insert(self, table_name: str, df: pd.DataFrame) -> Response: need_to_close = not self.is_connected @@ -401,7 +470,7 @@ def insert(self, table_name: str, df: pd.DataFrame) -> Response: return Response(RESPONSE_TYPE.OK, affected_rows=rowcount) @profiler.profile() - def query(self, query: ASTNode) -> Response: + def query(self, query: ASTNode) -> DataHandlerResponse: """ Executes a SQL query represented by an ASTNode and retrieves the data. @@ -409,11 +478,13 @@ def query(self, query: ASTNode) -> Response: query (ASTNode): An ASTNode representing the SQL query to be executed. Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. + DataHandlerResponse: The response from the `native_query` method, + containing the result of the SQL query execution. """ query_str, params = self.renderer.get_exec_params(query, with_failback=True) logger.debug(f"Executing SQL query: {query_str}") - return self.native_query(query_str, params) + support_stream = isinstance(query, Select) + return self.native_query(query_str, params, stream=support_stream) def get_tables(self, all: bool = False) -> Response: """ @@ -545,7 +616,7 @@ def subscribe(self, stop_event, callback, table_name, columns=None, **kwargs): def process_event(event): try: row = json.loads(event.payload) - except json.JSONDecoder: + except json.JSONDecodeError: return # check column in input data @@ -687,31 +758,33 @@ def meta_get_column_statistics(self, table_names: Optional[list] = None) -> Resp result = self.native_query(query) - if result.type == RESPONSE_TYPE.TABLE and result.data_frame is not None: - df = result.data_frame - - # Extract min/max from histogram bounds - def extract_min_max(histogram_str): - if histogram_str and str(histogram_str) != "nan": - clean = str(histogram_str).strip("{}") - if clean: - values = clean.split(",") - min_val = values[0].strip(" \"'") if values else None - max_val = values[-1].strip(" \"'") if values else None - return min_val, max_val - return None, None - - min_max_values = df["histogram_bounds"].apply(extract_min_max) - df["MINIMUM_VALUE"] = min_max_values.apply(lambda x: x[0]) - df["MAXIMUM_VALUE"] = min_max_values.apply(lambda x: x[1]) - - # Convert most_common_values and most_common_freqs to arrays. - df["MOST_COMMON_VALUES"] = df["most_common_values"].apply( - lambda x: x.strip("{}").split(",") if isinstance(x, str) else [] - ) - df["MOST_COMMON_FREQUENCIES"] = df["most_common_frequencies"].apply( - lambda x: x.strip("{}").split(",") if isinstance(x, str) else [] - ) + if result.type != RESPONSE_TYPE.TABLE or result.data_frame is None: + return result + + df = result.data_frame + + # Extract min/max from histogram bounds + def extract_min_max(histogram_str): + if histogram_str and str(histogram_str) != "nan": + clean = str(histogram_str).strip("{}") + if clean: + values = clean.split(",") + min_val = values[0].strip(" \"'") if values else None + max_val = values[-1].strip(" \"'") if values else None + return min_val, max_val + return None, None + + min_max_values = df["histogram_bounds"].apply(extract_min_max) + df["MINIMUM_VALUE"] = min_max_values.apply(lambda x: x[0]) + df["MAXIMUM_VALUE"] = min_max_values.apply(lambda x: x[1]) + + # Convert most_common_values and most_common_freqs to arrays. + df["MOST_COMMON_VALUES"] = df["most_common_values"].apply( + lambda x: x.strip("{}").split(",") if isinstance(x, str) else [] + ) + df["MOST_COMMON_FREQUENCIES"] = df["most_common_frequencies"].apply( + lambda x: x.strip("{}").split(",") if isinstance(x, str) else [] + ) result.data_frame = df.drop(columns=["histogram_bounds", "most_common_values", "most_common_frequencies"]) diff --git a/mindsdb/integrations/handlers/pycaret_handler/README.md b/mindsdb/integrations/handlers/pycaret_handler/README.md deleted file mode 100644 index 8043192b53a..00000000000 --- a/mindsdb/integrations/handlers/pycaret_handler/README.md +++ /dev/null @@ -1,65 +0,0 @@ -## PyCaret Handler - -PyCaret ML handler for MindsDB. - -## PyCaret - -PyCaret is an open-source, low-code machine learning library in Python that automates machine learning workflows. - -## Example Usage - -### Creation - -Required parameters: -- `model_type`: the type of model that you want to build -- `model_name`: you can pass in supported models using this. eg. supported models for regression can be found [here](https://pycaret.readthedocs.io/en/latest/api/regression.html#pycaret.regression.create_model). You can also set it to `best` to generate the best model (only supported for classification, regression and time_series) - -In addition to required parameters, there are 3 categories of optional parameters `setup`, `create` and `predict`. These are passed in during various stages of model development (see below). You have to prefix the arguments with one of these categories to pass in during the workflow. -- `setup_*`: these are passed to `setup()` function while creating model. You can find these in PyCaret's documentation. eg. For regression, the setup function's arguments are documented [here](https://pycaret.readthedocs.io/en/latest/api/regression.html#pycaret.regression.RegressionExperiment.setup). -- `create_*`: these are passed into `create_model()` or `compare_models()` function depending on the `model_name`. For classification you can find the docs [here](https://pycaret.readthedocs.io/en/latest/api/classification.html#pycaret.classification.create_model). -- `predict_*`: these are passed into `predict_model()` function of PyCaret. eg. You can find the documentation for classification [here](https://pycaret.readthedocs.io/en/latest/api/classification.html#pycaret.classification.predict_model). - -These are the supported types of models (`model_type`): -- `classification` -- `regression` -- `time_series` -- `clustering` -- `anomaly` - -Below is the example for creating a classification model - -~~~sql -CREATE MODEL my_pycaret_class_model -FROM irisdb - (SELECT SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm, Species FROM Iris) -PREDICT Species -USING - engine = 'pycaret', - model_type = 'classification', - model_name = 'xgboost', - setup_session_id = 123; -~~~~ - -For model types that don't want a target column (like anomaly and clustering), just pass in any one of the column names in `PREDICT` clause to comply with MindsDB's SQL syntax: - -~~~sql -CREATE MODEL my_pycaret_anom_model -FROM anomalydb - (SELECT Col1, Col2, Col3, Col4, Col5, Col6, Col7, Col8, Col9, Col10 FROM anomaly) -PREDICT Col10 -USING - engine = 'pycaret', - model_type = 'anomaly', - model_name = 'iforest', - setup_session_id = 123; -~~~~ - -### Prediction - -You can predict using normal mindsdb syntax like so: - -~~~sql -SELECT t.Id, m.prediction_label, m.prediction_score -FROM irisdb.Iris as t -JOIN my_pycaret_class_model AS m; -~~~~ diff --git a/mindsdb/integrations/handlers/pycaret_handler/__about__.py b/mindsdb/integrations/handlers/pycaret_handler/__about__.py deleted file mode 100644 index a783a674d95..00000000000 --- a/mindsdb/integrations/handlers/pycaret_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB PyCaret handler' -__package_name__ = 'mindsdb_pycaret_handler' -__version__ = '0.0.1' -__description__ = 'MindsDB handler for PyCaret' -__author__ = 'Aditya Azad' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/pycaret_handler/__init__.py b/mindsdb/integrations/handlers/pycaret_handler/__init__.py deleted file mode 100644 index 7178e8db4a2..00000000000 --- a/mindsdb/integrations/handlers/pycaret_handler/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE -from mindsdb.utilities import log -from .__about__ import __version__ as version, __description__ as description - -logger = log.getLogger(__name__) - -try: - from .pycaret_handler import PyCaretHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'PyCaret' -name = 'pycaret' -type = HANDLER_TYPE.ML -icon_path = 'icon.png' -permanent = False - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/pycaret_handler/icon.png b/mindsdb/integrations/handlers/pycaret_handler/icon.png deleted file mode 100644 index a7d2f1a44a9..00000000000 Binary files a/mindsdb/integrations/handlers/pycaret_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/pycaret_handler/pycaret_handler.py b/mindsdb/integrations/handlers/pycaret_handler/pycaret_handler.py deleted file mode 100644 index f36583e9689..00000000000 --- a/mindsdb/integrations/handlers/pycaret_handler/pycaret_handler.py +++ /dev/null @@ -1,116 +0,0 @@ -from typing import Optional, Dict -import os - -import pandas as pd - -from mindsdb.integrations.libs.base import BaseMLEngine -from pycaret.classification import ClassificationExperiment -from pycaret.regression import RegressionExperiment -from pycaret.time_series import TSForecastingExperiment -from pycaret.clustering import ClusteringExperiment -from pycaret.anomaly import AnomalyExperiment - - -class PyCaretHandler(BaseMLEngine): - name = 'pycaret' - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - """Create and train model on given data""" - # parse args - if 'using' not in args: - raise Exception("PyCaret engine requires a USING clause! Refer to its documentation for more details.") - using = args['using'] - if df is None: - raise Exception("PyCaret engine requires a some data to initialize!") - # create experiment - s = self._get_experiment(using['model_type']) - s.setup(df, **self._get_experiment_setup_kwargs(using, args['target'])) - # train model - model = self._train_model(s, using) - # save model and args - model_file_path = os.path.join(self.model_storage.fileStorage.folder_path, 'model') - s.save_model(model, model_file_path) - self.model_storage.json_set('saved_args', { - **args['using'], - 'model_path': model_file_path - }) - - def predict(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> pd.DataFrame: - """Predict on the given data""" - # load model - saved_args = self.model_storage.json_get('saved_args') - s = self._get_experiment(saved_args['model_type']) - model = s.load_model(saved_args['model_path']) - # predict and return - return self._predict_model(s, model, df, saved_args) - - def _get_experiment(self, model_type): - """Returns one of the types of experiments in PyCaret""" - if model_type == "classification": - return ClassificationExperiment() - elif model_type == "regression": - return RegressionExperiment() - elif model_type == "time_series": - return TSForecastingExperiment() - elif model_type == "clustering": - return ClusteringExperiment() - elif model_type == "anomaly": - return AnomalyExperiment() - else: - raise Exception(f"Unrecognized model type '{model_type}'") - - def _get_experiment_setup_kwargs(self, args: Dict, target: str): - """Returns the arguments that need to passed in setup function for the experiment""" - model_type = args['model_type'] - # copy setup kwargs - kwargs = self._select_keys(args, "setup_") - # return kwargs - if model_type == 'classification' or model_type == 'regression' or model_type == 'time_series': - return {**kwargs, 'target': target} - elif model_type == 'clustering' or model_type == 'anomaly': - return {**kwargs} - raise Exception(f"Unrecognized model type '{model_type}'") - - def _predict_model(self, s, model, df, args): - """Apply predictor arguments and get predictions""" - model_type = args["model_type"] - kwargs = self._select_keys(args, "predict_") - if ( - model_type == 'classification' - or model_type == 'regression' - or model_type == 'clustering' - or model_type == 'anomaly' - ): - kwargs["data"] = df - elif model_type == 'time_series': - # do nothing - pass - else: - raise Exception(f"Unrecognized model type '{model_type}'") - return s.predict_model(model, **kwargs) - - def _train_model(self, experiment, args): - """Train the model and return the best (if applicable)""" - model_type = args['model_type'] - model_name = args['model_name'] - kwargs = self._select_keys(args, "create_") - if ( - model_type == 'classification' - or model_type == 'regression' - or model_type == 'time_series' - ) and model_name == 'best': - return experiment.compare_models(**kwargs) - if model_name == 'best': - raise Exception("Specific model name must be provided for clustering or anomaly tasks") - return experiment.create_model(model_name, **kwargs) - - def _select_keys(self, d, prefix): - """Selects keys with given prefix and returns a new dict""" - result = {} - for k in d: - if k.startswith(prefix): - result[k[len(prefix):]] = d[k] - return result diff --git a/mindsdb/integrations/handlers/pycaret_handler/requirements.txt b/mindsdb/integrations/handlers/pycaret_handler/requirements.txt deleted file mode 100644 index 45b322671b6..00000000000 --- a/mindsdb/integrations/handlers/pycaret_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pycaret -pycaret[models] diff --git a/mindsdb/integrations/handlers/pycaret_handler/test/__init__.py b/mindsdb/integrations/handlers/pycaret_handler/test/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/pycaret_handler/test/test_pycaret.py b/mindsdb/integrations/handlers/pycaret_handler/test/test_pycaret.py deleted file mode 100644 index 236cd62c1d2..00000000000 --- a/mindsdb/integrations/handlers/pycaret_handler/test/test_pycaret.py +++ /dev/null @@ -1,242 +0,0 @@ -import importlib -import time -from unittest.mock import patch -import pandas as pd -import pytest - -from mindsdb_sql_parser import parse_sql -from tests.unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("pycaret") - PYCARET_INSTALLED = True -except ImportError: - PYCARET_INSTALLED = False - - -@pytest.mark.skipif(not PYCARET_INSTALLED, reason="pycaret is not installed") -class TestPyCaret(BaseExecutorTest): - - def wait_predictor(self, project, name): - done = False - for attempt in range(200): - ret = self.run_sql( - f"select * from {project}.models where name='{name}'" - ) - if not ret.empty: - if ret['STATUS'][0] == 'complete': - done = True - break - elif ret['STATUS'][0] == 'error': - break - time.sleep(0.5) - if not done: - raise RuntimeError("predictor wasn't created") - - def run_sql(self, sql): - ret = self.command_executor.execute_command( - parse_sql(sql) - ) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - @patch('mindsdb.integrations.handlers.postgres_handler.Handler') - def test_classifier(self, mock_handler): - df = pd.DataFrame({ - 'sepal_length': [5.1, 4.9, 4.7, 4.6, 6.4, 6.9, 5.5, 6.5, 7.7, 6.3, 6.7, 7.2], - 'sepal_width': [3.5, 3.0, 3.2, 3.1, 3.2, 3.1, 2.3, 2.8, 2.8, 2.7, 3.3, 3.2], - 'petal_length': [1.4, 4.0, 1.3, 1.5, 4.5, 4.9, 4.0, 4.6, 6.7, 4.9, 5.7, 6.0], - 'petal_width': [0.2, 0.2, 0.2, 0.2, 1.5, 1.5, 1.3, 1.5, 2.0, 1.8, 2.1, 1.8], - 'species': ['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica'] - }) - self.set_handler(mock_handler, name='pg', tables={'iris': df}) - - # create project - self.run_sql('create database proj;') - - # create predictor - self.run_sql(''' - CREATE MODEL proj.my_pycaret_class_model - FROM pg - (SELECT sepal_length, sepal_width, petal_length, petal_width, species FROM iris) - PREDICT species - USING - engine = 'pycaret', - model_type = 'classification', - model_name = 'xgboost', - setup_session_id = 123, - setup_fold = 2; - ''') - self.wait_predictor('proj', 'my_pycaret_class_model') - - # run predict - ret = self.run_sql(''' - SELECT prediction_label - FROM pg.iris as t - JOIN proj.my_pycaret_class_model AS m; - ''') - - assert ret['prediction_label'].iloc[0] == 'Iris-setosa' - - @patch('mindsdb.integrations.handlers.postgres_handler.Handler') - def test_regression(self, mock_handler): - df = pd.DataFrame({ - 'age': [19, 18, 28, 33, 32, 31, 46, 37], - 'sex': ['female', 'male', 'male', 'male', 'male', 'female', 'female', 'female'], - 'bmi': [27.9, 33.77, 33, 22.705, 28.88, 25.74, 33.44, 27.74], - 'children': [0, 1, 3, 0, 0, 0, 1, 3], - 'smoker': ['yes', 'no', 'no', 'no', 'no', 'no', 'no', 'no'], - 'region': ['southwest', 'southeast', 'southeast', 'northwest', 'northwest', 'southeast', 'southeast', 'northwest'], - 'charges': [16884.924, 1725.5523, 4449.462, 21984.47061, 3866.8552, 3756.6216, 8240.5896, 7281.5056] - }) - - self.set_handler(mock_handler, name='pg', tables={'insurance': df}) - - # create project - self.run_sql('create database proj;') - - # create predictor - self.run_sql(''' - CREATE MODEL proj.my_pycaret_regr_model - FROM pg - (SELECT age, sex, bmi, children, smoker, region, charges FROM insurance) - PREDICT charges - USING - engine = 'pycaret', - model_type = 'regression', - model_name = 'xgboost', - setup_session_id = 123, - setup_fold = 2; - ''') - self.wait_predictor('proj', 'my_pycaret_regr_model') - - # run predict - ret = self.run_sql(''' - SELECT prediction_label - FROM pg.insurance as t - JOIN proj.my_pycaret_regr_model AS m; - ''') - - assert int(ret['prediction_label'].iloc[0]) == 3822 - - @patch('mindsdb.integrations.handlers.postgres_handler.Handler') - @pytest.mark.skip(reason="MindsDB recognizes 'Anomaly' as a keyword so it fails to fetch Anomaly column") - def test_anomaly(self, mock_handler): - df = pd.DataFrame({ - 'Col1': [0.263995357, 0.764928588, 0.13842355, 0.935242061, 0.605866573, 0.518789697, 0.912225161, 0.608234451, 0.723781923, 0.73359095], - 'Col2': [0.546092303, 0.65397459, 0.065575135, 0.227771913, 0.845269445, 0.837065879, 0.272378939, 0.331678698, 0.429296975, 0.367422001], - 'Col3': [0.336714104, 0.538842451, 0.192801069, 0.553562822, 0.074514511, 0.332993162, 0.365792205, 0.861309323, 0.899016587, 0.088600152], - 'Col4': [0.092107835, 0.995016662, 0.014465045, 0.176370646, 0.241530075, 0.514723634, 0.562208164, 0.158963258, 0.073715215, 0.208463224], - 'Col5': [0.325261175, 0.805967636, 0.957033424, 0.331664957, 0.307923366, 0.355314772, 0.50189852, 0.558449452, 0.885169295, 0.182754409], - 'Col6': [0.212464853, 0.780304761, 0.458443656, 0.634508561, 0.373030452, 0.465650668, 0.413997158, 0.013080054, 0.570250227, 0.736672363], - 'Col7': [0.258565714, 0.437317789, 0.559647989, 0.109202597, 0.994553306, 0.896994183, 0.488468506, 0.251942977, 0.017265143, 0.538513303], - 'Col8': [0.869236755, 0.277978893, 0.42307639, 0.11247202, 0.183727053, 0.034959735, 0.111113968, 0.249329646, 0.550683376, 0.049843054], - 'Col9': [0.197077957, 0.843918225, 0.24339588, 0.281278233, 0.329148141, 0.73458152, 0.191947043, 0.927804425, 0.71326865, 0.891548497], - 'Col10': [0.292984504, 0.70343162, 0.43962138, 0.107867968, 0.922947409, 0.25345779, 0.29565178, 0.355286799, 0.980911322, 0.308864217] - }) - - self.set_handler(mock_handler, name='pg', tables={'anomaly': df}) - - # create project - self.run_sql('create database proj;') - - # create predictor - self.run_sql(''' - CREATE MODEL proj.my_pycaret_anom_model - FROM pg - (SELECT Col1, Col2, Col3, Col4, Col5, Col6, Col7, Col8, Col9, Col10 FROM anomaly) - PREDICT Col10 - USING - engine = 'pycaret', - model_type = 'anomaly', - model_name = 'iforest', - setup_session_id = 123, - setup_fold = 2; - ''') - self.wait_predictor('proj', 'my_pycaret_anom_model') - - # run predict - # TODO: is there a workaround for this? (it works when ran in web UI) - ret = self.run_sql(''' - SELECT m.Anomaly - FROM pg.anomaly as t - JOIN proj.my_pycaret_anom_model AS m; - ''') - - assert int(ret['Anomaly'].iloc[0]) == 0 - - @patch('mindsdb.integrations.handlers.postgres_handler.Handler') - def test_cluster(self, mock_handler): - df = pd.DataFrame({ - 'Age': [58, 59, 62, 59, 87, 29, 54, 87], - 'Income': [77769, 81799, 74751, 74373, 17760, 13157, 76500, 42592], - 'SpendingScore': [0.7913287771988531, 0.7910820467274178, 0.7026569520102857, 0.7656795619984281, 0.3487775484305076, 0.8470341025128374, 0.7851978501165687, 0.3552896820382753], - 'Savings': [6559.8299230048315, 5417.661426197439, 9258.992965034067, 7346.334503537976, 16869.507130301474, 3535.5143522162816, 6878.884248553975, 18086.287157859304] - }) - - self.set_handler(mock_handler, name='pg', tables={'jewellery': df}) - - # create project - self.run_sql('create database proj;') - - # create predictor - self.run_sql(''' - CREATE MODEL proj.my_pycaret_cluster_model - FROM pg - (SELECT Age, Income, SpendingScore, Savings FROM jewellery) - PREDICT Savings - USING - engine = 'pycaret', - model_type = 'clustering', - model_name = 'kmeans', - setup_session_id = 123; - ''') - self.wait_predictor('proj', 'my_pycaret_cluster_model') - - # run predict - ret = self.run_sql(''' - SELECT m.Cluster - FROM pg.jewellery as t - JOIN proj.my_pycaret_cluster_model AS m; - ''') - - assert ret['Cluster'].iloc[0] == "Cluster 0" - - @patch('mindsdb.integrations.handlers.postgres_handler.Handler') - def test_timeseries(self, mock_handler): - df = pd.DataFrame({ - 'Year': [1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950], - 'Month': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8], - 'Passengers': [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 115, 126, 141, 135, 125, 149, 170, 170] - }) - - self.set_handler(mock_handler, name='pg', tables={'airline': df}) - - # create project - self.run_sql('create database proj;') - - # create predictor - self.run_sql(''' - CREATE MODEL proj.my_pycaret_timeseries_model - FROM pg - (SELECT Year, Month, Passengers FROM airline) - PREDICT Passengers - USING - engine = 'pycaret', - model_type = 'time_series', - model_name = 'naive', - setup_fh = 3, - predict_fh = 36, - setup_session_id = 123; - ''') - self.wait_predictor('proj', 'my_pycaret_timeseries_model') - - # run predict - ret = self.run_sql(''' - SELECT m.y_pred - FROM pg.airline as t - JOIN proj.my_pycaret_timeseries_model AS m; - ''') - - assert int(ret['y_pred'].iloc[0]) == 125 diff --git a/mindsdb/integrations/handlers/pypi_handler/.bumpversion.cfg b/mindsdb/integrations/handlers/pypi_handler/.bumpversion.cfg deleted file mode 100644 index 0b84d758892..00000000000 --- a/mindsdb/integrations/handlers/pypi_handler/.bumpversion.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[bumpversion] -current_version = 0.1.0 -commit = True -message = {current_version} β†’ {new_version} - -[bumpversion:file:__about__.py] diff --git a/mindsdb/integrations/handlers/pypi_handler/README.md b/mindsdb/integrations/handlers/pypi_handler/README.md deleted file mode 100644 index 5c61d077a85..00000000000 --- a/mindsdb/integrations/handlers/pypi_handler/README.md +++ /dev/null @@ -1,88 +0,0 @@ -# PyPI Handler -This handler allows you to interact with the data that [pypi.org](https://pypi.org) is storing on Google's BigQuery about the rates of Python packages. - -## About PyPI -Python Package Index (PyPI) is a host for maintaining and storing Python packages. It's a good place for publishing your Python packages in different versions and releases. - -## PyPI Handler Implementation -This implementation is based on the RESTful service that [pypistats.org](https://pypistats.org) is serving. - -## PyPI Handler Initialization -There is nothing needed to be passed in the database initialization process. You can create the database via the following flow. - -```sql -CREATE DATABASE pypi_datasource -WITH ENGINE = 'pypi'; -``` - -Once you execute this query, you'll have access to all the following tables. - -- Overal Table: `pypi_datasource.overall` -- Recent Table: `pypi_datasource.recent` -- Python Major Table: `pypi_datasource.python_major` -- Python Minor Table: `pypi_datasource.python_minor` -- System Table: `pypi_datasource.system` - -## Example Usage -Each table has its `WHERE` clause(s) and condition(s) as follows. - -- `recent` - - `period`: `{day, week, month}` -- `overall` - - `mirrors`: `{true, false}` -- `python_major` - - `version`: `{2, 3, ...}` -- `python_minor` - - `version`: `{2.7, 3.2, ...}` -- `system`: - - `os`: `{"Windows", "Linux", "Darwin", ...}` - - -### All the recent downloads -```sql -SELECT * -FROM pypi_datasource.recent WHERE package="mindsdb"; -``` -```sql -SELECT * -FROM pypi_datasource.recent WHERE package="mindsdb" AND period="day"; -``` - -### Overall downloads (only mirrors included) -```sql -SELECT * -FROM pypi_datasource.overall WHERE package="mindsdb" AND mirrors=true; -``` - -### Overall downloads on CPython==2.7 -```sql -SELECT * -FROM pypi_datasource.python_minor WHERE package="mindsdb" AND version="2.7"; -``` - -### All the downloads on the Linux-based distros -```sql -SELECT date, downloads -FROM pypi_datasource.system WHERE package="mindsdb" AND os="Linux"; -``` - -### Keep in mind.. -- Each table takes a *REQUIRED* `WHERE` parameter and that's nothing but the package name that is specified with the `package` keyword. -- All the `Null` recordes are ignored from viewing. -- `SELECT` query is limited by 20 records by default. You can change it to whatever amount of records you need. - - -## Implemented Features -- [x] Database initialization -- [x] Tracking the downloads rate - - [x] Overall downloads - - [x] Recent downloads -- [x] System-based filtering -- [x] Version-based filtering - - [x] Minor-based filtering - - [x] Major-based filtering - -## TODO -- [ ] Writing tests -- [ ] Tracking the dependency graph -- [ ] Packages' metadata filtering \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pypi_handler/__about__.py b/mindsdb/integrations/handlers/pypi_handler/__about__.py deleted file mode 100644 index 734f4a5ddb2..00000000000 --- a/mindsdb/integrations/handlers/pypi_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB PyPI handler" -__package_name__ = "mindsdb_pypi_handler" -__version__ = "0.1.0" -__description__ = "MindsDB handler for PyPI" -__author__ = "Sadra Yahyapour" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/pypi_handler/__init__.py b/mindsdb/integrations/handlers/pypi_handler/__init__.py deleted file mode 100644 index fbed13e3dd5..00000000000 --- a/mindsdb/integrations/handlers/pypi_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .pypi_handler import PyPIHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "PyPI" -name = "pypi" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/pypi_handler/api.py b/mindsdb/integrations/handlers/pypi_handler/api.py deleted file mode 100644 index 55b16940431..00000000000 --- a/mindsdb/integrations/handlers/pypi_handler/api.py +++ /dev/null @@ -1,152 +0,0 @@ -from os import path -from typing import Collection, Dict - -import numpy as np -import pandas as pd -import requests - -SERVICE_URL = r"https://pypistats.org" -API_BASE_URL = path.join(SERVICE_URL, "api/packages/") - - -class PyPI: - def __init__(self, name: str, limit: int = None) -> None: - """initializer method - - Args: - name(str): package name - """ - self.name: str = name - self.limit = limit - self.endpoint: str = path.join(API_BASE_URL, name) - - def recent(self, period: str = None) -> pd.DataFrame: - """recent endpoint - - Args: - period (str, optional): the desired `day` or `week` or `month` period. Defaults to None. - - Returns: - pd.DataFrame: pandas dataframe - """ - endpoint: str = path.join(self.endpoint, "recent") - params: Dict = {} - - if period: - params["period"] = period - - payload = requests.get(endpoint, params=params).json()["data"] - - df = self.__to_dataframe(payload, [0]) - - return df - - def overall(self, mirrors: bool = None) -> pd.DataFrame: - """overall endpoint - - Args: - mirrors (bool, optional): filter by mirrors. Defaults to None. - - Returns: - pd.DataFrame: pandas dataframe - """ - endpoint: str = path.join(self.endpoint, "overall") - params: Dict = {} - - if mirrors is not None: - params["mirrors"] = str(mirrors).lower() - - payload = requests.get(endpoint, params=params).json()["data"] - df = self.__to_dataframe(payload, limit=self.limit) - - return df - - def python_major(self, version: str = None) -> pd.DataFrame: - """python major endpoint - - Args: - version (str, optional): filter by the major version number. Defaults to None. - - Returns: - pd.DataFrame: pandas dataframe - """ - endpoint: str = path.join(self.endpoint, "python_major") - params: Dict = {} - - if version is not None: - params["version"] = version - - payload = requests.get(endpoint, params=params).json()["data"] - df = self.__to_dataframe(payload, limit=self.limit) - - return df - - def python_minor(self, version: str = None) -> pd.DataFrame: - """python minor endpoint - - Args: - version (str, optional): filter by the minor.patch version number. Defaults to None. - - Returns: - pd.DataFrame: pandas dataframe - """ - endpoint: str = path.join(self.endpoint, "python_minor") - params: Dict = {} - - if version is not None: - params["version"] = version - - payload = requests.get(endpoint, params=params).json()["data"] - df = self.__to_dataframe(payload, limit=self.limit) - - return df - - def system(self, os: str = None) -> pd.DataFrame: - """system endpoint - - Args: - os (str, optional): filter by the operating system. Defaults to None. - - Returns: - pd.DataFrame: pandas dataframe - """ - endpoint: str = path.join(self.endpoint, "system") - params: Dict = {} - - if os is not None: - params["os"] = os - - payload = requests.get(endpoint, params=params).json()["data"] - df = self.__to_dataframe(payload, limit=self.limit) - - return df - - @staticmethod - def __to_dataframe( - json_data: Dict, - index: Collection = None, - limit: int = 20, - ) -> pd.DataFrame: - """_summary_ - - Args: - json_data (Dict): data - index (Collection, optional): desired index. Defaults to None. - limit (int, optional): limit the output coming from dataframe. Defaults to 20. - - Returns: - pd.DataFrame: _description_ - """ - df = pd.DataFrame(json_data, index=index) - df.replace("null", np.nan, inplace=True) - df = df.dropna() - - return df.tail(limit) - - @classmethod - def is_connected(cls) -> Dict: - try: - _ = requests.get(SERVICE_URL, timeout=5).raise_for_status() - return {"status": True} - except requests.exceptions.RequestException as e: - return {"status": False, "message": e} diff --git a/mindsdb/integrations/handlers/pypi_handler/icon.svg b/mindsdb/integrations/handlers/pypi_handler/icon.svg deleted file mode 100644 index 13f8de8bbc6..00000000000 --- a/mindsdb/integrations/handlers/pypi_handler/icon.svg +++ /dev/null @@ -1,123 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/pypi_handler/pypi_handler.py b/mindsdb/integrations/handlers/pypi_handler/pypi_handler.py deleted file mode 100644 index a389ae6b1af..00000000000 --- a/mindsdb/integrations/handlers/pypi_handler/pypi_handler.py +++ /dev/null @@ -1,73 +0,0 @@ -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.pypi_handler.api import PyPI -from mindsdb.integrations.handlers.pypi_handler.pypi_tables import ( - PyPIOverallTable, - PyPIPythonMajorTable, - PyPIPythonMinorTable, - PyPIRecentTable, - PyPISystemTable, -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse - - -class PyPIHandler(APIHandler): - def __init__(self, name: str, **kwargs) -> None: - """initializer method - - Args: - name (str): handler's name - """ - super().__init__(name) - - self.connection = None - self.is_connected = False - - _tables = [ - PyPIOverallTable, - PyPIPythonMajorTable, - PyPIPythonMinorTable, - PyPIRecentTable, - PyPISystemTable, - ] - - for Table in _tables: - self._register_table(Table.name, Table(self)) - - def check_connection(self) -> StatusResponse: - response = StatusResponse(False) - checking = PyPI.is_connected() - if checking["status"]: - response.success = True - else: - response.error_message = checking["message"] - - self.is_connected = True - - return response - - def connect(self) -> PyPI: - """making the connectino object - - Returns: - PyPI: pypi class as the returned value - """ - self.connection = PyPI - return self.connection - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/pypi_handler/pypi_tables.py b/mindsdb/integrations/handlers/pypi_handler/pypi_tables.py deleted file mode 100644 index 05bf956a22b..00000000000 --- a/mindsdb/integrations/handlers/pypi_handler/pypi_tables.py +++ /dev/null @@ -1,263 +0,0 @@ -from typing import List - -import pandas as pd -from mindsdb_sql_parser import ast - -from mindsdb.integrations.utilities.handlers.query_utilities import ( - SELECTQueryExecutor, - SELECTQueryParser, -) -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb.integrations.utilities.sql_utils import conditions_to_filter - - -class CustomAPITable(APITable): - name: str = None - columns: List[str] = [ - "category", - "date", - "downloads", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def get_columns(self, ignore: List[str] = []) -> List[str]: - """columns - - Args: - ignore (List[str], optional): exclusion items. Defaults to []. - - Returns: - List[str]: available columns with `ignore` items removed from the list. - """ - - return [item for item in self.columns if item not in ignore] - - -class PyPIRecentTable(CustomAPITable): - name: str = "recent" - columns: List[str] = [ - "last_day", - "last_week", - "last_month", - ] - - def select(self, query: ast.Select) -> pd.DataFrame: - """triggered at the SELECT query - - Args: - query (ast.Select): user's entered query - - Returns: - pd.DataFrame: the queried information - """ - params = conditions_to_filter(query.where) - - package_name = params["package"] - period = params.get("period", None) - all_cols = { - "day": "last_day", - "week": "last_week", - "month": "last_month", - } - - to_be_excluded = [] - - if period: - if period in all_cols.keys(): - del all_cols[period] - to_be_excluded = list(all_cols.values()) - else: - raise ValueError( - "Make sure that one of `day`, `week` or `month` values is assigned to `period`." - ) - - select_statement_parser = SELECTQueryParser( - query, PyPIRecentTable.name, self.get_columns(to_be_excluded) - ) - ( - selected_columns, - _, - order_by_conditions, - _, - ) = select_statement_parser.parse_query() - - raw_df = self.handler.connection(name=package_name).recent(period) - - select_statement_executor = SELECTQueryExecutor( - raw_df, selected_columns, [], order_by_conditions - ) - - result_df = select_statement_executor.execute_query() - - return result_df - - -class PyPIOverallTable(CustomAPITable): - name: str = "overall" - - def select(self, query: ast.Select) -> pd.DataFrame: - """triggered at the SELECT query - - Args: - query (ast.Select): user's entered query - - Returns: - pd.DataFrame: the queried information - """ - params = conditions_to_filter(query.where) - - package_name = params["package"] - mirrors = params.get("mirrors", None) - - select_statement_parser = SELECTQueryParser( - query, - PyPIOverallTable.name, - self.get_columns(), - ) - ( - selected_columns, - _, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - raw_df = self.handler.connection(name=package_name, limit=result_limit).overall( - mirrors=mirrors - ) - - select_statement_executor = SELECTQueryExecutor( - raw_df, selected_columns, [], order_by_conditions - ) - - result_df = select_statement_executor.execute_query() - - return result_df - - -class PyPIPythonMajorTable(CustomAPITable): - name: str = "python_major" - - def select(self, query: ast.Select) -> pd.DataFrame: - """triggered at the SELECT query - - Args: - query (ast.Select): user's entered query - - Returns: - pd.DataFrame: the queried information - """ - params = conditions_to_filter(query.where) - - package_name = params["package"] - version = params.get("version", None) - - select_statement_parser = SELECTQueryParser( - query, - PyPIOverallTable.name, - self.get_columns(), - ) - ( - selected_columns, - _, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - raw_df = self.handler.connection( - name=package_name, limit=result_limit - ).python_major(version=version) - - select_statement_executor = SELECTQueryExecutor( - raw_df, selected_columns, [], order_by_conditions - ) - - result_df = select_statement_executor.execute_query() - - return result_df - - -class PyPIPythonMinorTable(CustomAPITable): - name: str = "python_minor" - - def select(self, query: ast.Select) -> pd.DataFrame: - """triggered at the SELECT query - - Args: - query (ast.Select): user's entered query - - Returns: - pd.DataFrame: the queried information - """ - params = conditions_to_filter(query.where) - - package_name = params["package"] - version = params.get("version", None) - - select_statement_parser = SELECTQueryParser( - query, - PyPIOverallTable.name, - self.get_columns(), - ) - ( - selected_columns, - _, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - raw_df = self.handler.connection( - name=package_name, limit=result_limit - ).python_minor(version=version) - - select_statement_executor = SELECTQueryExecutor( - raw_df, selected_columns, [], order_by_conditions - ) - - result_df = select_statement_executor.execute_query() - - return result_df - - -class PyPISystemTable(CustomAPITable): - name: str = "system" - - def select(self, query: ast.Select) -> pd.DataFrame: - """triggered at the SELECT query - - Args: - query (ast.Select): user's entered query - - Returns: - pd.DataFrame: the queried information - """ - params = conditions_to_filter(query.where) - - package_name = params["package"] - os = params.get("os", None) - - select_statement_parser = SELECTQueryParser( - query, - PyPIOverallTable.name, - self.get_columns(), - ) - ( - selected_columns, - _, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - raw_df = self.handler.connection(name=package_name, limit=result_limit).system( - os=os - ) - - select_statement_executor = SELECTQueryExecutor( - raw_df, selected_columns, [], order_by_conditions - ) - - result_df = select_statement_executor.execute_query() - - return result_df diff --git a/mindsdb/integrations/handlers/qdrant_handler/README.md b/mindsdb/integrations/handlers/qdrant_handler/README.md deleted file mode 100644 index f1d85fea1c1..00000000000 --- a/mindsdb/integrations/handlers/qdrant_handler/README.md +++ /dev/null @@ -1,83 +0,0 @@ -
- - Qdrant - -

qdrant.tech handler for MindsDB

-

- -## About Qdrant πŸš€ - -A High-performance, massive-scale vector database for the next generation of AI. Also available in the cloud. - -## Implementation - -The handler uses the [qdrant-client](https://github.com/qdrant/qdrant-client) Python library to establish a connection to a Qdrant instance. - - -## Usage -To use this handler and get started with Qdrant, the following syntax can be used. -```sql -CREATE DATABASE qdrant_test -WITH ENGINE = "qdrant", -PARAMETERS = { - "location": ":memory:", - "collection_config": { - "size": 386, - "distance": "Cosine" - } -} -``` -The available arguments for instantiating Qdrant can be found [here](https://github.com/mindsdb/mindsdb/blob/23a509cb26bacae9cc22475497b8644e3f3e23c3/mindsdb/integrations/handlers/qdrant_handler/qdrant_handler.py#L408-L468). - -## Creating a new table - -- Qdrant options for creating a collection can be specified as `collection_config` in the `CREATE DATABASE` parameters. -- By default, UUIDs are set as collection IDs. You can provide your own IDs under the `id` column. -```sql -CREATE TABLE qdrant_test.test_table ( - SELECT embeddings,'{"source": "bbc"}' as metadata FROM mysql_demo_db.test_embeddings -); -``` - -## Querying the database - -#### Perform a full retrieval using the following syntax. - -```sql -SELECT * FROM qdrant_test.test_table -``` -By default, the `LIMIT` is set to 10 and the `OFFSET` is set to 0. - -#### Perform a similarity search using your embeddings, like so -```sql -SELECT * FROM qdrant_test.test_table -WHERE search_vector = (select embeddings from mysql_demo_db.test_embeddings limit 1) -``` - -#### Perform a search using filters -```sql -SELECT * FROM qdrant_test.test_table -WHERE `metadata.source` = 'bbc'; -``` - -#### Delete entries using IDs -```sql -DELETE FROM qtest.test_table_6 -WHERE id = 2 -``` - -#### Delete entries using filters -```sql -DELETE * FROM qdrant_test.test_table -WHERE `metadata.source` = 'bbc'; -``` - -#### Drop a table -```sql - DROP TABLE qdrant_test.test_table; -``` - -## NOTICE -Qdrant supports payload indexing that vastly improves retrieval efficiency with filters and is highly recommended. Please note that this feature currently cannot be configured via MindsDB and must be set up separately if needed. - -For detailed information on payload indexing, you can refer to the documentation available [here](https://qdrant.tech/documentation/concepts/indexing/#payload-index). diff --git a/mindsdb/integrations/handlers/qdrant_handler/__about__.py b/mindsdb/integrations/handlers/qdrant_handler/__about__.py deleted file mode 100644 index 8f6ae16e494..00000000000 --- a/mindsdb/integrations/handlers/qdrant_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Qdrant handler" -__package_name__ = "mindsdb_qdrant_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Qdrant" -__author__ = "Qdrant team" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/qdrant_handler/__init__.py b/mindsdb/integrations/handlers/qdrant_handler/__init__.py deleted file mode 100644 index d46e5ffbf35..00000000000 --- a/mindsdb/integrations/handlers/qdrant_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .qdrant_handler import QdrantHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Qdrant" -name = "qdrant" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/qdrant_handler/connection_args.py b/mindsdb/integrations/handlers/qdrant_handler/connection_args.py deleted file mode 100644 index 4e58664af86..00000000000 --- a/mindsdb/integrations/handlers/qdrant_handler/connection_args.py +++ /dev/null @@ -1,75 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - location={ - "type": ARG_TYPE.STR, - "description": "If `:memory:` - use in-memory Qdrant instance. If a remote URL - connect to a remote Qdrant instance. Example: `http://localhost:6333`", - "required": False, - }, - url={ - "type": ARG_TYPE.STR, - "description": "URL of Qdrant service. Either host or a string of type [scheme]<[port][prefix]. Ex: http://localhost:6333/service/v1", - }, - host={ - "type": ARG_TYPE.STR, - "description": "Host name of Qdrant service. The port and host are used to construct the connection URL.", - "required": False, - }, - port={ - "type": ARG_TYPE.INT, - "description": "Port of the REST API interface. Default: 6333", - "required": False, - }, - grpc_port={ - "type": ARG_TYPE.INT, - "description": "Port of the gRPC interface. Default: 6334", - "required": False, - }, - prefer_grpc={ - "type": ARG_TYPE.BOOL, - "description": "If `true` - use gPRC interface whenever possible in custom methods. Default: false", - "required": False, - }, - https={ - "type": ARG_TYPE.BOOL, - "description": "If `true` - use https protocol.", - "required": False, - }, - api_key={ - "type": ARG_TYPE.PWD, - "description": "API key for authentication in Qdrant Cloud.", - "required": False, - "secret": True - }, - prefix={ - "type": ARG_TYPE.STR, - "description": "If set, the value is added to the REST URL path. Example: `service/v1` will result in `http://localhost:6333/service/v1/{qdrant-endpoint}` for REST API", - "required": False, - }, - timeout={ - "type": ARG_TYPE.INT, - "description": "Timeout for REST and gRPC API requests. Defaults to 5.0 seconds for REST and unlimited for gRPC", - "required": False, - }, - path={ - "type": ARG_TYPE.STR, - "description": "Persistence path for a local Qdrant instance.", - "required": False, - }, - collection_config={ - "type": ARG_TYPE.DICT, - "description": "Collection creation configuration. See https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection", - "required": True, - }, -) - -connection_args_example = { - "location": ":memory:", - "collection_config": { - "size": 386, - "distance": "Cosine" - } -} diff --git a/mindsdb/integrations/handlers/qdrant_handler/icon.svg b/mindsdb/integrations/handlers/qdrant_handler/icon.svg deleted file mode 100644 index 08617d7fa12..00000000000 --- a/mindsdb/integrations/handlers/qdrant_handler/icon.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/qdrant_handler/qdrant_handler.py b/mindsdb/integrations/handlers/qdrant_handler/qdrant_handler.py deleted file mode 100644 index cd28a262a37..00000000000 --- a/mindsdb/integrations/handlers/qdrant_handler/qdrant_handler.py +++ /dev/null @@ -1,402 +0,0 @@ -import ast -from typing import Any, List, Optional -from itertools import zip_longest - -from qdrant_client import QdrantClient, models -import pandas as pd - -from mindsdb.integrations.libs.response import HandlerResponse -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, - TableField, - VectorStoreHandler, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class QdrantHandler(VectorStoreHandler): - """Handles connection and execution of the Qdrant statements.""" - - name = "qdrant" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - connection_data = kwargs.get("connection_data").copy() - # Qdrant offers several configuration and optmization options at the time of collection creation - # Since the create table statement doesn't have a way to pass these options - # We are requiring the user to pass these options in the connection_data - # These options are documented here. https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection - self.collection_config = connection_data.pop("collection_config") - self.connect(**connection_data) - - def connect(self, **kwargs): - """Connect to a Qdrant instance. - A Qdrant client can be instantiated with a REST, GRPC interface or in-memory for testing. - To use the in-memory instance, specify the location argument as ':memory:'.""" - if self.is_connected: - return self._client - - try: - self._client = QdrantClient(**kwargs) - self.is_connected = True - return self._client - except Exception as e: - logger.error(f"Error instantiating a Qdrant client: {e}") - self.is_connected = False - - def disconnect(self): - """Close the database connection.""" - if self.is_connected: - self._client.close() - self._client = None - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """Check the connection to the Qdrant database. - - Returns: - StatusResponse: Indicates if the connection is alive - """ - need_to_close = not self.is_connected - - try: - # Using a trivial operation to get the connection status - # As there isn't a universal ping method for the REST, GRPC and in-memory interface - self._client.get_locks() - response_code = StatusResponse(True) - except Exception as e: - logger.error(f"Error connecting to a Qdrant instance: {e}") - response_code = StatusResponse(False, error_message=str(e)) - finally: - if response_code.success and need_to_close: - self.disconnect() - if not response_code.success and self.is_connected: - self.is_connected = False - - return response_code - - def drop_table(self, table_name: str, if_exists=True): - """Delete a collection from the Qdrant Instance. - - Args: - table_name (str): The name of the collection to be dropped - if_exists (bool, optional): Throws an error if this value is set to false and the collection doesn't exist. Defaults to True. - - Returns: - HandlerResponse: _description_ - """ - result = self._client.delete_collection(table_name) - if not (result or if_exists): - raise Exception(f"Table {table_name} does not exist!") - - def get_tables(self) -> HandlerResponse: - """Get the list of collections in the Qdrant instance. - - Returns: - HandlerResponse: The common query handler response with a list of table names - """ - collection_response = self._client.get_collections() - collections_name = pd.DataFrame( - columns=["table_name"], - data=[collection.name for collection in collection_response.collections], - ) - return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_name) - - def get_columns(self, table_name: str) -> HandlerResponse: - try: - _ = self._client.get_collection(table_name) - except ValueError: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Table {table_name} does not exist!", - ) - return super().get_columns(table_name) - - def insert( - self, table_name: str, data: pd.DataFrame, columns: List[str] = None - ): - """Handler for the insert query - - Args: - table_name (str): The name of the table to be inserted into - data (pd.DataFrame): The data to be inserted - columns (List[str], optional): Columns to be inserted into. Unused as the values are derived from the "data" argument. Defaults to None. - - Returns: - HandlerResponse: The common query handler response - """ - assert len(data[TableField.ID.value]) == len(data[TableField.EMBEDDINGS.value]), "Number of ids and embeddings must be equal" - - # Qdrant doesn't have a distinction between documents and metadata - # Any data that is to be stored should be placed in the "payload" field - data = data.to_dict(orient="list") - payloads = [] - content_list = data[TableField.CONTENT.value] - if TableField.METADATA.value in data: - metadata_list = data[TableField.METADATA.value] - else: - metadata_list = [None] * len(data) - for document, metadata in zip_longest(content_list, metadata_list, fillvalue=None): - payload = {} - - # Insert the document with a "document" key in the payload - if document is not None: - payload["document"] = document - - # Unpack all the metadata fields into the payload - if metadata is not None: - if isinstance(metadata, str): - metadata = ast.literal_eval(metadata) - payload = {**payload, **metadata} - - if payload: - payloads.append(payload) - - # IDs can be either integers or strings(UUIDs) - # The following step ensures proper type of numberic values - ids = [int(id) if str(id).isdigit() else id for id in data[TableField.ID.value]] - self._client.upsert(table_name, points=models.Batch( - ids=ids, - vectors=data[TableField.EMBEDDINGS.value], - payloads=payloads - )) - - def create_table(self, table_name: str, if_not_exists=True): - """Create a collection with the given name in the Qdrant database. - - Args: - table_name (str): Name of the table(Collection) to be created - if_not_exists (bool, optional): Throws an error if this value is set to false and the collection already exists. Defaults to True. - - Returns: - HandlerResponse: The common query handler response - """ - try: - # Create a collection with the collection name and collection_config set during __init__ - self._client.create_collection(table_name, self.collection_config) - except ValueError as e: - if if_not_exists is False: - raise e - - def _get_qdrant_filter(self, operator: FilterOperator, value: Any) -> dict: - """ Map the filter operator to the Qdrant filter - We use a match and not a dict so as to conditionally construct values - With a dict, all the values the values will be constructed - Generating models.Range() with a str type value fails - - Args: - operator (FilterOperator): FilterOperator specified in the query. Eg >=, <=, = - value (Any): Value specified in the query - - Raises: - Exception: If an unsupported operator is specified - - Returns: - dict: A dict of Qdrant filtering clauses - """ - if operator == FilterOperator.EQUAL: - return {"match": models.MatchValue(value=value)} - elif operator == FilterOperator.NOT_EQUAL: - return {"match": models.MatchExcept(**{"except": [value]})} - elif operator == FilterOperator.LESS_THAN: - return {"range": models.Range(lt=value)} - elif operator == FilterOperator.LESS_THAN_OR_EQUAL: - return {"range": models.Range(lte=value)} - elif operator == FilterOperator.GREATER_THAN: - return {"range": models.Range(gt=value)} - elif operator == FilterOperator.GREATER_THAN_OR_EQUAL: - return {"range": models.Range(gte=value)} - else: - raise Exception(f"Operator {operator} is not supported by Qdrant!") - - def _translate_filter_conditions( - self, conditions: List[FilterCondition] - ) -> Optional[dict]: - """ - Translate a list of FilterCondition objects a dict that can be used by Qdrant. - Filtering clause docs can be found here: https://qdrant.tech/documentation/concepts/filtering/ - E.g., - [ - FilterCondition( - column="metadata.created_at", - op=FilterOperator.LESS_THAN, - value=7132423, - ), - FilterCondition( - column="metadata.created_at", - op=FilterOperator.GREATER_THAN, - value=2323432, - ) - ] - --> - models.Filter( - must=[ - models.FieldCondition( - key="created_at", - match=models.Range(lt=7132423), - ), - models.FieldCondition( - key="created_at", - match=models.Range(gt=2323432), - ), - ] - ) - """ - # We ignore all non-metadata conditions - if conditions is None: - return None - filter_conditions = [ - condition - for condition in conditions - if condition.column.startswith(TableField.METADATA.value) - ] - if len(filter_conditions) == 0: - return None - - qdrant_filters = [] - for condition in filter_conditions: - payload_key = condition.column.split(".")[-1] - qdrant_filters.append( - models.FieldCondition(key=payload_key, **self._get_qdrant_filter(condition.op, condition.value)) - ) - - return models.Filter(must=qdrant_filters) if qdrant_filters else None - - def update( - self, table_name: str, data: pd.DataFrame, columns: List[str] = None - ): - # insert makes upsert - return self.insert(table_name, data) - - def select(self, table_name: str, columns: Optional[List[str]] = None, - conditions: Optional[List[FilterCondition]] = None, offset: int = 0, limit: int = 10) -> pd.DataFrame: - """Select query handler - Eg: SELECT * FROM qdrant.test_table - - Args: - table_name (str): The name of the table to be queried - columns (Optional[List[str]], optional): List of column names specified in the query. Defaults to None. - conditions (Optional[List[FilterCondition]], optional): List of "where" conditionals. Defaults to None. - offset (int, optional): Offset the results by the provided value. Defaults to 0. - limit (int, optional): Number of results to return. Defaults to 10. - - Returns: - HandlerResponse: The common query handler response - """ - - # Validate and set offset and limit as None is passed if not set in the query - offset = offset if offset is not None else 0 - limit = limit if limit is not None else 10 - - # Full scroll if no where conditions are specified - if not conditions: - results = self._client.scroll(table_name, limit=limit, offset=offset) - payload = self._process_select_results(results[0], columns) - return payload - - # Filter conditions - vector_filter = [condition.value for condition in conditions if condition.column == TableField.SEARCH_VECTOR.value] - id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value] - query_filters = self._translate_filter_conditions(conditions) - - # Prefer returning results by IDs first - if id_filters: - - if len(id_filters) > 0: - # is wrapped to a list - if isinstance(id_filters[0], list): - id_filters = id_filters[0] - # convert to int if possible - id_filters = [int(id) if isinstance(id, str) and id.isdigit() else id for id in id_filters] - - results = self._client.retrieve(table_name, ids=id_filters) - # Followed by the search_vector value - elif vector_filter: - # Perform a similarity search with the first vector filter - results = self._client.search(table_name, query_vector=vector_filter[0], query_filter=query_filters or None, limit=limit, offset=offset) - elif query_filters: - results = self._client.scroll(table_name, scroll_filter=query_filters, limit=limit, offset=offset)[0] - - # Process results - payload = self._process_select_results(results, columns) - return payload - - def _process_select_results(self, results=None, columns=None): - """Private method to process the results of a select query - - Args: - results: A List[Records] or List[ScoredPoint]. Defaults to None - columns: List of column names specified in the query. Defaults to None - - Returns: - Dataframe: A processed pandas dataframe - """ - ids, documents, metadata, distances = [], [], [], [] - - for result in results: - ids.append(result.id) - # The documents and metadata are stored as a dict in the payload - documents.append(result.payload["document"]) - metadata.append({k: v for k, v in result.payload.items() if k != "document"}) - - # Score is only available for similarity search results - if "score" in result: - distances.append(result.score) - - payload = { - TableField.ID.value: ids, - TableField.CONTENT.value: documents, - TableField.METADATA.value: metadata, - } - - # Filter result columns - if columns: - payload = { - column: payload[column] - for column in columns - if column != TableField.EMBEDDINGS.value and column in payload - } - - # If the distance list is empty, don't add it to the result - if distances: - payload[TableField.DISTANCE.value] = distances - - return pd.DataFrame(payload) - - def delete( - self, table_name: str, conditions: List[FilterCondition] = None - ): - """Delete query handler - - Args: - table_name (str): List of column names specified in the query. Defaults to None. - conditions (List[FilterCondition], optional): List of "where" conditionals. Defaults to None. - - Raises: - Exception: If no conditions are specified - - Returns: - HandlerResponse: The common query handler response - """ - filters = self._translate_filter_conditions(conditions) - # Get id filters - ids = [ - condition.value - for condition in conditions - if condition.column == TableField.ID.value - ] or None - - if filters is None and ids is None: - raise Exception("Delete query must have at least one condition!") - - if ids: - self._client.delete(table_name, points_selector=models.PointIdsList(points=ids)) - - if filters: - self._client.delete(table_name, points_selector=models.FilterSelector(filter=filters)) diff --git a/mindsdb/integrations/handlers/qdrant_handler/requirements.txt b/mindsdb/integrations/handlers/qdrant_handler/requirements.txt deleted file mode 100644 index 0ba481f5871..00000000000 --- a/mindsdb/integrations/handlers/qdrant_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -qdrant-client -urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/mindsdb/integrations/handlers/questdb_handler/__about__.py b/mindsdb/integrations/handlers/questdb_handler/__about__.py deleted file mode 100644 index fb4c7e4a057..00000000000 --- a/mindsdb/integrations/handlers/questdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB QuestDB handler' -__package_name__ = 'mindsdb_questdb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for QuestDB" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/questdb_handler/__init__.py b/mindsdb/integrations/handlers/questdb_handler/__init__.py deleted file mode 100644 index 04ddc2c8807..00000000000 --- a/mindsdb/integrations/handlers/questdb_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .questdb_handler import QuestDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'QuestDB' -name = 'questdb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', - 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/questdb_handler/icon.svg b/mindsdb/integrations/handlers/questdb_handler/icon.svg deleted file mode 100644 index a504d5e24a7..00000000000 --- a/mindsdb/integrations/handlers/questdb_handler/icon.svg +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/questdb_handler/questdb_handler.py b/mindsdb/integrations/handlers/questdb_handler/questdb_handler.py deleted file mode 100644 index 78b15821eeb..00000000000 --- a/mindsdb/integrations/handlers/questdb_handler/questdb_handler.py +++ /dev/null @@ -1,71 +0,0 @@ -import pandas as pd -import numpy as np - -from questdb.ingress import Sender - -from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class QuestDBHandler(PostgresHandler): - """ - This handler handles connection and execution of the QuestDB statements. - TODO: check the dialect for questdb - """ - name = 'questdb' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - - def get_tables(self): - """ - List all tabels in QuestDB - """ - query = "SHOW TABLES" - response = super().native_query(query) - return response - - def get_columns(self, table_name): - """ - List information about the table - """ - query = f"SELECT * FROM tables() WHERE name='{table_name}';" - response = super().native_query(query) - return response - - def qdb_connect(self): - args = self.connection_args - conf = f"http::addr={args['host']}:9000;username={args['user']};password={args['password']};" - return Sender.from_conf(conf) - - def insert(self, table_name: str, df: pd.DataFrame): - - with self.qdb_connect() as sender: - try: - # find datetime column - at_col = None - for col, dtype in df.dtypes.items(): - if np.issubdtype(dtype, np.datetime64): - at_col = col - if at_col is None: - raise Exception(f'Unable to find datetime column: {df.dtypes}') - - sender.dataframe(df, table_name=table_name, at=at_col) - response = Response(RESPONSE_TYPE.OK) - - except Exception as e: - logger.error(f'Error running insert to {table_name} on {self.database}, {e}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(e) - ) - - return response diff --git a/mindsdb/integrations/handlers/questdb_handler/requirements.txt b/mindsdb/integrations/handlers/questdb_handler/requirements.txt deleted file mode 100644 index 3c72ed894fc..00000000000 --- a/mindsdb/integrations/handlers/questdb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -questdb diff --git a/mindsdb/integrations/handlers/questdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/questdb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py b/mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py deleted file mode 100644 index 9b214a31697..00000000000 --- a/mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.questdb_handler.questdb_handler import QuestDBHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class QuestDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "127.0.0.1", - "port": "8812", - "user": "admin", - "password": "quest", - "database": "questdb" - } - } - cls.handler = QuestDBHandler('test_questdb_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_describe_table(self): - described = self.handler.describe_table("house_rentals_data") - assert described['type'] is not RESPONSE_TYPE.ERROR - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables['type'] is not RESPONSE_TYPE.ERROR - - def test_3_select_query(self): - query = "SELECT * FROM house_rentals_data WHERE 'id'='1'" - result = self.handler.query(query) - assert len(result) > 0 diff --git a/mindsdb/integrations/handlers/quickbooks_handler/README.md b/mindsdb/integrations/handlers/quickbooks_handler/README.md deleted file mode 100644 index a5afb4230a6..00000000000 --- a/mindsdb/integrations/handlers/quickbooks_handler/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# Quickbooks Handler - -Quickbooks handler for MindsDB provides interfaces to connect to Quickbooks via APIs and pull data into MindsDB. - ---- - -## Table of Contents - -- [About Quickbooks](#about-Quickbooks) - - [Quickbooks Handler Implementation](#Quickbooks-handler-implementation) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) ---- -## About Quickbooks - -Quickbooks is an accounting software package developed and marketed by Intuit. Quickbooks products are geared mainly toward small and medium-sized businesses and offer on-premises accounting applications as well as cloud-based versions that accept business payments, manage and pay bills, and payroll functions. - -## Quickbooks Handler Implementation - -This handler was implemented using the official Quickbooks API. It provides a simple and easy-to-use interface to access the Quickbooks API. - - -## Implemented Features - -- Fetch the following TABLES - - vendors - - employees - - purchases - - accounts - - bills - - bill_payments - -## TODO - -- (List any pending features or improvements here) - -## Example Usage -``` -CREATE DATABASE my_qboo -With - ENGINE = "quickbooks", - PARAMETERS = { - "client_id": "", - "client_secret": "", - "realm_id":"", - "refresh_token":"", - "environment":'' - }; - -``` - -After setting up the Quickbooks Handler, you can use SQL queries to fetch data from Quickbooks: - -```sql -SELECT * -FROM my_qboo.vendors; -``` - -To fetch data from the employees table: - -```sql -SELECT * -FROM my_qboo.employees; -``` diff --git a/mindsdb/integrations/handlers/quickbooks_handler/__about__.py b/mindsdb/integrations/handlers/quickbooks_handler/__about__.py deleted file mode 100644 index 7d7af16128e..00000000000 --- a/mindsdb/integrations/handlers/quickbooks_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Quickbooks handler' -__package_name__ = 'mindsdb_quickbooks_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Quickbooks" -__author__ = 'Maro Akpobi' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/quickbooks_handler/__init__.py b/mindsdb/integrations/handlers/quickbooks_handler/__init__.py deleted file mode 100644 index 958c44792c4..00000000000 --- a/mindsdb/integrations/handlers/quickbooks_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .quickbooks_handler import ( - QuickbooksHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'QuickBooks' -name = 'quickbooks' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/quickbooks_handler/icon.svg b/mindsdb/integrations/handlers/quickbooks_handler/icon.svg deleted file mode 100644 index d4b6845dbf4..00000000000 --- a/mindsdb/integrations/handlers/quickbooks_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py b/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py deleted file mode 100644 index f321b0def15..00000000000 --- a/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py +++ /dev/null @@ -1,95 +0,0 @@ -import os -from qbosdk import QuickbooksOnlineSDK -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse -) -from mindsdb.utilities.config import Config -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -from .quickbooks_table import AccountsTable, PurchasesTable, BillPaymentsTable, VendorsTable, BillsTable, EmployeesTable - -logger = log.getLogger(__name__) - - -class QuickbooksHandler(APIHandler): - """ - A class for handling connections and interactions with Quickbooks API. - """ - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - args = kwargs.get('connection_data', {}) - - self.connection_args = {} - handler_config = Config().get('quickbooks_handler', {}) - for k in ['client_id', 'client_secret', 'refresh_token', 'realm_id', 'environment']: - if k in args: - self.connection_args[k] = args[k] - elif f'QUICKBOOKS_{k.upper()}' in os.environ: - self.connection_args[k] = os.environ[f'QUICKBOOKS_{k.upper()}'] - elif k in handler_config: - self.connection_args[k] = handler_config[k] - - self.quickbooks = None - self.is_connected = False - - accountso = AccountsTable(self) - self._register_table('accountso', accountso) - purchases = PurchasesTable(self) - self._register_table('purchases', purchases) - bills_payments = BillPaymentsTable(self) - self._register_table('bills_payments', bills_payments) - vendors = VendorsTable(self) - self._register_table('vendors', vendors) - bills = BillsTable(self) - self._register_table('bills', bills) - employees = EmployeesTable(self) - self._register_table('employees', employees) - - def connect(self): - if self.is_connected is True: - return self.quickbooks - - self.quickbooks = QuickbooksOnlineSDK( - client_id=self.connection_args['client_id'], - client_secret=self.connection_args['client_secret'], - realm_id=self.connection_args['realm_id'], - refresh_token=self.connection_args['refresh_token'], - environment=self.connection_args['environment'] - ) - self.is_connected = True - return self.quickbooks - - def check_connection(self) -> StatusResponse: - response = StatusResponse(False) - - try: - quickbooks = self.connect() - quickbooks.accounts.get() - logger.info(quickbooks.accounts.get()) - response.success = True - - except Exception as e: - response.error_message = f'Error connecting to Quickbooks API: {e}. ' - logger.error(response.error_message) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py b/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py deleted file mode 100644 index 02344198094..00000000000 --- a/mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py +++ /dev/null @@ -1,374 +0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb_sql_parser import ast - - -class AccountsTable(APITable): - - def flatten_dict(self, data: dict, prefix: str = ""): - flat_data = {} - for key, value in data.items(): - if isinstance(value, dict): - flattened_sub_dict = self.flatten_dict(value, f"{key}_") - flat_data.update(flattened_sub_dict) - else: - flat_data[f"{prefix}{key}"] = value - return flat_data - - def select(self, query: ast.Select) -> pd.DataFrame: - qbo = self.handler.connect() - accounts_data = qbo.accounts.get() - flattened_accounts_data = [self.flatten_dict(account) for account in accounts_data] - result = pd.DataFrame(flattened_accounts_data) - self.filter_columns(result, query) - return result - - def get_columns(self): - return [ - 'Name', - 'SubAccount', - 'FullyQualifiedName', - 'Active', - 'Classification', - 'AccountType', - 'AccountSubType', - 'CurrentBalance', - 'CurrentBalanceWithSubAccounts', - 'CurrencyRef_value', - 'CurrencyRef_name', - 'domain', - 'sparse', - 'Id', - 'SyncToken', - 'MetaData_CreateTime', - 'MetaData_LastUpdatedTime', - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.value) - if len(columns) > 0: - result = result[columns] - - -class PurchasesTable(APITable): - - def flatten_dict(self, data: dict, prefix: str = ""): - flat_data = {} - for key, value in data.items(): - if isinstance(value, dict): - flattened_sub_dict = self.flatten_dict(value, f"{key}_") - flat_data.update(flattened_sub_dict) - elif isinstance(value, list): - for i, item in enumerate(value): - if isinstance(item, dict): - flattened_sub_dict = self.flatten_dict(item, f"{key}_{i}_") - flat_data.update(flattened_sub_dict) - else: - flat_data[f"{prefix}{key}_{i}"] = item - else: - flat_data[f"{prefix}{key}"] = value - return flat_data - - def select(self, query: ast.Select) -> pd.DataFrame: - qbo = self.handler.connect() - purchases_data = qbo.purchases.get() - flattened_purchases_data = [self.flatten_dict(purchase) for purchase in purchases_data] - result = pd.DataFrame(flattened_purchases_data) - self.filter_columns(result, query) - return result - - def get_columns(self): - return [ - 'AccountRef_value', - 'AccountRef_name', - 'PaymentType', - 'Credit', - 'TotalAmt', - 'domain', - 'sparse', - 'Id', - 'SyncToken', - 'MetaData_CreateTime', - 'MetaData_LastUpdatedTime', - 'TxnDate', - 'CurrencyRef_value', - 'CurrencyRef_name', - 'EntityRef_value', - 'EntityRef_name', - 'EntityRef_type', - 'Line_0_Id', - 'Line_0_Amount', - 'Line_0_DetailType' - # Add more columns for additional line items if needed - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.value) - if len(columns) > 0: - result = result[columns] - - -class BillPaymentsTable(APITable): - def flatten_dict(self, data: dict, prefix: str = ""): - flat_data = {} - for key, value in data.items(): - if isinstance(value, dict): - flattened_sub_dict = self.flatten_dict(value, f"{prefix}{key}_") - flat_data.update(flattened_sub_dict) - elif isinstance(value, list): - for i, item in enumerate(value): - if isinstance(item, dict): - flattened_sub_dict = self.flatten_dict(item, f"{prefix}{key}_{i}_") - flat_data.update(flattened_sub_dict) - else: - flat_data[f"{prefix}{key}_{i}"] = item - else: - flat_data[prefix + key] = value - return flat_data - - def select(self, query: ast.Select) -> pd.DataFrame: - qbo = self.handler.connect() - billpayments_data = qbo.bill_payments.get() - flattened_billpayments_data = [self.flatten_dict(bp) for bp in billpayments_data] - result = pd.DataFrame(flattened_billpayments_data) - self.filter_columns(result, query) - return result - - def get_columns(self): - return [ - 'VendorRef_value', - 'VendorRef_name', - 'PayType', - 'CreditCardPayment_CCAccountRef_value', - 'CreditCardPayment_CCAccountRef_name', - 'CheckPayment_BankAccountRef_value', - 'CheckPayment_BankAccountRef_name', - 'TotalAmt', - 'Id', - 'SyncToken', - 'MetaData_CreateTime', - 'MetaData_LastUpdatedTime', - 'DocNumber', - 'TxnDate', - 'CurrencyRef_value', - 'CurrencyRef_name', - 'Line_0_Amount', - 'Line_0_LinkedTxn_0_TxnId', - 'Line_0_LinkedTxn_0_TxnType', - 'domain', - 'sparse', - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.value) - if len(columns) > 0: - result = result[columns] - - -class VendorsTable(APITable): - - def flatten_dict(self, data: dict, prefix: str = ""): - flat_data = {} - for key, value in data.items(): - if isinstance(value, dict): - flattened_sub_dict = self.flatten_dict(value, f"{key}_") - flat_data.update(flattened_sub_dict) - else: - flat_data[f"{prefix}{key}"] = value - return flat_data - - def select(self, query: ast.Select) -> pd.DataFrame: - qbo = self.handler.connect() - vendors_data = qbo.vendors.get() - flattened_vendors_data = [self.flatten_dict(vendor) for vendor in vendors_data] - result = pd.DataFrame(flattened_vendors_data) - self.filter_columns(result, query) - return result - - def get_columns(self): - return [ - 'Balance', - 'Vendor1099', - 'CurrencyRef_value', - 'CurrencyRef_name', - 'Id', - 'SyncToken', - 'MetaData_CreateTime', - 'MetaData_LastUpdatedTime', - 'DisplayName', - 'PrintOnCheckName', - 'Active', - 'domain', - 'sparse' - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.value) - if len(columns) > 0: - result = result[columns] - - -class BillsTable(APITable): - - def flatten_dict(self, data: dict, prefix: str = ""): - flat_data = {} - for key, value in data.items(): - if isinstance(value, dict): - flattened_sub_dict = self.flatten_dict(value, f"{prefix}{key}_") - flat_data.update(flattened_sub_dict) - elif isinstance(value, list): - for i, item in enumerate(value): - if isinstance(item, dict): - flattened_sub_dict = self.flatten_dict(item, f"{prefix}{key}_{i}_") - flat_data.update(flattened_sub_dict) - else: - flat_data[f"{prefix}{key}_{i}"] = item - else: - flat_data[prefix + key] = value - return flat_data - - def select(self, query: ast.Select) -> pd.DataFrame: - qbo = self.handler.connect() - bills_data = qbo.bills.get() - flattened_bills_data = [self.flatten_dict(bill) for bill in bills_data] - result = pd.DataFrame(flattened_bills_data) - self.filter_columns(result, query) - return result - - def get_columns(self): - return [ - 'DueDate', - 'Balance', - 'Id', - 'SyncToken', - 'MetaData_CreateTime', - 'MetaData_LastUpdatedTime', - 'TxnDate', - 'CurrencyRef_value', - 'CurrencyRef_name', - 'VendorRef_value', - 'VendorRef_name', - 'APAccountRef_value', - 'APAccountRef_name', - 'TotalAmt', - 'Line_0_Id', - 'Line_0_LineNum', - 'Line_0_Description', - 'Line_0_Amount', - 'Line_0_DetailType', - 'Line_0_ItemBasedExpenseLineDetail_BillableStatus', - 'Line_0_ItemBasedExpenseLineDetail_ItemRef_value', - 'Line_0_ItemBasedExpenseLineDetail_ItemRef_name', - 'Line_0_ItemBasedExpenseLineDetail_UnitPrice', - 'Line_0_ItemBasedExpenseLineDetail_Qty', - 'Line_1_Id', - 'Line_1_LineNum', - 'Line_1_Description', - 'Line_1_Amount', - 'Line_1_DetailType', - 'Line_1_ItemBasedExpenseLineDetail_BillableStatus', - 'Line_1_ItemBasedExpenseLineDetail_ItemRef_value', - 'Line_1_ItemBasedExpenseLineDetail_ItemRef_name', - 'Line_1_ItemBasedExpenseLineDetail_UnitPrice', - 'Line_1_ItemBasedExpenseLineDetail_Qty', - 'domain', - 'sparse' - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.value) - if len(columns) > 0: - result = result[columns] - - -class EmployeesTable(APITable): - - def flatten_dict(self, data: dict, prefix: str = ""): - flat_data = {} - for key, value in data.items(): - if isinstance(value, dict): - flattened_sub_dict = self.flatten_dict(value, f"{prefix}{key}_") - flat_data.update(flattened_sub_dict) - elif isinstance(value, list): - for i, item in enumerate(value): - if isinstance(item, dict): - flattened_sub_dict = self.flatten_dict(item, f"{prefix}{key}_{i}_") - flat_data.update(flattened_sub_dict) - else: - flat_data[f"{prefix}{key}_{i}"] = item - else: - flat_data[prefix + key] = value - return flat_data - - def select(self, query: ast.Select) -> pd.DataFrame: - qbo = self.handler.connect() - employees_data = qbo.employees.get() - flattened_employees_data = [self.flatten_dict(employee) for employee in employees_data] - result = pd.DataFrame(flattened_employees_data) - self.filter_columns(result, query) - return result - - def get_columns(self): - return [ - 'BillableTime', - 'Id', - 'SyncToken', - 'MetaData_CreateTime', - 'MetaData_LastUpdatedTime', - 'GivenName', - 'FamilyName', - 'DisplayName', - 'PrintOnCheckName', - 'Active', - 'PrimaryPhone_FreeFormNumber', - 'HiredDate', - 'domain', - 'sparse' - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.value) - if len(columns) > 0: - result = result[columns] diff --git a/mindsdb/integrations/handlers/quickbooks_handler/requirements.txt b/mindsdb/integrations/handlers/quickbooks_handler/requirements.txt deleted file mode 100644 index cf4f4b95815..00000000000 --- a/mindsdb/integrations/handlers/quickbooks_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -qbosdk \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ray_serve_handler/__about__.py b/mindsdb/integrations/handlers/ray_serve_handler/__about__.py deleted file mode 100644 index 9b4ca6ff8c3..00000000000 --- a/mindsdb/integrations/handlers/ray_serve_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Ray Serve handler' -__package_name__ = 'mindsdb_ray_serve_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Ray Serve" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/ray_serve_handler/__init__.py b/mindsdb/integrations/handlers/ray_serve_handler/__init__.py deleted file mode 100644 index 25daf9b4410..00000000000 --- a/mindsdb/integrations/handlers/ray_serve_handler/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE -from mindsdb.integrations.handlers.ray_serve_handler.__about__ import __version__ as version, __description__ as description -try: - from mindsdb.integrations.handlers.ray_serve_handler.ray_serve_handler import RayServeHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Ray Serve' -name = 'ray_serve' -type = HANDLER_TYPE.ML -icon_path = 'icon.svg' -permanent = False - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/ray_serve_handler/example.md b/mindsdb/integrations/handlers/ray_serve_handler/example.md deleted file mode 100644 index c3ffedccd5b..00000000000 --- a/mindsdb/integrations/handlers/ray_serve_handler/example.md +++ /dev/null @@ -1,175 +0,0 @@ -# Preliminaries - -To use Ray Serve models through MindsDB, you need to create a file that uses Ray Serve to deploy both training and inference endpoints for MindsDB to hook into. - -There are several (slightly different) ways to achieve this, but we recommend the approach used in our [docs page](https://docs.mindsdb.com/custom-model/ray-serve): - -```python -import ray -from fastapi import Request, FastAPI -from ray import serve - -# [other model-specific imports here] - -app = FastAPI() -ray.init() -serve.start(detached=True) - - -async def parse_req(request: Request): - """Parse a json payload from a post request into a dataframe and target column.""""" - data = await request.json() - target = data.get('target', None) - di = json.loads(data['df']) - df = pd.DataFrame(di) - return df, target - - -@serve.deployment(route_prefix="/my_model") -@serve.ingress(app) -class MyModel: - @app.post("/train") - async def train(self, request: Request): - df, target = await parse_req(request) - # [...model fitting logic here] - return {'status': 'ok'} - - @app.post("/predict") - async def predict(self, request: Request): - df, _ = await parse_req(request) - # [...model inference logic here] - pred_dict = {'prediction': [x for x in predictions]} - return pred_dict - - -if __name__ == '__main__': - MyModel.deploy() - while True: - time.sleep(1) -``` - -Once your script looks similar to the one above, save it (e.g. as `model.py`) and setup the server by simply running the file: - -```bash -python model.py -``` - -# MindsDB commands - -To create the engine: - -```sql -CREATE ML_ENGINE rayserve FROM ray_serve; -``` - -While the Ray Serve is active, you can use the following command to create the model inside MindsDB by triggering the train endpoint: - -```sql -CREATE MODEL mindsdb.rayserve_model -FROM integration_name (SELECT * FROM table_name) -PREDICT target -USING -engine='ray_serve', -train_url='http://ray_serve_url:port/my_model/train', -predict_url='http://ray_serve_url:port/my_model/predict'; -``` - -In a local deployment, `ray_serve_url = localhost` and `port = 8000`. The `my_model` part of the URL is the `route_prefix` in the script above. - -Once the model is created, you should wait until the training process finishes. - -Then, you can query it as any other MindsDB model: -```sql -SELECT input_col, target_col -FROM rayserve_model -WHERE input_col=some_value; -- could also use a JOIN here, as usual - -DESCRIBE rayserve_model; -``` - -# End to end example - -Here, we take a look at a simple linear regression done on a single feature to predict a numerical target. - -`model.py` looks like this: - -```python -import ray -from fastapi import Request, FastAPI -from ray import serve -import time -import pandas as pd -import json -from sklearn.linear_model import LogisticRegression - - -app = FastAPI() -ray.init() -serve.start(detached=True) - - -async def parse_req(request: Request): - data = await request.json() - target = data.get('target', None) - di = json.loads(data['df']) - df = pd.DataFrame(di) - return df, target - - -@serve.deployment(route_prefix="/my_model") -@serve.ingress(app) -class MyModel: - @app.post("/train") - async def train(self, request: Request): - df, target = await parse_req(request) - feature_cols = list(set(list(df.columns)) - set([target])) - self.feature_cols = feature_cols - X = df.loc[:, self.feature_cols] - Y = list(df[target]) - self.model = LogisticRegression() - self.model.fit(X, Y) - return {'status': 'ok'} - - @app.post("/predict") - async def predict(self, request: Request): - df, _ = await parse_req(request) - X = df.loc[:, self.feature_cols] - predictions = self.model.predict(X) - pred_dict = {'prediction': [float(x) for x in predictions]} - return pred_dict - - -if __name__ == '__main__': - MyModel.deploy() - - while True: - time.sleep(1) -``` - -And the MindsDB commands to train and query the model are: - -```sql -CREATE ML_ENGINE rayserve FROM ray_serve; -- assumes user hasn't registered the engine prior to this example - -SELECT sqft, rental_price FROM example_db.demo_data.home_rentals LIMIT 10; -- toy dataset. we'll use `sqft` as the input feature and `rental_price` as the target - -CREATE MODEL mindsdb.rayserve_model -FROM example_db (SELECT sqft, rental_price FROM demo_data.home_rentals LIMIT 10) -PREDICT rental_price -USING -engine='ray_serve', -train_url='http://127.0.0.1:8000/my_model/train', -predict_url='http://127.0.0.1:8000/my_model/predict'; - -SELECT sqft, rental_price -FROM rayserve_model -WHERE sqft=917; - -DESCRIBE rayserve_model; -``` - -By the end of the entire process, you should get back a response with the predicted `rental_price`: - -| sqft | rental_price | -|------|--------------| -| 917 | 3901 | \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ray_serve_handler/icon.svg b/mindsdb/integrations/handlers/ray_serve_handler/icon.svg deleted file mode 100644 index 2a11028288e..00000000000 --- a/mindsdb/integrations/handlers/ray_serve_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py b/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py deleted file mode 100644 index 9abca34a885..00000000000 --- a/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +++ /dev/null @@ -1,117 +0,0 @@ -import io -import json - -import requests -from typing import Dict, Optional - -import pandas as pd -import pyarrow.parquet as pq - -from mindsdb.integrations.libs.base import BaseMLEngine - - -class RayServeException(Exception): - pass - - -class RayServeHandler(BaseMLEngine): - """ - The Ray Serve integration engine needs to have a working connection to Ray Serve. For this: - - A Ray Serve server should be running - - Example: - - """ # noqa - name = 'ray_serve' - - @staticmethod - def create_validation(target, args=None, **kwargs): - if not args.get('using'): - raise Exception("Error: This engine requires some parameters via the 'using' clause. Please refer to the documentation of the Ray Serve handler and try again.") # noqa - if not args['using'].get('train_url'): - raise Exception("Error: Please provide a URL for the training endpoint.") - if not args['using'].get('predict_url'): - raise Exception("Error: Please provide a URL for the prediction endpoint.") - - def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - # TODO: use join_learn_process to notify users when ray has finished the training process - args = args['using'] # ignore the rest of the problem definition - args['target'] = target - self.model_storage.json_set('args', args) - try: - if args.get('is_parquet', False): - buffer = io.BytesIO() - df.to_parquet(buffer) - resp = requests.post(args['train_url'], - files={"df": ("df", buffer.getvalue(), "application/octet-stream")}, - data={"args": json.dumps(args), "target": target}, - ) - else: - resp = requests.post(args['train_url'], - json={'df': df.to_json(orient='records'), 'target': target, 'args': args}, - headers={'content-type': 'application/json; format=pandas-records'}) - except requests.exceptions.InvalidSchema: - raise Exception("Error: The URL provided for the training endpoint is invalid.") - - error = None - try: - resp = resp.json() - except json.JSONDecodeError: - error = resp.text - else: - if resp.get('status') != 'ok': - error = resp['status'] - - if error: - raise RayServeException(f"Error: {error}") - - def predict(self, df, args=None): - args = {**(self.model_storage.json_get('args')), **args} # merge incoming args - pred_args = args.get('predict_params', {}) - args = {**args, **pred_args} # merge pred_args - if args.get('is_parquet', False): - buffer = io.BytesIO() - df.attrs['pred_args'] = pred_args - df.to_parquet(buffer) - resp = requests.post(args['predict_url'], - files={"df": ("df", buffer.getvalue(), "application/octet-stream")}, - data={"pred_args": json.dumps(pred_args)}, - ) - else: - resp = requests.post(args['predict_url'], - json={'df': df.to_json(orient='records'), 'pred_args': pred_args}, - headers={'content-type': 'application/json; format=pandas-records'}) - content_type = resp.headers.get("Content-Type", "") - if "application/octet-stream" in content_type: - try: - buffer = io.BytesIO(resp.content) - table = pq.read_table(buffer) - response = table.to_pandas() - except Exception: - error = 'Could not decode parquet.' - else: - try: - response = resp.json() - except json.JSONDecodeError: - error = resp.text - - if 'prediction' in response: - target = args['target'] - if target != 'prediction': - # rename prediction to target - response[target] = response.pop('prediction') - return pd.DataFrame(response) - else: - # something wrong - error = response - - raise RayServeException(f"Error: {error}") - - def describe(self, key: Optional[str] = None) -> pd.DataFrame: - args = self.model_storage.json_get('args') - description = { - 'TRAIN_URL': [args['train_url']], - 'PREDICT_URL': [args['predict_url']], - 'TARGET': [args['target']], - } - return pd.DataFrame.from_dict(description) diff --git a/mindsdb/integrations/handlers/reddit_handler/README.md b/mindsdb/integrations/handlers/reddit_handler/README.md deleted file mode 100644 index cd774178c89..00000000000 --- a/mindsdb/integrations/handlers/reddit_handler/README.md +++ /dev/null @@ -1,82 +0,0 @@ -# Reddit Handler - -Reddit handler for MindsDB provides interfaces to connect to Reddit via APIs and pull data into MindsDB. - ---- - -## Table of Contents - -- [Reddit Handler](#reddit-handler) - - [Table of Contents](#table-of-contents) - - [About Reddit](#about-reddit) - - [Reddit Handler Implementation](#reddit-handler-implementation) - - [Reddit Handler Initialization](#reddit-handler-initialization) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About Reddit - -Reddit is a network of communities based on people's interests. It provides a platform for users to submit links, create content, and have discussions about various topics. - -## Reddit Handler Implementation - -This handler was implemented using the [PRAW (Python Reddit API Wrapper)](https://praw.readthedocs.io/en/latest/) library. PRAW is a Python package that provides a simple and easy-to-use interface to access the Reddit API. - -## Reddit Handler Initialization - -The Reddit handler is initialized with the following parameters: - -- `client_id`: a required Reddit API client ID -- `client_secret`: a required Reddit API client secret -- `user_agent`: a required user agent string to identify your application - -## How to get your Reddit credentials. - -1. Visit Reddit App Preferences (https://www.reddit.com/prefs/apps) or [https://old.reddit.com/prefs/apps/](https://old.reddit.com/prefs/apps/) -2. Scroll to the bottom and click "create another app..." -3. Fill out the name, description, and redirect url for your app, then click "create app" -4. Now you should be able to see the personal use script, secret, and name of your app. Store those as environment variables CLIENT_ID, CLIENT_SECRET, and USER_AGENT respectively. - -## Implemented Features - -- Fetch submissions from a subreddit based on sorting type and limit. -- (Add other implemented features here) - -## TODO - -- (List any pending features or improvements here) - -## Example Usage -``` -CREATE DATABASE my_reddit -With - ENGINE = 'reddit', - PARAMETERS = { - "client_id":"YOUR_CLIENT_ID", - "client_secret":"YOUR_CLIENT_SECRET", - "user_agent":"YOUR_USER_AGENT" - }; -``` - -After setting up the Reddit Handler, you can use SQL queries to fetch data from Reddit: - -```sql -SELECT * -FROM my_reddit.submission -WHERE subreddit = 'MachineLearning' AND sort_type = 'top' AND items = 5; -``` - -`items`: Number of items to fetch from the subreddit. - -`sort_type`: Sorting type for the subreddit. Can be one of `hot`, `new`, `top`, `controversial`, `gilded`, `wiki`, `mod`, `rising`. - -Each Post in a subreddit has a unique ID. You can use this ID to fetch comments for a particular post. - -``` -SELECT * -FROM my_reddit.comment -WHERE submission_id = '12gls93' -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/reddit_handler/__about__.py b/mindsdb/integrations/handlers/reddit_handler/__about__.py deleted file mode 100644 index 64aed327ab8..00000000000 --- a/mindsdb/integrations/handlers/reddit_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Reddit handler' -__package_name__ = 'mindsdb_reddit_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Reddit" -__author__ = 'Maro Akpobi' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/reddit_handler/__init__.py b/mindsdb/integrations/handlers/reddit_handler/__init__.py deleted file mode 100644 index 7314efc20d6..00000000000 --- a/mindsdb/integrations/handlers/reddit_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .reddit_handler import ( - RedditHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Reddit' -name = 'reddit' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/reddit_handler/icon.svg b/mindsdb/integrations/handlers/reddit_handler/icon.svg deleted file mode 100644 index e9f9923a08c..00000000000 --- a/mindsdb/integrations/handlers/reddit_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/reddit_handler/reddit_handler.py b/mindsdb/integrations/handlers/reddit_handler/reddit_handler.py deleted file mode 100644 index 4d314f620b1..00000000000 --- a/mindsdb/integrations/handlers/reddit_handler/reddit_handler.py +++ /dev/null @@ -1,101 +0,0 @@ -import praw -import os -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities.config import Config -from mindsdb.utilities import log - -from .reddit_tables import CommentTable, SubmissionTable - -logger = log.getLogger(__name__) - - -class RedditHandler(APIHandler): - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - args = kwargs.get('connection_data', {}) - - self.connection_args = {} - handler_config = Config().get('reddit_handler', {}) - for k in ['client_id', 'client_secret', 'user_agent']: - if k in args: - self.connection_args[k] = args[k] - elif f'REDDIT_{k.upper()}' in os.environ: - self.connection_args[k] = os.environ[f'REDDIT_{k.upper()}'] - elif k in handler_config: - self.connection_args[k] = handler_config[k] - - self.reddit = None - self.is_connected = False - - comment = CommentTable(self) - self._register_table('comment', comment) - - submission = SubmissionTable(self) - self._register_table('submission', submission) - - def connect(self): - """Authenticate with the Reddit API using the client ID, client secret - and user agent provided in the constructor. - """ - if self.is_connected is True: - return self.reddit - - self.reddit = praw.Reddit( - client_id=self.connection_args['client_id'], - client_secret=self.connection_args['client_secret'], - user_agent=self.connection_args['user_agent'], - ) - - self.is_connected = True - return self.reddit - - def check_connection(self) -> StatusResponse: - '''It evaluates if the connection with Reddit API is alive and healthy. - Returns: - HandlerStatusResponse - ''' - - response = StatusResponse(False) - - try: - reddit = self.connect() - reddit.user.me() - response.success = True - - except Exception as e: - response.error_message = f'Error connecting to Reddit api: {e}. ' - logger.error(response.error_message) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query_string: str = None): - '''It parses any native statement string and acts upon it (for example, raw syntax commands). - Args: - query (Any): query in native format (str for sql databases, - dict for mongo, api's json etc) - Returns: - HandlerResponse - ''' - - method_name, params = self.parse_native_query(query_string) - if method_name == 'get_submission': - df = self.get_submission(params) - elif method_name == 'get_subreddit': - df = self.get_subreddit(params) - else: - raise ValueError(f"Method '{method_name}' not supported by RedditHandler") - - return Response( - RESPONSE_TYPE.TABLE, - data_frame=df - ) diff --git a/mindsdb/integrations/handlers/reddit_handler/reddit_tables.py b/mindsdb/integrations/handlers/reddit_handler/reddit_tables.py deleted file mode 100644 index ecbd063b2f2..00000000000 --- a/mindsdb/integrations/handlers/reddit_handler/reddit_tables.py +++ /dev/null @@ -1,194 +0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - - -class CommentTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - '''Select data from the comment table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - ''' - - reddit = self.handler.connect() - - submission_id = None - conditions = extract_comparison_conditions(query.where) - for condition in conditions: - if condition[0] == '=' and condition[1] == 'submission_id': - submission_id = condition[2] - break - - if submission_id is None: - raise ValueError('Submission ID is missing in the SQL query') - - submission = reddit.submission(id=submission_id) - submission.comments.replace_more(limit=None) - - result = [] - for comment in submission.comments.list(): - data = { - 'id': comment.id, - 'body': comment.body, - 'author': comment.author.name if comment.author else None, - 'created_utc': comment.created_utc, - 'score': comment.score, - 'permalink': comment.permalink, - 'ups': comment.ups, - 'downs': comment.downs, - 'subreddit': comment.subreddit.display_name, - } - result.append(data) - - result = pd.DataFrame(result) - self.filter_columns(result, query) - return result - - def get_columns(self): - '''Get the list of column names for the comment table. - - Returns: - list: A list of column names for the comment table. - ''' - return [ - 'id', - 'body', - 'author', - 'created_utc', - 'permalink', - 'score', - 'ups', - 'downs', - 'subreddit', - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.value) - if len(columns) > 0: - result = result[columns] - - -class SubmissionTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - '''Select data from the submission table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - ''' - - reddit = self.handler.connect() - - subreddit_name = None - sort_type = None - conditions = extract_comparison_conditions(query.where) - for condition in conditions: - if condition[0] == '=' and condition[1] == 'subreddit': - subreddit_name = condition[2] - elif condition[0] == '=' and condition[1] == 'sort_type': - sort_type = condition[2] - elif condition[0] == '=' and condition[1] == 'items': - items = int(condition[2]) - - if not sort_type: - sort_type = 'hot' - if not subreddit_name: - return pd.DataFrame() - - if sort_type == 'new': - submissions = reddit.subreddit(subreddit_name).new(limit=items) - elif sort_type == 'rising': - submissions = reddit.subreddit(subreddit_name).rising(limit=items) - elif sort_type == 'controversial': - submissions = reddit.subreddit(subreddit_name).controversial(limit=items) - elif sort_type == 'top': - submissions = reddit.subreddit(subreddit_name).top(limit=items) - else: - submissions = reddit.subreddit(subreddit_name).hot(limit=items) - - result = [] - for submission in submissions: - data = { - 'id': submission.id, - 'title': submission.title, - 'author': submission.author.name if submission.author else None, - 'created_utc': submission.created_utc, - 'score': submission.score, - 'num_comments': submission.num_comments, - 'permalink': submission.permalink, - 'url': submission.url, - 'ups': submission.ups, - 'downs': submission.downs, - 'num_crossposts': submission.num_crossposts, - 'subreddit': submission.subreddit.display_name, - 'selftext': submission.selftext, - } - result.append(data) - - result = pd.DataFrame(result) - self.filter_columns(result, query) - return result - - def get_columns(self): - '''Get the list of column names for the submission table. - - Returns: - list: A list of column names for the submission table. - ''' - return [ - 'id', - 'title', - 'author', - 'created_utc', - 'permalink', - 'num_comments', - 'score', - 'ups', - 'downs', - 'num_crossposts', - 'subreddit', - 'selftext' - ] - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - else: - columns = self.get_columns() - - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - result = result[columns] - - if query is not None and query.limit is not None: - return result.head(query.limit.value) - - return result diff --git a/mindsdb/integrations/handlers/reddit_handler/requirements.txt b/mindsdb/integrations/handlers/reddit_handler/requirements.txt deleted file mode 100644 index 9d9d90a490b..00000000000 --- a/mindsdb/integrations/handlers/reddit_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -praw \ No newline at end of file diff --git a/mindsdb/integrations/handlers/rest_api_handler/README.md b/mindsdb/integrations/handlers/rest_api_handler/README.md new file mode 100644 index 00000000000..037d6ff429a --- /dev/null +++ b/mindsdb/integrations/handlers/rest_api_handler/README.md @@ -0,0 +1,168 @@ +--- +title: REST API +sidebarTitle: REST API +--- + +This documentation describes the integration of MindsDB with generic REST APIs using bearer-token authentication. +The integration allows MindsDB to forward HTTP requests to any REST API using stored credentials via the passthrough endpoint β€” no SQL table mapping required. + +### Prerequisites + +Before proceeding, ensure the following prerequisites are met: + +1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). +2. Obtain a bearer token (API key, personal access token, etc.) for the target REST API. + +## Connection + +Establish a connection to a REST API from MindsDB by executing the following SQL command: + +```sql +CREATE DATABASE my_api +WITH ENGINE = 'rest_api', +PARAMETERS = { + "base_url": "https://api.example.com", + "bearer_token": "your_token_here" +}; +``` + +Required connection parameters include the following: + +* `base_url`: The base URL of the REST API (e.g. `https://api.example.com`). All passthrough request paths are appended to this URL. +* `bearer_token`: The bearer token used for authentication. Injected as `Authorization: Bearer ` on every request. + +Optional connection parameters include the following: + +* `default_headers`: A JSON object of static headers added to every request (e.g. `{"Accept": "application/json"}`). +* `allowed_hosts`: A list of allowed hostnames for passthrough requests. Defaults to the hostname of `base_url`. Use `["*"]` to disable host containment. +* `test_path`: The path used by the `/passthrough/test` endpoint to verify connectivity. Defaults to `/`. + +### Examples + +Connect to the HubSpot API: + +```sql +CREATE DATABASE my_hubspot +WITH ENGINE = 'rest_api', +PARAMETERS = { + "base_url": "https://api.hubapi.com", + "bearer_token": "pat-eu1-..." +}; +``` + +Connect to a custom internal API with default headers: + +```sql +CREATE DATABASE my_internal_api +WITH ENGINE = 'rest_api', +PARAMETERS = { + "base_url": "https://internal.example.com/api/v2", + "bearer_token": "sk-...", + "default_headers": {"Accept": "application/json", "X-Team": "data"}, + "test_path": "/health" +}; +``` + +Connect to an API with multiple allowed hosts: + +```sql +CREATE DATABASE my_multi_region_api +WITH ENGINE = 'rest_api', +PARAMETERS = { + "base_url": "https://api.example.com", + "bearer_token": "your_token", + "allowed_hosts": ["api.example.com", "api.eu.example.com"] +}; +``` + +## Usage + +This handler is **passthrough-only** β€” it does not expose SQL tables. All interaction is through the REST passthrough endpoint. + +### Passthrough Requests + +Send HTTP requests to the upstream API through MindsDB: + +``` +POST /api/integrations/my_api/passthrough +``` + +```json +{ + "method": "GET", + "path": "/v1/users", + "query": {"limit": "10"}, + "headers": {"Accept": "application/json"} +} +``` + +The response wraps the upstream HTTP response: + +```json +{ + "status_code": 200, + "headers": {"content-type": "application/json"}, + "body": {"results": [...]}, + "content_type": "application/json" +} +``` + +Supported HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`. + +### Testing the Connection + +Verify that the base URL, token, and host allowlist are configured correctly: + +``` +POST /api/integrations/my_api/passthrough/test +``` + +Returns: + +```json +{"ok": true, "status_code": 200, "host": "api.example.com", "latency_ms": 140} +``` + +Or on failure: + +```json +{"ok": false, "error_code": "auth_failed", "message": "upstream rejected credentials; base URL and allowlist look correct"} +``` + +## Security + +- **Credentials are never exposed.** The bearer token is stored in MindsDB and injected at request time. It is never returned to the caller. +- **Host containment.** Requests are restricted to hostnames in the allowlist (defaults to the `base_url` host). Private/loopback IP addresses are rejected by default. +- **Header filtering.** Callers cannot override `Authorization`, `Host`, `Cookie`, or `Proxy-*` headers. +- **Response scrubbing.** If the upstream API echoes the token in responses, it is replaced with `[REDACTED_API_KEY]` before returning to the caller. +- **Size limits.** Request bodies are capped at 1 MB, response bodies at 10 MB (configurable via environment variables). + +## Troubleshooting + + +`base_url is not configured` + +* **Symptoms**: Passthrough requests fail with a configuration error. +* **Checklist**: + 1. Ensure `base_url` is provided in the connection parameters. + 2. The URL must include the scheme (`https://`). + + + +`host 'X' is not in the datasource allowlist` + +* **Symptoms**: Passthrough requests to a valid URL are rejected. +* **Checklist**: + 1. The request path may resolve to a different hostname than `base_url`. + 2. Add the hostname to `allowed_hosts` in the connection parameters. + 3. Use `["*"]` to disable host containment (not recommended for production). + + + +`upstream rejected credentials (401/403)` + +* **Symptoms**: The `/passthrough/test` endpoint returns `error_code: "auth_failed"`. +* **Checklist**: + 1. Verify the bearer token is valid and not expired. + 2. Check that the token has the required scopes/permissions for the API endpoints you are calling. + diff --git a/mindsdb/integrations/handlers/rest_api_handler/__about__.py b/mindsdb/integrations/handlers/rest_api_handler/__about__.py new file mode 100644 index 00000000000..b7f131f401c --- /dev/null +++ b/mindsdb/integrations/handlers/rest_api_handler/__about__.py @@ -0,0 +1,9 @@ +__title__ = "MindsDB REST API handler" +__package_name__ = "mindsdb_rest_api_handler" +__version__ = "0.0.1" +__description__ = "MindsDB handler for generic REST APIs with bearer-token passthrough" +__author__ = "MindsDB Inc" +__github__ = "https://github.com/mindsdb/mindsdb" +__pypi__ = "https://pypi.org/project/mindsdb/" +__license__ = "MIT" +__copyright__ = "Copyright 2026 - mindsdb" diff --git a/mindsdb/integrations/handlers/confluence_handler/__init__.py b/mindsdb/integrations/handlers/rest_api_handler/__init__.py similarity index 79% rename from mindsdb/integrations/handlers/confluence_handler/__init__.py rename to mindsdb/integrations/handlers/rest_api_handler/__init__.py index a87e7512ed4..d9f8fcf24eb 100644 --- a/mindsdb/integrations/handlers/confluence_handler/__init__.py +++ b/mindsdb/integrations/handlers/rest_api_handler/__init__.py @@ -4,18 +4,18 @@ from .connection_args import connection_args, connection_args_example try: - from .confluence_handler import ConfluenceHandler as Handler + from .rest_api_handler import RestApiHandler as Handler import_error = None except Exception as e: Handler = None import_error = e -title = "Confluence" -name = "confluence" +title = "REST API" +name = "rest_api" type = HANDLER_TYPE.DATA icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ "Handler", @@ -27,6 +27,6 @@ "description", "import_error", "icon_path", - "connection_args_example", "connection_args", + "connection_args_example", ] diff --git a/mindsdb/integrations/handlers/rest_api_handler/connection_args.py b/mindsdb/integrations/handlers/rest_api_handler/connection_args.py new file mode 100644 index 00000000000..bba20202ba0 --- /dev/null +++ b/mindsdb/integrations/handlers/rest_api_handler/connection_args.py @@ -0,0 +1,44 @@ +from collections import OrderedDict + +from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE + +connection_args = OrderedDict( + base_url={ + "type": ARG_TYPE.STR, + "description": "Base URL of the REST API (e.g. https://api.example.com)", + "required": True, + "label": "Base URL", + }, + bearer_token={ + "type": ARG_TYPE.PWD, + "description": "Bearer token injected as Authorization: Bearer ", + "required": True, + "label": "Bearer Token", + "secret": True, + }, + default_headers={ + "type": ARG_TYPE.DICT, + "description": 'Static headers added to every request (e.g. {"Accept": "application/json"})', + "required": False, + "label": "Default Headers", + }, + allowed_hosts={ + "type": ARG_TYPE.LIST, + "description": 'Allowed hostnames for passthrough requests. Defaults to the base_url host. Use ["*"] to disable containment.', + "required": False, + "label": "Allowed Hosts", + }, + test_path={ + "type": ARG_TYPE.STR, + "description": "Path used by the /passthrough/test endpoint. Defaults to /", + "required": False, + "label": "Test Path", + }, +) + +connection_args_example = OrderedDict( + base_url="https://api.example.com", + bearer_token="your_token_here", + default_headers={"Accept": "application/json"}, + allowed_hosts=["api.example.com"], +) diff --git a/mindsdb/integrations/handlers/rest_api_handler/icon.svg b/mindsdb/integrations/handlers/rest_api_handler/icon.svg new file mode 100644 index 00000000000..2346f8d4d3e --- /dev/null +++ b/mindsdb/integrations/handlers/rest_api_handler/icon.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py b/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py new file mode 100644 index 00000000000..a55ad7c1f4d --- /dev/null +++ b/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py @@ -0,0 +1,87 @@ +from typing import Any + +from mindsdb.integrations.libs.api_handler import APIHandler +from mindsdb.integrations.libs.passthrough import PassthroughMixin +from mindsdb.integrations.libs.passthrough_types import PassthroughRequest +from mindsdb.integrations.libs.response import ( + HandlerStatusResponse as StatusResponse, + HandlerResponse as Response, + RESPONSE_TYPE, +) +from mindsdb.utilities import log + +logger = log.getLogger(__name__) + + +class RestApiHandler(APIHandler, PassthroughMixin): + """Generic REST API handler β€” passthrough only, no SQL tables. + + This is the "bring your own URL" escape hatch for any bearer-token API + that mindsdb doesn't have a named handler for. Users supply a base_url + and a bearer_token and get full passthrough access. + """ + + name = "rest_api" + + def __init__(self, name: str, **kwargs: Any) -> None: + super().__init__(name) + self.connection_data = kwargs.get("connection_data") or {} + self.kwargs = kwargs + self.is_connected = False + + # PassthroughMixin reads these instance attributes at runtime. + self._bearer_token_arg = "bearer_token" + self._base_url_default = None # user must supply base_url + + # Build the test request from connection_data. Default to GET / + # unless the user provided a custom test_path. + test_path = self.connection_data.get("test_path", "/") + if not test_path.startswith("/"): + test_path = f"/{test_path}" + self._test_request = PassthroughRequest(method="GET", path=test_path) + + def connect(self) -> None: + """No persistent connection needed β€” passthrough is stateless. + + Validation happens in check_connection(), which we + call separately during CREATE DATABASE. + """ + self.is_connected = True + + def check_connection(self) -> StatusResponse: + """Validate that base_url and bearer_token are present.""" + response = StatusResponse(False) + try: + base_url = self._build_base_url() + if not base_url: + response.error_message = "base_url is required" + return response + token = self.connection_data.get(self._bearer_token_arg) + if not token: + response.error_message = "bearer_token is required" + return response + response.success = True + self.is_connected = True + except Exception as e: + response.error_message = str(e) + return response + + def native_query(self, query: str) -> Response: + """Not supported β€” use passthrough instead.""" + return Response( + RESPONSE_TYPE.ERROR, + error_message="rest_api handler is passthrough-only. Use the /passthrough endpoint.", + ) + + def get_tables(self) -> Response: + """No SQL tables β€” passthrough only.""" + import pandas as pd + + return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) + + def get_columns(self, table_name: str) -> Response: + """No SQL tables β€” passthrough only.""" + return Response( + RESPONSE_TYPE.ERROR, + error_message="rest_api handler is passthrough-only. No tables available.", + ) diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/README.md b/mindsdb/integrations/handlers/rocket_chat_handler/README.md deleted file mode 100644 index ef0821a9010..00000000000 --- a/mindsdb/integrations/handlers/rocket_chat_handler/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Rocket Chat API Handler - -This handler integrates with the [Rocket Chat API](https://developer.rocket.chat/reference/api) to read and write messages. - -### Connect to the Rocket Chat API -We start by creating a database to connect to the Rocket Chat API. - -``` -CREATE DATABASE my_rocket_chat -WITH - ENGINE = "rocket_chat" - PARAMETERS = { - "username": , - "password": , - "domain": - }; -``` - -### Select Data -To see if the connection was successful, try searching for messages in a channel - -``` -SELECT * -FROM my_rocket_chat.channel_messages WHERE room_id="GENERAL"; -``` - -Each row should look like this: - -| id | room_id | bot_id | text | username | name | sent_at | -| ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | -| PbrLoFpxYk2bbkvyA | GENERAL | [NULL] | Sample message | minds.db | MindsDB | 2023-05-05T16:41:57.998Z | - -where: -* id - ID of the message -* room_id - ID of the channel/room the message was sent in -* bot_id - ID of the bot that sent this message if applicable -* text - Actual message text -* username - Username for sent message -* name: Full name for sent message -* sent_at: When the message was sent in 'YYYY-MM-DDTHH:MM:SS.mmmZ' format - - -## Posting Messages - -You can also post messages to a Rocket Chat channel using MindsDB: - -``` -INSERT INTO my_rocket_chat.channel_messages (room_id, text) VALUES ("GENERAL", "This is a test message!") -``` - -Supported insert columns: -* room_id (REQUIRED) - ID of room to send message to -* text (REQUIRED) - Message to send -* alias - What the message's name will appear as (username will still display). -* emoji - Sets the avatar on the message to be this emoji (e.g. :smirk:). -* avatar - Image URL to use for the message avatar. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/__about__.py b/mindsdb/integrations/handlers/rocket_chat_handler/__about__.py deleted file mode 100644 index 19877330b71..00000000000 --- a/mindsdb/integrations/handlers/rocket_chat_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Rocket Chat handler' -__package_name__ = 'mindsdb_rocket_chat_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the Rocket Chat API" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/__init__.py b/mindsdb/integrations/handlers/rocket_chat_handler/__init__.py deleted file mode 100644 index e4ab474d8b6..00000000000 --- a/mindsdb/integrations/handlers/rocket_chat_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .rocket_chat_handler import RocketChatHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Rocket Chat' -name = 'rocket_chat' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/icon.svg b/mindsdb/integrations/handlers/rocket_chat_handler/icon.svg deleted file mode 100644 index 5ae6e6bca20..00000000000 --- a/mindsdb/integrations/handlers/rocket_chat_handler/icon.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/requirements.txt b/mindsdb/integrations/handlers/rocket_chat_handler/requirements.txt deleted file mode 100644 index c5ef3d60283..00000000000 --- a/mindsdb/integrations/handlers/rocket_chat_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -rocketchat_API -urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py b/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py deleted file mode 100644 index 327f4adf45f..00000000000 --- a/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py +++ /dev/null @@ -1,152 +0,0 @@ -import pandas as pd - -from rocketchat_API.rocketchat import RocketChat - -from mindsdb.integrations.handlers.rocket_chat_handler.rocket_chat_tables import ( - ChannelMessagesTable, ChannelsTable, DirectsTable, DirectMessagesTable, UsersTable) -from mindsdb.integrations.libs.api_handler import APIChatHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class RocketChatHandler(APIChatHandler): - """A class for handling connections and interactions with the Rocket Chat API. - - Attributes: - username (str): Rocket Chat username to use for authentication. - password (str): Rocket Chat username to use for authentication. - auth_token (str): Rocket Chat authorization token to use for all API requests. - auth_user_id (str): Rocket Chat user ID to associate with all API requests - domain (str): Path to Rocket Chat domain to use (e.g. https://mindsdb.rocket.chat). - client (RocketChatClient): The `RocketChatClient` object for interacting with the Rocket Chat API. - is_connected (bool): Whether or not the API client is connected to the Rocket Chat API. - """ - - def __init__(self, name: str = None, **kwargs): - """Registers all API tables and prepares the handler for an API connection. - - Args: - name: (str): The handler name to use - """ - super().__init__(name) - self.username = None - self.password = None - self.auth_token = None - self.auth_user_id = None - self.domain = None - - args = kwargs.get('connection_data', {}) - if 'domain' not in args: - raise ValueError('Must include Rocket Chat "domain" to read and write messages') - self.domain = args['domain'] - - if 'token' in args and 'user_id' in args: - self.auth_token = args['token'] - self.auth_user_id = args['user_id'] - elif 'username' in args and 'password' in args: - self.username = args['username'] - self.password = args['password'] - else: - raise ValueError('Need "token" and "user_id", or "username" and "password" to connect to Rocket Chat') - - self.client = None - self.is_connected = False - - self._register_table('channels', ChannelsTable(self)) - - self._register_table('channel_messages', ChannelMessagesTable(self)) - - self._register_table('directs', DirectsTable(self)) - - self._register_table('direct_messages', DirectMessagesTable(self)) - - self._register_table('users', UsersTable(self)) - - def get_chat_config(self): - params = { - 'polling': { - 'type': 'message_count', - 'table': 'directs', - 'chat_id_col': '_id', - 'count_col': 'msgs' - }, - 'chat_table': { - 'name': 'direct_messages', - 'chat_id_col': 'room_id', - 'username_col': 'username', - 'text_col': 'text', - 'time_col': 'sent_at', - } - } - return params - - def get_my_user_name(self): - info = self.call_api('me') - return info['username'] - - def connect(self): - """Creates a new Rocket Chat API client if needed and sets it as the client to use for requests. - - Returns newly created Rocket Chat API client, or current client if already set. - """ - if self.is_connected and self.client is not None: - return self.client - - self.client = RocketChat( - user=self.username, - password=self.password, - auth_token=self.auth_token, - user_id=self.auth_user_id, - server_url=self.domain - ) - - self.is_connected = True - return self.client - - def check_connection(self) -> StatusResponse: - """Checks connection to Rocket Chat API by sending a ping request. - - Returns StatusResponse indicating whether or not the handler is connected. - """ - - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Rocket Chat API: {e}!') - response.error_message = e - - if response.success is False: - self.is_connected = False - return response - - def native_query(self, query: str = None) -> Response: - ast = parse_sql(query) - return self.query(ast) - - def call_api(self, method_name: str = None, *args, **kwargs) -> pd.DataFrame: - """Calls the Rocket Chat API method with the given params. - - Returns results as a pandas DataFrame. - - Args: - method_name (str): Method name to call - params (Dict): Params to pass to the API call - """ - client = self.connect() - - method = getattr(client, method_name) - - messages_response = method(*args, **kwargs) - - if not messages_response.ok: - messages_response.raise_for_status() - return messages_response.json() diff --git a/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py b/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py deleted file mode 100644 index 5cd36f5119e..00000000000 --- a/mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py +++ /dev/null @@ -1,198 +0,0 @@ -import pandas as pd - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import conditions_to_filter, project_dataframe, sort_dataframe -from mindsdb_sql_parser import ast - - -def message_to_dataframe_row(message: dict): - message['id'] = message['_id'] - message['room_id'] = message['rid'] - message['text'] = message['msg'] - message['sent_at'] = message['ts'] - - if 'u' in message: - if 'username' in message['u']: - message['username'] = message['u']['username'] - if 'name' in message['u']: - message['name'] = message['u']['name'] - if 'bot' in message and 'i' in message['bot']: - message['bot_id'] = message['bot']['i'] - return message - - -class ChannelsTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - message_data = self.handler.call_api('channels_list') - df = pd.DataFrame(message_data['channels']) - df = project_dataframe(df, query.targets, self.get_columns()) - - return df - - def get_columns(self): - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - '_id', - 'name', - 'usersCount', - 'msgs', - ] - - -class ChannelMessagesTable(APITable): - """Manages SELECT and INSERT operations for Rocket Chat messages.""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Selects message data from the Rocket Chat API and returns it as a pandas DataFrame. - - Returns dataframe representing the Rocket Chat API results. - - Args: - query (ast.Select): Given SQL SELECT query - """ - filters = conditions_to_filter(query.where) - - if 'room_id' not in filters: - raise NotImplementedError() - - params = {} - - if query.limit: - params['count'] = query.limit - - # See Channel Messages endpoint: - # https://developer.rocket.chat/reference/api/rest-api/endpoints/core-endpoints/channels-endpoints/messages - message_data = self.handler.call_api('channels_history', filters['room_id'], **params) - - # Only return the columns we need to. - message_rows = [message_to_dataframe_row(m) for m in message_data['messages']] - df = pd.DataFrame(message_rows) - - df = sort_dataframe(df, query.order_by) - df = project_dataframe(df, query.targets, self.get_columns()) - - return df - - def insert(self, query: ast.Insert): - """Posts a message using the Rocket Chat API. - - Args: - query (ast.Insert): Given SQL INSERT query - """ - # See Post Message endpoint: - # https://developer.rocket.chat/reference/api/rest-api/endpoints/core-endpoints/chat-endpoints/postmessage - column_names = [col.name for col in query.columns] - for insert_row in query.values: - insert_params = dict(zip(column_names, insert_row)) - - self.handler.call_api('chat_post_message', **insert_params) - - def get_columns(self): - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - 'id', - 'room_id', - 'bot_id', - 'text', - 'username', - 'name', - 'sent_at' - ] - - -class DirectsTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - message_data = self.handler.call_api('im_list') - df = pd.DataFrame(message_data['ims']) - df = project_dataframe(df, query.targets, self.get_columns()) - - return df - - def insert(self, query: ast.Insert): - column_names = [col.name for col in query.columns] - for insert_row in query.values: - insert_params = dict(zip(column_names, insert_row)) - - self.handler.call_api('im_create', **insert_params) - - def get_columns(self): - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - '_id', - 'usernames', - 'usersCount', - 'msgs', - ] - - -class DirectMessagesTable(APITable): - - def select(self, query: ast.Select) -> pd.DataFrame: - - filters = conditions_to_filter(query.where) - - if 'room_id' not in filters: - raise NotImplementedError() - - params = {} - - if query.limit: - params['count'] = query.limit - - message_data = self.handler.call_api('im_history', filters['room_id'], **params) - - message_rows = [message_to_dataframe_row(m) for m in message_data['messages']] - df = pd.DataFrame(message_rows) - df = sort_dataframe(df, query.order_by) - df = project_dataframe(df, query.targets, self.get_columns()) - - return df - - def insert(self, query: ast.Insert): - - column_names = [col.name for col in query.columns] - for insert_row in query.values: - insert_params = dict(zip(column_names, insert_row)) - - if 'username' in insert_params: - # resolve username - resp = self.handler.call_api('users_info', insert_params['username']) - if 'user' in resp: - insert_params['room_id'] = resp['user']['_id'] - del insert_params['username'] - else: - raise ValueError(f'User not found: {insert_params["username"]}') - - self.handler.call_api('chat_post_message', **insert_params) - - def get_columns(self): - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - 'id', - 'room_id', - 'bot_id', - 'text', - 'username', - 'name', - 'sent_at' - ] - - -class UsersTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - message_data = self.handler.call_api('users_list') - df = pd.DataFrame(message_data['users']) - df = project_dataframe(df, query.targets, self.get_columns()) - - return df - - def get_columns(self): - """Gets all columns to be returned in pandas DataFrame responses""" - return [ - '_id', - 'username', - 'name', - 'status', - 'active', - 'type', - ] diff --git a/mindsdb/integrations/handlers/rockset_handler/__about__.py b/mindsdb/integrations/handlers/rockset_handler/__about__.py deleted file mode 100644 index 4c23fb194f5..00000000000 --- a/mindsdb/integrations/handlers/rockset_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Rockset handler' -__package_name__ = 'mindsdb_rockset_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Rockset" -__author__ = 'Alissa Troiano' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/rockset_handler/__init__.py b/mindsdb/integrations/handlers/rockset_handler/__init__.py deleted file mode 100644 index 0a005037459..00000000000 --- a/mindsdb/integrations/handlers/rockset_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .rockset_handler import RocksetHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Rockset' -name = 'rockset' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/rockset_handler/connection_args.py b/mindsdb/integrations/handlers/rockset_handler/connection_args.py deleted file mode 100644 index 6f306302d8f..00000000000 --- a/mindsdb/integrations/handlers/rockset_handler/connection_args.py +++ /dev/null @@ -1,45 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'Rockset user name' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'Rockset password', - 'secret': True - }, - api_key={ - 'type': ARG_TYPE.STR, - 'description': 'Rockset API key' - }, - api_server={ - 'type': ARG_TYPE.STR, - 'description': 'Rockset API server' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'Rockset host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Rockset port' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'Rockset database' - } -) -connection_args_example = OrderedDict( - user='rockset', - password='rockset', - api_key="adkjf234rksjfa23waejf2", - api_server='api-us-west-2.rockset.io', - host='localhost', - port='3306', - database='test' -) diff --git a/mindsdb/integrations/handlers/rockset_handler/icon.svg b/mindsdb/integrations/handlers/rockset_handler/icon.svg deleted file mode 100644 index 73cfa304953..00000000000 --- a/mindsdb/integrations/handlers/rockset_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/rockset_handler/requirements.txt b/mindsdb/integrations/handlers/rockset_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/rockset_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/rockset_handler/rockset_handler.py b/mindsdb/integrations/handlers/rockset_handler/rockset_handler.py deleted file mode 100644 index aae7d02dcdf..00000000000 --- a/mindsdb/integrations/handlers/rockset_handler/rockset_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler - - -class RocksetHandler(MySQLHandler): - """ - This handler handles connection and execution of the Rockset integration - """ - name = 'rockset' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/rockset_handler/tests/__init__.py b/mindsdb/integrations/handlers/rockset_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/rockset_handler/tests/test.png b/mindsdb/integrations/handlers/rockset_handler/tests/test.png deleted file mode 100644 index 684533fe6e4..00000000000 Binary files a/mindsdb/integrations/handlers/rockset_handler/tests/test.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/rockset_handler/tests/test2.png b/mindsdb/integrations/handlers/rockset_handler/tests/test2.png deleted file mode 100644 index e9d54745b7a..00000000000 Binary files a/mindsdb/integrations/handlers/rockset_handler/tests/test2.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py b/mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py deleted file mode 100644 index 6b98fbeeb6a..00000000000 --- a/mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py +++ /dev/null @@ -1,33 +0,0 @@ -import unittest -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.handlers.rockset_handler.rockset_handler import RocksetHandler - - -class RocksetHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_args": { - "host": '127.0.0.1', - "port": 3306, - "user": "rockset", - "password": "rockset" - } - } - cls.handler = RocksetHandler('test_rockset_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.check_connection() - - def test_1_get_tables(self): - tables = self.handler.get_tables() - self.assertEqual(tables, []) - - def test_2_get_columns(self): - columns = self.handler.get_columns('test') - self.assertEqual(columns, []) - - def test_3_query(self): - response = self.handler.query('SELECT 1') - self.assertEqual(response['type'], RESPONSE_TYPE.QUERY) - self.assertEqual(response['data'], [[1]]) diff --git a/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py b/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py index d816e233147..e87b44317e0 100644 --- a/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +++ b/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py @@ -5,6 +5,8 @@ from salesforce_api.exceptions import AuthenticationError, RestRequestCouldNotBeUnderstoodError from mindsdb.integrations.libs.api_handler import MetaAPIHandler +from mindsdb.integrations.libs.passthrough import PassthroughMixin +from mindsdb.integrations.libs.passthrough_types import PassthroughRequest from mindsdb.integrations.libs.response import ( HandlerResponse as Response, HandlerStatusResponse as StatusResponse, @@ -18,13 +20,29 @@ logger = log.getLogger(__name__) -class SalesforceHandler(MetaAPIHandler): +class SalesforceHandler(MetaAPIHandler, PassthroughMixin): """ This handler handles the connection and execution of SQL statements on Salesforce. """ name = "salesforce" + # REST passthrough configuration. Salesforce's base URL is per-org + # (`instance_url`) and is normally discovered at auth time. v1 requires + # the caller to supply both `access_token` and `instance_url` explicitly + # in connection_data; dynamic discovery from the username/password flow + # is deferred to a future refresh-aware mixin. + _bearer_token_arg = "access_token" + _base_url_default = None + _test_request = PassthroughRequest(method="GET", path="/services/data/v60.0/") + + def _build_base_url(self) -> str | None: + data = self._get_connection_data() + instance_url = data.get("instance_url") + if not instance_url: + return None + return str(instance_url).rstrip("/") + def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None: """ Initializes the handler. diff --git a/mindsdb/integrations/handlers/sap_erp_handler/README.md b/mindsdb/integrations/handlers/sap_erp_handler/README.md deleted file mode 100644 index 07007313807..00000000000 --- a/mindsdb/integrations/handlers/sap_erp_handler/README.md +++ /dev/null @@ -1,82 +0,0 @@ -# SAP ERP Handler - -This handler allows you to interact with SAP ERP API available [here](https://api.sap.com/api/API_BUSINESS_PARTNER/overview). - -## About SAP ERP - -SAP ERP is enterprise resource planning software developed by the company SAP SE. - -## Limitations - -Currently this handler only supports `SELECT` queries - -## SAP ERP Handler Initialization - -You can create the database via the following query: - -```sql -CREATE DATABASE sap_datasource -WITH ENGINE = "sap_erp", -PARAMETERS = { - "api_key": "...", - "base_url": "https://sandbox.api.sap.com/s4hanacloud/sap/opu/odata/sap/API_BUSINESS_PARTNER/" -}; -``` - -To select from various tables, you can use `SELECT` statement: - -```sql -SELECT * FROM sap_datasource.address_email_address; -``` - -## Available tables - -`address_email_address`: email address data linked to all business partner address records in the system -`address_fax_number`: fax address data linked to all the business partner address records in the system -`address_home_page`: home page url address records linked to all business partner address records in the system -`address_phone_number`: all the mobile/telephone address records linked to all the business partner address records in the system -`bp_addr_depdnt_intl_loc_number`: address dependent data for the business partner address by using the key fields business partner number and address id -`bp_contact_to_address`: workplace address records linked to all the business partner contact records in the system. -`bp_contact_to_func_and_dept`: contact person department and function data linked to all business partner contact records in the system -`bp_credit_worthiness`: contact person department and function data linked to all business partner contact records in the system -`bp_data_controller`: business partner data controllers of all the available records linked to business partners in the system -`bp_financial_services_extn`: financial services business partner attributes of all the available records linked to business partners in the system -`bp_financial_services_reporting`: financial services reporting attributes of all the available records linked to business partners in the system -`bp_fiscal_year_information`: business partner fiscal year information of all the available records linked to business partners in the system. -`bp_relationship`: business partner relationship data fields of all the available records in the system -`bu_pa_address_usage`: all the address usage records linked to all business partner address records in the system -`bu_pa_identification`: business partner identification data fields of all the records available records in the system -`bu_pa_industry`: business partner industry data fields of all the available records in the system -`business_partner`: general data fields of all the business partner records available in the system -`business_partner_address`: business partner address data fields of all the available records in the system -`business_partner_contact`: business partner contact data fields of all the available records in the system -`business_partner_payment_card`: business partner payment card data fields of all the available records in the system -`business_partner_rating`: business partner ratings of all the available records linked to business partners in the system -`business_partner_role`: business partner role data fields of all the records available records in the system -`business_partner_tax_number`: tax number data of all the available records linked to business partners in the system -`business_partner_address_dependent_tax_number`: address dependent tax number data of all the available records linked to business partners in the system -`cust_addr_depdnt_ext_identifier`: address dependent external identifiers of all the available records linked to customers in the system -`customer`: general data of all the customer records available in the system -`customer_company`: customer company data fields of all the available records in the system linked to customer -`customer_company_text`: customer company text records attached to customer company in the system -`customer_dunning`: dunning records attached to customer company in the system -`customer_sales_area`: customer sales area data fields of all the available records in the system -`customer_sales_area_tax`: customer sales area tax data fields of all the available records in the system -`customer_sales_area_text`: customer sales area text fields of all the available records in the system linked to customer sales areas -`customer_tax_grouping`: customer tax grouping data attached to a customer in the system -`customer_text`: customer text data attached to a customer in the system -`customer_unloading_point`: unloading point data attached to a customer in the system -`customer_withholding_tax`: withholding tax records attached to customer company in the system -`customer_sales_partner_func`: partner function fields of all the available records in the system linked to customer sales areas -`customer_sales_area_addr_depdnt_info`: address dependent customer sales area data fields of all the available records in the system -`customer_sales_area_addr_depdnt_tax_info`: address dependent customer sales area tax data fields of all the available records in the system -`customer_unloading_point_addr_depdnt_info`: address dependent customer unloading point data fields of all the available records in the system -`supplier`: general data of all the supplier records available in the system -`supplier_company`: supplier company data available in the system -`supplier_company_text`: supplier company text data attached to supplier company in the system -`supplier_dunning`: dunning records attached to supplier company in the system -`supplier_partner_func`: partner function fields of all the available records in the system linked to supplier purchasing organization -`supplier_purchasing_org`: supplier purchasing organization data attached to supplier records in the system -`supplier_purchasing_org_text`: supplier purchasing organization text data attached to purchasing organization in the system -`supplier_text`: supplier text data attached to purchasing organization in the system -`supplier_withholding_tax`: withholding tax records attached to supplier company in the system diff --git a/mindsdb/integrations/handlers/sap_erp_handler/__about__.py b/mindsdb/integrations/handlers/sap_erp_handler/__about__.py deleted file mode 100644 index 4fd6177814e..00000000000 --- a/mindsdb/integrations/handlers/sap_erp_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB SAP ERP handler" -__package_name__ = "mindsdb_sap_erp_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for SAP ERP" -__author__ = "Aditya Azad" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/sap_erp_handler/__init__.py b/mindsdb/integrations/handlers/sap_erp_handler/__init__.py deleted file mode 100644 index 7c515006d0e..00000000000 --- a/mindsdb/integrations/handlers/sap_erp_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .sap_erp_handler import SAPERPHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "SAP ERP" -name = "sap_erp" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/sap_erp_handler/api.py b/mindsdb/integrations/handlers/sap_erp_handler/api.py deleted file mode 100644 index cf812d22462..00000000000 --- a/mindsdb/integrations/handlers/sap_erp_handler/api.py +++ /dev/null @@ -1,79 +0,0 @@ -import requests -from urllib.parse import urljoin - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def move_under(d, key_contents_to_move, key_to_move_under=None): - """ - Moves the keys in nested dict with key key_contents_to_move under the dict defined by key_to_move_under. - If no key_to_move_under is provided, the keys are moved under d. - eg. - Calling this on the following dict (d) with key_contents_to_move = "a": - { - "a": { - "b": 1, - "c": 2 - } - } - results in: - { - "b": 1, - "c": 2 - } - """ - if key_contents_to_move not in d: - return - for k, v in d[key_contents_to_move].items(): - if key_to_move_under: - d[key_to_move_under][k] = v - else: - d[k] = v - del d[key_contents_to_move] - - -class SAPERP: - - def __init__(self, url: str, api_key: str) -> None: - self.base_url = url - self.api_key = api_key - - def _request(self, method: str, relative_endpoint: str, data=None): - kwargs = { - "method": method, - "url": urljoin(self.base_url, relative_endpoint), - "headers": { - "APIKey": self.api_key, - "Accept": "application/json", - "DataServiceVersion": "2.0" - } - } - if data is not None: - kwargs["data"] = data - return requests.request(**kwargs) - - def is_connected(self) -> bool: - if self._request("get", "").ok: - return True - return False - - def get(self, endpoint): - """ Common method for all get endpoints """ - try: - resp = self._request("get", endpoint) - if resp.ok: - resp = resp.json()["d"] - if "results" in resp: - resp = resp["results"] - else: - resp = [resp] - else: - resp = [] - for r in resp: - move_under(r, "__metadata") - return resp - except Exception as e: - logger.error(f"Error requesting endpoint {endpoint}: {e}") - return {} diff --git a/mindsdb/integrations/handlers/sap_erp_handler/icon.svg b/mindsdb/integrations/handlers/sap_erp_handler/icon.svg deleted file mode 100644 index 1849de1d44a..00000000000 --- a/mindsdb/integrations/handlers/sap_erp_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_handler.py b/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_handler.py deleted file mode 100644 index b437b0abc5b..00000000000 --- a/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_handler.py +++ /dev/null @@ -1,161 +0,0 @@ -from collections import OrderedDict -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE -from mindsdb.integrations.handlers.sap_erp_handler.api import SAPERP -from mindsdb.integrations.handlers.sap_erp_handler.sap_erp_tables import ( - AddressEmailAddressTable, - AddressFaxNumberTable, - AddressHomePageURLTable, - AddressPhoneNumberTable, - BPAddrDepdntIntlLocNumberTable, - BPContactToAddressTable, - BPContactToFuncAndDeptTable, - BPCreditWorthinessTable, - BPDataControllerTable, - BPFinancialServicesExtnTable, - BPFinancialServicesReportingTable, - BPFiscalYearInformationTable, - BPRelationshipTable, - BuPaAddressUsageTable, - BuPaIdentificationTable, - BuPaIndustryTable, - BusinessPartnerTable, - BusinessPartnerAddressTable, - BusinessPartnerContactTable, - BusinessPartnerPaymentCardTable, - BusinessPartnerRatingTable, - BusinessPartnerRoleTable, - BusinessPartnerTaxNumberTable, - BusPartAddrDepdntTaxNumberTable, - CustAddrDepdntExtIdentifierTable, - CustAddrDepdntInformationTable, - CustomerCompanyTable, - CustomerCompanyTextTable, - CustomerDunningTable, - CustomerSalesAreaTable, - CustomerSalesAreaTaxTable, - CustomerSalesAreaTextTable, - CustomerTaxGroupingTable, - CustomerTextTable, - CustomerUnloadingPointTable, - CustomerWithHoldingTaxTable, - CustSalesPartnerFuncTable, - CustSlsAreaAddrDepdntInfoTable, - CustSlsAreaAddrDepdntTaxInfoTable, - CustUnldgPtAddrDepdntInfoTable, - SupplierTable, - SupplierCompanyTable, - SupplierCompanyTextTable, - SupplierDunningTable, - SupplierPartnerFuncTable, - SupplierPurchasingOrgTable, - SupplierPurchasingOrgTextTable, - SupplierTextTable, - SupplierWithHoldingTaxTable, -) - -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse - - -class SAPERPHandler(APIHandler): - - def __init__(self, name: str, **kwargs) -> None: - super().__init__(name) - self.connection = None - self.is_connected = False - self.api_key = kwargs.get("connection_data", {}).get("api_key", "") - self.base_url = kwargs.get("connection_data", {}).get("base_url", "") - _tables = [ - AddressEmailAddressTable, - AddressFaxNumberTable, - AddressHomePageURLTable, - AddressPhoneNumberTable, - BPAddrDepdntIntlLocNumberTable, - BPContactToAddressTable, - BPContactToFuncAndDeptTable, - BPCreditWorthinessTable, - BPDataControllerTable, - BPFinancialServicesExtnTable, - BPFinancialServicesReportingTable, - BPFiscalYearInformationTable, - BPRelationshipTable, - BuPaAddressUsageTable, - BuPaIdentificationTable, - BuPaIndustryTable, - BusinessPartnerTable, - BusinessPartnerAddressTable, - BusinessPartnerContactTable, - BusinessPartnerPaymentCardTable, - BusinessPartnerRatingTable, - BusinessPartnerRoleTable, - BusinessPartnerTaxNumberTable, - BusPartAddrDepdntTaxNumberTable, - CustAddrDepdntExtIdentifierTable, - CustAddrDepdntInformationTable, - CustomerCompanyTable, - CustomerCompanyTextTable, - CustomerDunningTable, - CustomerSalesAreaTable, - CustomerSalesAreaTaxTable, - CustomerSalesAreaTextTable, - CustomerTaxGroupingTable, - CustomerTextTable, - CustomerUnloadingPointTable, - CustomerWithHoldingTaxTable, - CustSalesPartnerFuncTable, - CustSlsAreaAddrDepdntInfoTable, - CustSlsAreaAddrDepdntTaxInfoTable, - CustUnldgPtAddrDepdntInfoTable, - SupplierTable, - SupplierCompanyTable, - SupplierCompanyTextTable, - SupplierDunningTable, - SupplierPartnerFuncTable, - SupplierPurchasingOrgTable, - SupplierPurchasingOrgTextTable, - SupplierTextTable, - SupplierWithHoldingTaxTable, - ] - for Table in _tables: - self._register_table(Table.name, Table(self)) - self.connect() - - def check_connection(self) -> StatusResponse: - resp = StatusResponse(False) - if self.connection and not self.connection.is_connected(): - resp.error = "Client not connected" - else: - resp.success = True - return resp - - def connect(self) -> SAPERP: - self.connection = SAPERP(self.base_url, self.api_key) - return self.connection - - def native_query(self, query: str) -> StatusResponse: - ast = parse_sql(query) - return self.query(ast) - - -connection_args = OrderedDict( - api_key={ - 'type': ARG_TYPE.STR, - 'description': 'API Token for accessing SAP ERP', - 'required': True, - 'label': 'API Key', - }, - base_url={ - 'type': ARG_TYPE.STR, - 'description': 'Base URL of the host', - 'required': True, - 'label': 'Base URL', - } -) - - -connection_args_example = OrderedDict( - api_key='23d6b9e0c2fab7eba2e8b7e452cead47', - base_url='https://sandbox.api.sap.com/s4hanacloud/sap/opu/odata/sap/API_BUSINESS_PARTNER/' -) diff --git a/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_tables.py b/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_tables.py deleted file mode 100644 index 6d545393634..00000000000 --- a/mindsdb/integrations/handlers/sap_erp_handler/sap_erp_tables.py +++ /dev/null @@ -1,1928 +0,0 @@ -from typing import List - -import pandas as pd -from mindsdb_sql_parser import ast - -from mindsdb.integrations.utilities.handlers.query_utilities import ( - SELECTQueryExecutor, - SELECTQueryParser, -) -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb.integrations.utilities.sql_utils import conditions_to_filter - - -class CustomAPITable(APITable): - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.handler.connect() - - def get_columns(self, ignore: List[str] = []) -> List[str]: - return [item for item in self.columns if item not in ignore] - - def select(self, query: ast.Select) -> pd.DataFrame: - raise NotImplementedError() - - def parse_select(self, query: ast.Select, table_name: str): - select_statement_parser = SELECTQueryParser(query, table_name, self.get_columns()) - self.selected_columns, self.where_conditions, self.order_by_conditions, self.result_limit = select_statement_parser.parse_query() - - def get_where_param(self, query: ast.Select, param: str): - params = conditions_to_filter(query.where) - if param not in params: - raise Exception(f"WHERE condition does not have '{param}' selector") - return params[param] - - def apply_query_params(self, df, query): - select_statement_parser = SELECTQueryParser(query, self.name, self.get_columns()) - selected_columns, _, order_by_conditions, result_limit = select_statement_parser.parse_query() - select_statement_executor = SELECTQueryExecutor(df, selected_columns, [], order_by_conditions, result_limit) - return select_statement_executor.execute_query() - - -class AddressEmailAddressTable(CustomAPITable): - """Email address data linked to all business partner address records in the system""" - - name: str = "address_email_address" - columns: List[str] = [ - "id", - "uri", - "type", - "AddressID", - "Person", - "OrdinalNumber", - "IsDefaultEmailAddress", - "EmailAddress", - "SearchEmailAddress", - "AddressCommunicationRemarkText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_AddressEmailAddress") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class AddressFaxNumberTable(CustomAPITable): - """Fax address data linked to all the business partner address records in the system""" - - name: str = "address_fax_number" - columns: List[str] = [ - "id", - "uri", - "type", - "AddressID", - "Person", - "OrdinalNumber", - "IsDefaultFaxNumber", - "FaxCountry", - "FaxNumber", - "FaxNumberExtension", - "InternationalFaxNumber", - "AddressCommunicationRemarkText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_AddressFaxNumber") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class AddressHomePageURLTable(CustomAPITable): - """Home page URL address records linked to all business partner address records in the system""" - - name: str = "address_home_page" - columns: List[str] = [ - "id", - "uri", - "type", - "AddressID", - "Person", - "OrdinalNumber", - "ValidityStartDate", - "IsDefaultURLAddress", - "SearchURLAddress", - "AddressCommunicationRemarkText", - "URLFieldLength", - "WebsiteURL", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_AddressHomePageURL") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class AddressPhoneNumberTable(CustomAPITable): - """All the mobile/telephone address records linked to all the business partner address records in the system""" - - name: str = "address_phone_number" - columns: List[str] = [ - "id", - "uri", - "type", - "AddressID", - "Person", - "OrdinalNumber", - "DestinationLocationCountry", - "IsDefaultPhoneNumber", - "PhoneNumber", - "PhoneNumberExtension", - "InternationalPhoneNumber", - "PhoneNumberType", - "AddressCommunicationRemarkText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_AddressPhoneNumber") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPAddrDepdntIntlLocNumberTable(CustomAPITable): - """address dependent data for the business partner address by using the key fields business partner number and address ID""" - - name: str = "bp_addr_depdnt_intl_loc_number" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "AddressID", - "InternationalLocationNumber1", - "InternationalLocationNumber2", - "InternationalLocationNumber3", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPAddrDepdntIntlLocNumber") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPContactToAddressTable(CustomAPITable): - """Workplace address records linked to all the business partner contact records in the system.""" - - name: str = "bp_contact_to_address" - columns: List[str] = [ - "id", - "uri", - "type", - "RelationshipNumber", - "BusinessPartnerCompany", - "BusinessPartnerPerson", - "ValidityEndDate", - "AddressID", - "AddressNumber", - "AdditionalStreetPrefixName", - "AdditionalStreetSuffixName", - "AddressTimeZone", - "CareOfName", - "CityCode", - "CityName", - "CompanyPostalCode", - "Country", - "County", - "DeliveryServiceNumber", - "DeliveryServiceTypeCode", - "District", - "FormOfAddress", - "FullName", - "HomeCityName", - "HouseNumber", - "HouseNumberSupplementText", - "Language", - "POBox", - "POBoxDeviatingCityName", - "POBoxDeviatingCountry", - "POBoxDeviatingRegion", - "POBoxIsWithoutNumber", - "POBoxLobbyName", - "POBoxPostalCode", - "Person", - "PostalCode", - "PrfrdCommMediumType", - "Region", - "StreetName", - "StreetPrefixName", - "StreetSuffixName", - "TaxJurisdiction", - "TransportZone", - "AddressRepresentationCode", - "ContactPersonBuilding", - "ContactPersonPrfrdCommMedium", - "ContactRelationshipDepartment", - "ContactRelationshipFunction", - "CorrespondenceShortName", - "Floor", - "InhouseMail", - "IsDefaultAddress", - "RoomNumber", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPContactToAddress") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPContactToFuncAndDeptTable(CustomAPITable): - """Contact person department and function data linked to all business partner contact records in the system""" - - name: str = "bp_contact_to_func_and_dept" - columns: List[str] = [ - "id", - "uri", - "type", - "RelationshipNumber", - "BusinessPartnerCompany", - "BusinessPartnerPerson", - "ValidityEndDate", - "ContactPersonAuthorityType", - "ContactPersonDepartment", - "ContactPersonDepartmentName", - "ContactPersonFunction", - "ContactPersonFunctionName", - "ContactPersonRemarkText", - "ContactPersonVIPType", - "EmailAddress", - "FaxNumber", - "FaxNumberExtension", - "PhoneNumber", - "PhoneNumberExtension", - "RelationshipCategory", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPContactToFuncAndDept") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPCreditWorthinessTable(CustomAPITable): - """Contact person department and function data linked to all business partner contact records in the system""" - - name: str = "bp_credit_worthiness" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "BusPartCreditStanding", - "BPCreditStandingStatus", - "CreditRatingAgency", - "BPCreditStandingComment", - "BPCreditStandingDate", - "BPCreditStandingRating", - "BPLegalProceedingStatus", - "BPLglProceedingInitiationDate", - "BusinessPartnerIsUnderOath", - "BusinessPartnerOathDate", - "BusinessPartnerIsBankrupt", - "BusinessPartnerBankruptcyDate", - "BPForeclosureIsInitiated", - "BPForeclosureDate", - "BPCrdtWrthnssAccessChkIsActive", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPCreditWorthiness") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPDataControllerTable(CustomAPITable): - """Business partner data controllers of all the available records linked to business partners in the system""" - - name: str = "bp_data_controller" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "DataController", - "PurposeForPersonalData", - "DataControlAssignmentStatus", - "BPDataControllerIsDerived", - "PurposeDerived", - "PurposeType", - "BusinessPurposeFlag", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPDataController") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPFinancialServicesExtnTable(CustomAPITable): - """Financial services business partner attributes of all the available records linked to business partners in the system""" - - name: str = "bp_financial_services_extn" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "BusinessPartnerIsVIP", - "TradingPartner", - "FactoryCalendar", - "BusinessPartnerOfficeCountry", - "BusinessPartnerOfficeRegion", - "BPRegisteredOfficeName", - "BPBalanceSheetCurrency", - "BPLastCptlIncrAmtInBalShtCrcy", - "BPLastCapitalIncreaseYear", - "BPBalanceSheetDisplayType", - "BusinessPartnerCitizenship", - "BPMaritalPropertyRegime", - "BusinessPartnerIncomeCurrency", - "BPNumberOfChildren", - "BPNumberOfHouseholdMembers", - "BPAnnualNetIncAmtInIncomeCrcy", - "BPMonthlyNetIncAmtInIncomeCrcy", - "BPAnnualNetIncomeYear", - "BPMonthlyNetIncomeMonth", - "BPMonthlyNetIncomeYear", - "BPPlaceOfDeathName", - "CustomerIsUnwanted", - "UndesirabilityReason", - "UndesirabilityComment", - "LastCustomerContactDate", - "BPGroupingCharacter", - "BPLetterSalutation", - "BusinessPartnerTargetGroup", - "BusinessPartnerEmployeeGroup", - "BusinessPartnerIsEmployee", - "BPTermnBusRelationsBankDate", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPFinancialServicesExtn") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPFinancialServicesReportingTable(CustomAPITable): - """Financial services reporting attributes of all the available records linked to business partners in the system""" - - name: str = "bp_financial_services_reporting" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "BPIsNonResident", - "BPNonResidencyStartDate", - "BPIsMultimillionLoanRecipient", - "BPLoanReportingBorrowerNumber", - "BPLoanRptgBorrowerEntityNumber", - "BPCreditStandingReview", - "BPCreditStandingReviewDate", - "BusinessPartnerLoanToManager", - "BPCompanyRelationship", - "BPLoanReportingCreditorNumber", - "BPOeNBIdentNumber", - "BPOeNBTargetGroup", - "BPOeNBIdentNumberAssigned", - "BPOeNBInstituteNumber", - "BusinessPartnerIsOeNBInstitute", - "BusinessPartnerGroup", - "BPGroupAssignmentCategory", - "BusinessPartnerGroupName", - "BusinessPartnerLegalEntity", - "BPGerAstRglnRestrictedAstQuota", - "BusinessPartnerDebtorGroup", - "BusinessPartnerBusinessPurpose", - "BusinessPartnerRiskGroup", - "BPRiskGroupingDate", - "BPHasGroupAffiliation", - "BPIsMonetaryFinInstitution", - "BPCrdtStandingReviewIsRequired", - "BPLoanMonitoringIsRequired", - "BPHasCreditingRelief", - "BPInvestInRstrcdAstIsAuthzd", - "BPCentralBankCountryRegion", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPFinancialServicesReporting") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPFiscalYearInformationTable(CustomAPITable): - """Business partner fiscal year information of all the available records linked to business partners in the system.""" - - name: str = "bp_fiscal_year_information" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "BusinessPartnerFiscalYear", - "BPBalanceSheetCurrency", - "BPAnnualStockholderMeetingDate", - "BPFiscalYearStartDate", - "BPFiscalYearEndDate", - "BPFiscalYearIsClosed", - "BPFiscalYearClosingDate", - "BPFsclYrCnsldtdFinStatementDte", - "BPCapitalStockAmtInBalShtCrcy", - "BPIssdStockCptlAmtInBalShtCrcy", - "BPPartcipnCertAmtInBalShtCrcy", - "BPEquityCapitalAmtInBalShtCrcy", - "BPGrossPremiumAmtInBalShtCrcy", - "BPNetPremiumAmtInBalShtCrcy", - "BPAnnualSalesAmtInBalShtCrcy", - "BPAnnualNetIncAmtInBalShtCrcy", - "BPDividendDistrAmtInBalShtCrcy", - "BPDebtRatioInYears", - "BPAnnualPnLAmtInBalShtCrcy", - "BPBalSheetTotalAmtInBalShtCrcy", - "BPNumberOfEmployees", - "BPCptlReserveAmtInBalShtCrcy", - "BPLglRevnRsrvAmtInBalShtCrcy", - "RevnRsrvOwnStkAmtInBalShtCrcy", - "BPStatryReserveAmtInBalShtCrcy", - "BPOthRevnRsrvAmtInBalShtCrcy", - "BPPnLCarryfwdAmtInBalShtCrcy", - "BPSuborddLbltyAmtInBalShtCrcy", - "BPRetOnTotalCptlEmpldInPercent", - "BPDebtClearancePeriodInYears", - "BPFinancingCoeffInPercent", - "BPEquityRatioInPercent", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPFiscalYearInformation") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BPRelationshipTable(CustomAPITable): - """Business partner relationship data fields of all the available records in the system""" - - name: str = "bp_relationship" - columns: List[str] = [ - "id", - "uri", - "type", - "RelationshipNumber", - "BusinessPartner1", - "BusinessPartner2", - "ValidityEndDate", - "ValidityStartDate", - "IsStandardRelationship", - "RelationshipCategory", - "BPRelationshipType", - "CreatedByUser", - "CreationDate", - "CreationTime", - "LastChangedByUser", - "LastChangeDate", - "LastChangeTime", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BPRelationship") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BuPaAddressUsageTable(CustomAPITable): - """All the address usage records linked to all business partner address records in the system""" - - name: str = "bu_pa_address_usage" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "ValidityEndDate", - "AddressUsage", - "AddressID", - "ValidityStartDate", - "StandardUsage", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BuPaAddressUsage") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BuPaIdentificationTable(CustomAPITable): - """Business partner identification data fields of all the records available records in the system""" - - name: str = "bu_pa_identification" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "BPIdentificationType", - "BPIdentificationNumber", - "BPIdnNmbrIssuingInstitute", - "BPIdentificationEntryDate", - "Country", - "Region", - "ValidityStartDate", - "ValidityEndDate", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BuPaIdentification") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BuPaIndustryTable(CustomAPITable): - """Business partner industry data fields of all the available records in the system""" - - name: str = "bu_pa_industry" - columns: List[str] = [ - "id", - "uri", - "type", - "IndustrySector", - "IndustrySystemType", - "BusinessPartner", - "IsStandardIndustry", - "IndustryKeyDescription", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BuPaIndustry") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BusinessPartnerTable(CustomAPITable): - """General data fields of all the business partner records available in the system""" - - name: str = "business_partner" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "Customer", - "Supplier", - "AcademicTitle", - "AuthorizationGroup", - "BusinessPartnerCategory", - "BusinessPartnerFullName", - "BusinessPartnerGrouping", - "BusinessPartnerName", - "BusinessPartnerUUID", - "CorrespondenceLanguage", - "CreatedByUser", - "CreationDate", - "CreationTime", - "FirstName", - "FormOfAddress", - "Industry", - "InternationalLocationNumber1", - "InternationalLocationNumber2", - "IsFemale", - "IsMale", - "IsNaturalPerson", - "IsSexUnknown", - "GenderCodeName", - "Language", - "LastChangeDate", - "LastChangeTime", - "LastChangedByUser", - "LastName", - "LegalForm", - "OrganizationBPName1", - "OrganizationBPName2", - "OrganizationBPName3", - "OrganizationBPName4", - "OrganizationFoundationDate", - "OrganizationLiquidationDate", - "SearchTerm1", - "SearchTerm2", - "AdditionalLastName", - "BirthDate", - "BusinessPartnerBirthDateStatus", - "BusinessPartnerBirthplaceName", - "BusinessPartnerDeathDate", - "BusinessPartnerIsBlocked", - "BusinessPartnerType", - "ETag", - "GroupBusinessPartnerName1", - "GroupBusinessPartnerName2", - "IndependentAddressID", - "InternationalLocationNumber3", - "MiddleName", - "NameCountry", - "NameFormat", - "PersonFullName", - "PersonNumber", - "IsMarkedForArchiving", - "BusinessPartnerIDByExtSystem", - "BusinessPartnerPrintFormat", - "BusinessPartnerOccupation", - "BusPartMaritalStatus", - "BusPartNationality", - "BusinessPartnerBirthName", - "BusinessPartnerSupplementName", - "NaturalPersonEmployerName", - "LastNamePrefix", - "LastNameSecondPrefix", - "Initials", - "BPDataControllerIsNotRequired", - "TradingPartner", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BusinessPartner") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BusinessPartnerAddressTable(CustomAPITable): - """Business partner address data fields of all the available records in the system""" - - name: str = "business_partner_address" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "AddressID", - "ValidityStartDate", - "ValidityEndDate", - "AuthorizationGroup", - "AddressUUID", - "AdditionalStreetPrefixName", - "AdditionalStreetSuffixName", - "AddressTimeZone", - "CareOfName", - "CityCode", - "CityName", - "CompanyPostalCode", - "Country", - "County", - "DeliveryServiceNumber", - "DeliveryServiceTypeCode", - "District", - "FormOfAddress", - "FullName", - "HomeCityName", - "HouseNumber", - "HouseNumberSupplementText", - "Language", - "POBox", - "POBoxDeviatingCityName", - "POBoxDeviatingCountry", - "POBoxDeviatingRegion", - "POBoxIsWithoutNumber", - "POBoxLobbyName", - "POBoxPostalCode", - "Person", - "PostalCode", - "PrfrdCommMediumType", - "Region", - "StreetName", - "StreetPrefixName", - "StreetSuffixName", - "TaxJurisdiction", - "TransportZone", - "AddressIDByExternalSystem", - "CountyCode", - "TownshipCode", - "TownshipName", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BusinessPartnerAddress") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BusinessPartnerContactTable(CustomAPITable): - """Business partner contact data fields of all the available records in the system""" - - name: str = "business_partner_contact" - columns: List[str] = [ - "id", - "uri", - "type", - "RelationshipNumber", - "BusinessPartnerCompany", - "BusinessPartnerPerson", - "ValidityEndDate", - "ValidityStartDate", - "IsStandardRelationship", - "RelationshipCategory", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BusinessPartnerContact") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BusinessPartnerPaymentCardTable(CustomAPITable): - """Business partner payment card data fields of all the available records in the system""" - - name: str = "business_partner_payment_card" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "PaymentCardID", - "PaymentCardType", - "CardNumber", - "IsStandardCard", - "CardDescription", - "ValidityDate", - "ValidityEndDate", - "CardHolder", - "CardIssuingBank", - "CardIssueDate", - "PaymentCardLock", - "MaskedCardNumber", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BusinessPartnerPaymentCard") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BusinessPartnerRatingTable(CustomAPITable): - """Business partner ratings of all the available records linked to business partners in the system""" - - name: str = "business_partner_rating" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "BusinessPartnerRatingProcedure", - "BPRatingValidityEndDate", - "BusinessPartnerRatingGrade", - "BusinessPartnerRatingTrend", - "BPRatingValidityStartDate", - "BPRatingCreationDate", - "BusinessPartnerRatingComment", - "BusinessPartnerRatingIsAllowed", - "BPRatingIsValidOnKeyDate", - "BusinessPartnerRatingKeyDate", - "BusinessPartnerRatingIsExpired", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BusinessPartnerRating") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BusinessPartnerRoleTable(CustomAPITable): - """Business partner role data fields of all the records available records in the system""" - - name: str = "business_partner_role" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "BusinessPartnerRole", - "ValidFrom", - "ValidTo", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BusinessPartnerRole") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BusinessPartnerTaxNumberTable(CustomAPITable): - """Tax number data of all the available records linked to business partners in the system""" - - name: str = "business_partner_tax_number" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "BPTaxType", - "BPTaxNumber", - "BPTaxLongNumber", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BusinessPartnerTaxNumber") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class BusPartAddrDepdntTaxNumberTable(CustomAPITable): - """Address dependent tax number data of all the available records linked to business partners in the system""" - - name: str = "business_partner_address_dependent_tax_number" - columns: List[str] = [ - "id", - "uri", - "type", - "BusinessPartner", - "AddressID", - "BPTaxType", - "BPTaxNumber", - "BPTaxLongNumber", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_BusPartAddrDepdntTaxNmbr") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustAddrDepdntExtIdentifierTable(CustomAPITable): - """Address dependent external identifiers of all the available records linked to customers in the system""" - - name: str = "cust_addr_depdnt_ext_identifier" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "AddressID", - "CustomerExternalRefID", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustAddrDepdntExtIdentifier") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustAddrDepdntInformationTable(CustomAPITable): - """General data of all the customer records available in the system""" - - name: str = "customer" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "AuthorizationGroup", - "BillingIsBlockedForCustomer", - "CreatedByUser", - "CreationDate", - "CustomerAccountGroup", - "CustomerClassification", - "CustomerFullName", - "BPCustomerFullName", - "CustomerName", - "BPCustomerName", - "DeliveryIsBlocked", - "FreeDefinedAttribute01", - "FreeDefinedAttribute02", - "FreeDefinedAttribute03", - "FreeDefinedAttribute04", - "FreeDefinedAttribute05", - "FreeDefinedAttribute06", - "FreeDefinedAttribute07", - "FreeDefinedAttribute08", - "FreeDefinedAttribute09", - "FreeDefinedAttribute10", - "NFPartnerIsNaturalPerson", - "OrderIsBlockedForCustomer", - "PostingIsBlocked", - "Supplier", - "CustomerCorporateGroup", - "FiscalAddress", - "Industry", - "IndustryCode1", - "IndustryCode2", - "IndustryCode3", - "IndustryCode4", - "IndustryCode5", - "InternationalLocationNumber1", - "InternationalLocationNumber2", - "InternationalLocationNumber3", - "NielsenRegion", - "PaymentReason", - "ResponsibleType", - "TaxNumber1", - "TaxNumber2", - "TaxNumber3", - "TaxNumber4", - "TaxNumber5", - "TaxNumberType", - "VATRegistration", - "DeletionIndicator", - "ExpressTrainStationName", - "TrainStationName", - "CityCode", - "County", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_Customer") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerCompanyTable(CustomAPITable): - """Customer company data fields of all the available records in the system linked to customer""" - - name: str = "customer_company" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "CompanyCode", - "APARToleranceGroup", - "AccountByCustomer", - "AccountingClerk", - "AccountingClerkFaxNumber", - "AccountingClerkInternetAddress", - "AccountingClerkPhoneNumber", - "AlternativePayerAccount", - "AuthorizationGroup", - "CollectiveInvoiceVariant", - "CustomerAccountNote", - "CustomerHeadOffice", - "CustomerSupplierClearingIsUsed", - "HouseBank", - "InterestCalculationCode", - "InterestCalculationDate", - "IntrstCalcFrequencyInMonths", - "IsToBeLocallyProcessed", - "ItemIsToBePaidSeparately", - "LayoutSortingRule", - "PaymentBlockingReason", - "PaymentMethodsList", - "PaymentReason", - "PaymentTerms", - "PaytAdviceIsSentbyEDI", - "PhysicalInventoryBlockInd", - "ReconciliationAccount", - "RecordPaymentHistoryIndicator", - "UserAtCustomer", - "DeletionIndicator", - "CashPlanningGroup", - "KnownOrNegotiatedLeave", - "ValueAdjustmentKey", - "CustomerAccountGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerCompany") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerCompanyTextTable(CustomAPITable): - """Customer company text records attached to customer company in the system""" - - name: str = "customer_company_text" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "CompanyCode", - "Language", - "LongTextID", - "LongText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerCompanyText") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerDunningTable(CustomAPITable): - """Dunning records attached to customer company in the system""" - - name: str = "customer_dunning" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "CompanyCode", - "DunningArea", - "DunningBlock", - "DunningLevel", - "DunningProcedure", - "DunningRecipient", - "LastDunnedOn", - "LegDunningProcedureOn", - "DunningClerk", - "AuthorizationGroup", - "CustomerAccountGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerDunning") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerSalesAreaTable(CustomAPITable): - """Customer sales area data fields of all the available records in the system""" - - name: str = "customer_sales_area" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "SalesOrganization", - "DistributionChannel", - "Division", - "AccountByCustomer", - "AuthorizationGroup", - "BillingIsBlockedForCustomer", - "CompleteDeliveryIsDefined", - "CreditControlArea", - "Currency", - "CustIsRlvtForSettlmtMgmt", - "CustomerABCClassification", - "CustomerAccountAssignmentGroup", - "CustomerGroup", - "CustomerIsRebateRelevant", - "CustomerPaymentTerms", - "CustomerPriceGroup", - "CustomerPricingProcedure", - "CustProdProposalProcedure", - "DeliveryIsBlockedForCustomer", - "DeliveryPriority", - "IncotermsClassification", - "IncotermsLocation2", - "IncotermsVersion", - "IncotermsLocation1", - "IncotermsSupChnLoc1AddlUUID", - "IncotermsSupChnLoc2AddlUUID", - "IncotermsSupChnDvtgLocAddlUUID", - "DeletionIndicator", - "IncotermsTransferLocation", - "InspSbstHasNoTimeOrQuantity", - "InvoiceDate", - "ItemOrderProbabilityInPercent", - "ManualInvoiceMaintIsRelevant", - "MaxNmbrOfPartialDelivery", - "OrderCombinationIsAllowed", - "OrderIsBlockedForCustomer", - "OverdelivTolrtdLmtRatioInPct", - "PartialDeliveryIsAllowed", - "PriceListType", - "ProductUnitGroup", - "ProofOfDeliveryTimeValue", - "SalesGroup", - "SalesItemProposal", - "SalesOffice", - "ShippingCondition", - "SlsDocIsRlvtForProofOfDeliv", - "SlsUnlmtdOvrdelivIsAllwd", - "SupplyingPlant", - "SalesDistrict", - "UnderdelivTolrtdLmtRatioInPct", - "InvoiceListSchedule", - "ExchangeRateType", - "AdditionalCustomerGroup1", - "AdditionalCustomerGroup2", - "AdditionalCustomerGroup3", - "AdditionalCustomerGroup4", - "AdditionalCustomerGroup5", - "PaymentGuaranteeProcedure", - "CustomerAccountGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerSalesArea") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerSalesAreaTaxTable(CustomAPITable): - """Customer sales area tax data fields of all the available records in the system""" - - name: str = "customer_sales_area_tax" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "SalesOrganization", - "DistributionChannel", - "Division", - "DepartureCountry", - "CustomerTaxCategory", - "CustomerTaxClassification", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerSalesAreaTax") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerSalesAreaTextTable(CustomAPITable): - """Customer sales area text fields of all the available records in the system linked to customer sales areas""" - - name: str = "customer_sales_area_text" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "SalesOrganization", - "DistributionChannel", - "Division", - "Language", - "LongTextID", - "LongText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerSalesAreaText") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerTaxGroupingTable(CustomAPITable): - """Customer tax grouping data attached to a customer in the system""" - - name: str = "customer_tax_grouping" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "CustomerTaxGroupingCode", - "CustTaxGrpExemptionCertificate", - "CustTaxGroupExemptionRate", - "CustTaxGroupExemptionStartDate", - "CustTaxGroupExemptionEndDate", - "CustTaxGroupSubjectedStartDate", - "CustTaxGroupSubjectedEndDate", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerTaxGrouping") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerTextTable(CustomAPITable): - """Customer text data attached to a customer in the system""" - - name: str = "customer_text" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "Language", - "LongTextID", - "LongText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerText") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerUnloadingPointTable(CustomAPITable): - """Unloading point data attached to a customer in the system""" - - name: str = "customer_unloading_point" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "UnloadingPointName", - "CustomerFactoryCalenderCode", - "BPGoodsReceivingHoursCode", - "IsDfltBPUnloadingPoint", - "MondayMorningOpeningTime", - "MondayMorningClosingTime", - "MondayAfternoonOpeningTime", - "MondayAfternoonClosingTime", - "TuesdayMorningOpeningTime", - "TuesdayMorningClosingTime", - "TuesdayAfternoonOpeningTime", - "TuesdayAfternoonClosingTime", - "WednesdayMorningOpeningTime", - "WednesdayMorningClosingTime", - "WednesdayAfternoonOpeningTime", - "WednesdayAfternoonClosingTime", - "ThursdayMorningOpeningTime", - "ThursdayMorningClosingTime", - "ThursdayAfternoonOpeningTime", - "ThursdayAfternoonClosingTime", - "FridayMorningOpeningTime", - "FridayMorningClosingTime", - "FridayAfternoonOpeningTime", - "FridayAfternoonClosingTime", - "SaturdayMorningOpeningTime", - "SaturdayMorningClosingTime", - "SaturdayAfternoonOpeningTime", - "SaturdayAfternoonClosingTime", - "SundayMorningOpeningTime", - "SundayMorningClosingTime", - "SundayAfternoonOpeningTime", - "SundayAfternoonClosingTime", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerUnloadingPoint") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustomerWithHoldingTaxTable(CustomAPITable): - """Withholding tax records attached to customer company in the system""" - - name: str = "customer_withholding_tax" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "CompanyCode", - "WithholdingTaxType", - "WithholdingTaxCode", - "WithholdingTaxAgent", - "ObligationDateBegin", - "ObligationDateEnd", - "WithholdingTaxNumber", - "WithholdingTaxCertificate", - "WithholdingTaxExmptPercent", - "ExemptionDateBegin", - "ExemptionDateEnd", - "ExemptionReason", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustomerWithHoldingTax") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustSalesPartnerFuncTable(CustomAPITable): - """Partner function fields of all the available records in the system linked to customer sales areas""" - - name: str = "customer_sales_partner_func" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "SalesOrganization", - "DistributionChannel", - "Division", - "PartnerCounter", - "PartnerFunction", - "BPCustomerNumber", - "CustomerPartnerDescription", - "DefaultPartner", - "Supplier", - "PersonnelNumber", - "ContactPerson", - "AddressID", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustSalesPartnerFunc") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustSlsAreaAddrDepdntInfoTable(CustomAPITable): - """Address dependent customer sales area data fields of all the available records in the system""" - - name: str = "customer_sales_area_addr_depdnt_info" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "SalesOrganization", - "DistributionChannel", - "Division", - "AddressID", - "IncotermsClassification", - "IncotermsLocation1", - "IncotermsLocation2", - "IncotermsSupChnLoc1AddlUUID", - "IncotermsSupChnLoc2AddlUUID", - "IncotermsSupChnDvtgLocAddlUUID", - "DeliveryIsBlocked", - "SalesOffice", - "SalesGroup", - "ShippingCondition", - "SupplyingPlant", - "IncotermsVersion", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustSlsAreaAddrDepdntInfo") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustSlsAreaAddrDepdntTaxInfoTable(CustomAPITable): - """Address dependent customer sales area tax data fields of all the available records in the system""" - - name: str = "customer_sales_area_addr_depdnt_tax_info" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "SalesOrganization", - "DistributionChannel", - "Division", - "AddressID", - "DepartureCountry", - "CustomerTaxCategory", - "CustomerTaxClassification", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustSlsAreaAddrDepdntTaxInfo") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class CustUnldgPtAddrDepdntInfoTable(CustomAPITable): - """Address dependent customer unloading point data fields of all the available records in the system""" - - name: str = "customer_unloading_point_addr_depdnt_info" - columns: List[str] = [ - "id", - "uri", - "type", - "Customer", - "AddressID", - "UnloadingPointName", - "CustomerFactoryCalenderCode", - "BPGoodsReceivingHoursCode", - "IsDfltBPUnloadingPoint", - "MondayMorningOpeningTime", - "MondayMorningClosingTime", - "MondayAfternoonOpeningTime", - "MondayAfternoonClosingTime", - "TuesdayMorningOpeningTime", - "TuesdayMorningClosingTime", - "TuesdayAfternoonOpeningTime", - "TuesdayAfternoonClosingTime", - "WednesdayMorningOpeningTime", - "WednesdayMorningClosingTime", - "WednesdayAfternoonOpeningTime", - "WednesdayAfternoonClosingTime", - "ThursdayMorningOpeningTime", - "ThursdayMorningClosingTime", - "ThursdayAfternoonOpeningTime", - "ThursdayAfternoonClosingTime", - "FridayMorningOpeningTime", - "FridayMorningClosingTime", - "FridayAfternoonOpeningTime", - "FridayAfternoonClosingTime", - "SaturdayMorningOpeningTime", - "SaturdayMorningClosingTime", - "SaturdayAfternoonOpeningTime", - "SaturdayAfternoonClosingTime", - "SundayMorningOpeningTime", - "SundayMorningClosingTime", - "SundayAfternoonOpeningTime", - "SundayAfternoonClosingTime", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_CustUnldgPtAddrDepdntInfo") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierTable(CustomAPITable): - """General data of all the supplier records available in the system""" - - name: str = "supplier" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "AlternativePayeeAccountNumber", - "AuthorizationGroup", - "CreatedByUser", - "CreationDate", - "Customer", - "PaymentIsBlockedForSupplier", - "PostingIsBlocked", - "PurchasingIsBlocked", - "SupplierAccountGroup", - "SupplierFullName", - "SupplierName", - "VATRegistration", - "BirthDate", - "ConcatenatedInternationalLocNo", - "DeletionIndicator", - "FiscalAddress", - "Industry", - "InternationalLocationNumber1", - "InternationalLocationNumber2", - "InternationalLocationNumber3", - "IsNaturalPerson", - "PaymentReason", - "ResponsibleType", - "SuplrQltyInProcmtCertfnValidTo", - "SuplrQualityManagementSystem", - "SupplierCorporateGroup", - "SupplierProcurementBlock", - "TaxNumber1", - "TaxNumber2", - "TaxNumber3", - "TaxNumber4", - "TaxNumber5", - "TaxNumberResponsible", - "TaxNumberType", - "SuplrProofOfDelivRlvtCode", - "BR_TaxIsSplit", - "DataExchangeInstructionKey", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_Supplier") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierCompanyTable(CustomAPITable): - """supplier company data available in the system""" - - name: str = "supplier_company" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "CompanyCode", - "AuthorizationGroup", - "CompanyCodeName", - "PaymentBlockingReason", - "SupplierIsBlockedForPosting", - "AccountingClerk", - "AccountingClerkFaxNumber", - "AccountingClerkPhoneNumber", - "SupplierClerk", - "SupplierClerkURL", - "PaymentMethodsList", - "PaymentReason", - "PaymentTerms", - "ClearCustomerSupplier", - "IsToBeLocallyProcessed", - "ItemIsToBePaidSeparately", - "PaymentIsToBeSentByEDI", - "HouseBank", - "CheckPaidDurationInDays", - "Currency", - "BillOfExchLmtAmtInCoCodeCrcy", - "SupplierClerkIDBySupplier", - "ReconciliationAccount", - "InterestCalculationCode", - "InterestCalculationDate", - "IntrstCalcFrequencyInMonths", - "SupplierHeadOffice", - "AlternativePayee", - "LayoutSortingRule", - "APARToleranceGroup", - "SupplierCertificationDate", - "SupplierAccountNote", - "WithholdingTaxCountry", - "DeletionIndicator", - "CashPlanningGroup", - "IsToBeCheckedForDuplicates", - "MinorityGroup", - "SupplierAccountGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_SupplierCompany") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierCompanyTextTable(CustomAPITable): - """Supplier company text data attached to supplier company in the system""" - - name: str = "supplier_company_text" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "CompanyCode", - "Language", - "LongTextID", - "LongText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_SupplierCompanyText") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierDunningTable(CustomAPITable): - """Dunning records attached to supplier company in the system""" - - name: str = "supplier_dunning" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "CompanyCode", - "DunningArea", - "DunningBlock", - "DunningLevel", - "DunningProcedure", - "DunningRecipient", - "LastDunnedOn", - "LegDunningProcedureOn", - "DunningClerk", - "AuthorizationGroup", - "SupplierAccountGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_SupplierDunning") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierPartnerFuncTable(CustomAPITable): - """Partner function fields of all the available records in the system linked to supplier purchasing organization""" - - name: str = "supplier_partner_func" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "PurchasingOrganization", - "SupplierSubrange", - "Plant", - "PartnerFunction", - "PartnerCounter", - "DefaultPartner", - "CreationDate", - "CreatedByUser", - "ReferenceSupplier", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_SupplierPartnerFunc") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierPurchasingOrgTable(CustomAPITable): - """Supplier purchasing organization data attached to supplier records in the system""" - - name: str = "supplier_purchasing_org" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "PurchasingOrganization", - "AutomaticEvaluatedRcptSettlmt", - "CalculationSchemaGroupCode", - "DeletionIndicator", - "EvaldReceiptSettlementIsActive", - "IncotermsClassification", - "IncotermsTransferLocation", - "IncotermsVersion", - "IncotermsLocation1", - "IncotermsLocation2", - "IncotermsSupChnLoc1AddlUUID", - "IncotermsSupChnLoc2AddlUUID", - "IncotermsSupChnDvtgLocAddlUUID", - "IntrastatCrsBorderTrMode", - "InvoiceIsGoodsReceiptBased", - "InvoiceIsMMServiceEntryBased", - "MaterialPlannedDeliveryDurn", - "MinimumOrderAmount", - "PaymentTerms", - "PlanningCycle", - "PricingDateControl", - "ProdStockAndSlsDataTransfPrfl", - "ProductUnitGroup", - "PurOrdAutoGenerationIsAllowed", - "PurchaseOrderCurrency", - "PurchasingGroup", - "PurchasingIsBlockedForSupplier", - "RoundingProfile", - "ShippingCondition", - "SuplrDiscountInKindIsGranted", - "SuplrInvcRevalIsAllowed", - "SuplrIsRlvtForSettlmtMgmt", - "SuplrPurgOrgIsRlvtForPriceDetn", - "SupplierABCClassificationCode", - "SupplierAccountNumber", - "SupplierIsReturnsSupplier", - "SupplierPhoneNumber", - "SupplierRespSalesPersonName", - "SupplierConfirmationControlKey", - "IsOrderAcknRqd", - "AuthorizationGroup", - "SupplierAccountGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_SupplierPurchasingOrg") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierPurchasingOrgTextTable(CustomAPITable): - """Supplier purchasing organization text data attached to purchasing organization in the system""" - - name: str = "supplier_purchasing_org_text" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "PurchasingOrganization", - "Language", - "LongTextID", - "LongText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_SupplierPurchasingOrgText") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierTextTable(CustomAPITable): - """Supplier text data attached to purchasing organization in the system""" - - name: str = "supplier_text" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "Language", - "LongTextID", - "LongText", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_SupplierText") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) - - -class SupplierWithHoldingTaxTable(CustomAPITable): - """Withholding tax records attached to supplier company in the system""" - - name: str = "supplier_withholding_tax" - columns: List[str] = [ - "id", - "uri", - "type", - "Supplier", - "CompanyCode", - "WithholdingTaxType", - "ExemptionDateBegin", - "ExemptionDateEnd", - "ExemptionReason", - "IsWithholdingTaxSubject", - "RecipientType", - "WithholdingTaxCertificate", - "WithholdingTaxCode", - "WithholdingTaxExmptPercent", - "WithholdingTaxNumber", - "AuthorizationGroup", - ] - - def __init__(self, handler: APIHandler): - super().__init__(handler) - self.connection = self.handler.connect() - - def select(self, query: ast.Select) -> pd.DataFrame: - data = self.connection.get("A_SupplierWithHoldingTax") - df = pd.DataFrame.from_records(data) - return self.apply_query_params(df, query) diff --git a/mindsdb/integrations/handlers/sap_erp_handler/tests/__init__.py b/mindsdb/integrations/handlers/sap_erp_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/scylla_handler/README.md b/mindsdb/integrations/handlers/scylla_handler/README.md deleted file mode 100644 index 26ae1998535..00000000000 --- a/mindsdb/integrations/handlers/scylla_handler/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# MindsDB ScyllaDB Handler - -This README provides details on the ScyllaDB handler integration for MindsDB. - -## Introduction to ScyllaDB - -ScyllaDB is an open-source distributed NoSQL wide-column data store. It was purposefully designed to offer compatibility with Apache Cassandra while outperforming it with higher throughputs and reduced latencies. For a comprehensive understanding of ScyllaDB, visit ScyllaDB's official website. - -### Integration Implementation - -The ScyllaDB handler for MindsDB was developed using the scylla-driver library for Python. -Connection Parameters: - -- host: Host name or IP address of ScyllaDB. -- port: Connection port -- user: Authentication username. Optional; required only if authentication is enabled. -- password: Authentication password. Optional; required only if authentication is enabled. -- keyspace: The specific keyspace (top-level container for tables) to connect to. -- protocol_version: Optional. Defaults to 4. -- secure_connect_bundle: Optional. Needed only for connections to DataStax Astra. - -## Usage Guide - -To set up a connection between MindsDB and a Scylla server, utilize the following SQL syntax: - -```sql - -CREATE DATABASE scylladb_datasource -WITH ENGINE='scylladb', -PARAMETERS={ - "user":"user@mindsdb.com", - "password": "pass", - "host": "127.0.0.1", - "port": 9042, - "keyspace": "test_data" -}; -``` - -> ℹ️ Tip: The protocol version is set to 4 by default. Should you wish to modify it, simply include "protocol_version": 5 within the PARAMETERS dictionary in the query above. - -## Querying the Keyspace: - -With the connection established, you can execute queries on your keyspace as demonstrated below: - -```sql -SELECT * FROM scylladb_datasource.keystore.example_table LIMIT 10; -``` diff --git a/mindsdb/integrations/handlers/scylla_handler/__about__.py b/mindsdb/integrations/handlers/scylla_handler/__about__.py deleted file mode 100644 index 60e50993337..00000000000 --- a/mindsdb/integrations/handlers/scylla_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Scylla handler' -__package_name__ = 'mindsdb_scylla_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Scylla" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/scylla_handler/__init__.py b/mindsdb/integrations/handlers/scylla_handler/__init__.py deleted file mode 100644 index a809585a2d9..00000000000 --- a/mindsdb/integrations/handlers/scylla_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args -try: - from .scylla_handler import ScyllaHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'ScyllaDB' -name = 'scylladb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/scylla_handler/connection_args.py b/mindsdb/integrations/handlers/scylla_handler/connection_args.py deleted file mode 100644 index 12ab6247776..00000000000 --- a/mindsdb/integrations/handlers/scylla_handler/connection_args.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'User name', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'Password', - 'required': True, - 'label': 'Password', - 'secret': True - }, - protocol_version={ - 'type': ARG_TYPE.INT, - 'description': 'is not required, and default to 4.', - 'required': False, - 'label': 'Protocol version' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': ' is the host name or IP address of the ScyllaDB.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Server port', - 'required': True, - 'label': 'Port' - }, - keyspace={ - 'type': ARG_TYPE.STR, - 'description': 'is the keyspace to connect to. It is a top level container for tables.', - 'required': True, - 'label': 'Keyspace' - }, - secure_connect_bundle={ - 'type': ARG_TYPE.STR, - 'description': 'Path or URL to the secure connect bundle', - 'required': True, - 'label': 'Host' - } -) diff --git a/mindsdb/integrations/handlers/scylla_handler/icon.svg b/mindsdb/integrations/handlers/scylla_handler/icon.svg deleted file mode 100644 index 3ea6d0e1a63..00000000000 --- a/mindsdb/integrations/handlers/scylla_handler/icon.svg +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/scylla_handler/requirements.txt b/mindsdb/integrations/handlers/scylla_handler/requirements.txt deleted file mode 100644 index cfb07fc70e1..00000000000 --- a/mindsdb/integrations/handlers/scylla_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -scylla-driver \ No newline at end of file diff --git a/mindsdb/integrations/handlers/scylla_handler/scylla_handler.py b/mindsdb/integrations/handlers/scylla_handler/scylla_handler.py deleted file mode 100644 index 87dd688165a..00000000000 --- a/mindsdb/integrations/handlers/scylla_handler/scylla_handler.py +++ /dev/null @@ -1,206 +0,0 @@ -import tempfile - -import pandas as pd -import requests - -from cassandra.cluster import Cluster -from cassandra.auth import PlainTextAuthProvider -from cassandra.util import Date - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser import ast -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class ScyllaHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Scylla statements. - """ - name = 'scylla' - - def __init__(self, name=None, **kwargs): - super().__init__(name) - self.parser = parse_sql - self.connection_args = kwargs.get('connection_data') - self.session = None - self.is_connected = False - - def download_secure_bundle(self, url, max_size=10 * 1024 * 1024): - """ - Downloads the secure bundle from a given URL and stores it in a temporary file. - - :param url: URL of the secure bundle to be downloaded. - :param max_size: Maximum allowable size of the bundle in bytes. Defaults to 10MB. - :return: Path to the downloaded secure bundle saved as a temporary file. - :raises ValueError: If the secure bundle size exceeds the allowed `max_size`. - - TODO: - - Find a way to periodically clean up or delete the temporary files - after they have been used to prevent filling up storage over time. - """ - response = requests.get(url, stream=True, timeout=10) - response.raise_for_status() - - content_length = int(response.headers.get('content-length', 0)) - if content_length > max_size: - raise ValueError("Secure bundle is larger than the allowed size!") - - with tempfile.NamedTemporaryFile(delete=False) as temp_file: - size_downloaded = 0 - for chunk in response.iter_content(chunk_size=8192): - size_downloaded += len(chunk) - if size_downloaded > max_size: - raise ValueError("Secure bundle is larger than the allowed size!") - temp_file.write(chunk) - return temp_file.name - - def connect(self): - """ - Handles the connection to a Scylla keystore. - """ - if self.is_connected is True: - return self.session - - auth_provider = None - if any(key in self.connection_args for key in ('user', 'password')): - if all(key in self.connection_args for key in ('user', 'password')): - auth_provider = PlainTextAuthProvider( - username=self.connection_args['user'], password=self.connection_args['password'] - ) - else: - raise ValueError("If authentication is required, both 'user' and 'password' must be provided!") - - connection_props = { - 'auth_provider': auth_provider - } - connection_props['protocol_version'] = self.connection_args.get('protocol_version', 4) - secure_connect_bundle = self.connection_args.get('secure_connect_bundle') - - if secure_connect_bundle: - # Check if the secure bundle is a URL - if secure_connect_bundle.startswith(('http://', 'https://')): - secure_connect_bundle = self.download_secure_bundle(secure_connect_bundle) - connection_props['cloud'] = { - 'secure_connect_bundle': secure_connect_bundle - } - else: - connection_props['contact_points'] = [self.connection_args['host']] - connection_props['port'] = int(self.connection_args['port']) - - cluster = Cluster(**connection_props) - session = cluster.connect(self.connection_args.get('keyspace')) - - self.is_connected = True - self.session = session - return self.session - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the Scylla database - :return: success status and error message if error occurs - """ - response = StatusResponse(False) - - try: - session = self.connect() - # TODO: change the healthcheck - session.execute('SELECT release_version FROM system.local').one() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Scylla {self.connection_args["keyspace"]}, {e}!') - response.error_message = e - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def prepare_response(self, resp): - # replace cassandra types - data = [] - for row in resp: - row2 = {} - for k, v in row._asdict().items(): - if isinstance(v, Date): - v = v.date() - row2[k] = v - data.append(row2) - return data - - def native_query(self, query: str) -> Response: - """ - Receive SQL query and runs it - :param query: The SQL query to run in MySQL - :return: returns the records from the current recordset - """ - session = self.connect() - try: - resp = session.execute(query).all() - resp = self.prepare_response(resp) - if resp: - response = Response( - RESPONSE_TYPE.TABLE, - pd.DataFrame( - resp - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_args["keyspace"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement. - """ - - # remove table alias because Cassandra Query Language doesn't support it - if isinstance(query, ast.Select): - if isinstance(query.from_table, ast.Identifier) and query.from_table.alias is not None: - query.from_table.alias = None - - # remove table name from fields - table_name = query.from_table.parts[-1] - - for target in query.targets: - if isinstance(target, ast.Identifier): - if target.parts[0] == table_name: - target.parts.pop(0) - - renderer = SqlalchemyRender('mysql') - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Get a list with all of the tabels in MySQL - """ - q = "DESCRIBE TABLES;" - result = self.native_query(q) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name) -> Response: - """ - Show details about the table - """ - q = f"DESCRIBE {table_name};" - result = self.native_query(q) - return result diff --git a/mindsdb/integrations/handlers/scylla_handler/tests/__init__.py b/mindsdb/integrations/handlers/scylla_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/scylla_handler/tests/test_scylla_handler.py b/mindsdb/integrations/handlers/scylla_handler/tests/test_scylla_handler.py deleted file mode 100644 index be3de5cb503..00000000000 --- a/mindsdb/integrations/handlers/scylla_handler/tests/test_scylla_handler.py +++ /dev/null @@ -1,38 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.scylla_handler.scylla_handler import ScyllaHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class ScyllaHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "", - "port": "9042", - "user": "", - "password": "", - "keyspace": "test_data" - } - } - cls.handler = ScyllaHandler('test_scylla_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.check_connection() - - def test_1_native_query_show_keyspaces(self): - dbs = self.handler.native_query("DESC KEYSPACES;") - assert dbs['type'] is not RESPONSE_TYPE.ERROR - - def test_2_get_tables(self): - tbls = self.handler.get_tables() - assert tbls['type'] is not RESPONSE_TYPE.ERROR - - def test_3_describe_table(self): - described = self.handler.get_columns("home_rentals") - assert described['type'] is RESPONSE_TYPE.TABLE - - def test_4_select_query(self): - query = "SELECT * FROM home_rentals WHERE 'id'='3712'" - result = self.handler.query(query) - assert result['type'] is RESPONSE_TYPE.TABLE diff --git a/mindsdb/integrations/handlers/sendinblue_handler/README.md b/mindsdb/integrations/handlers/sendinblue_handler/README.md deleted file mode 100644 index 35c66fc489a..00000000000 --- a/mindsdb/integrations/handlers/sendinblue_handler/README.md +++ /dev/null @@ -1,82 +0,0 @@ -# Sendinblue Handler - -Sendinblue handler for MindsDB provides interfaces to connect to Sendinblue via APIs and pull repository data into MindsDB. - ---- - -## Table of Contents - -- [Sendinblue Handler](#github-handler) - - [Table of Contents](#table-of-contents) - - [About Sendinblue](#about-githhub) - - [Sendinblue Handler Implementation](#sendinblue-handler-implementation) - - [Sendinblue Handler Initialization](#sendinblue-handler-initialization) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About Sendinblue - -Sendinblue is the only all-in-one digital marketing platform empowering B2B and B2C businesses, ecommerce sellers and agencies to build customer relationships through end to end digital marketing campaigns, transactional messaging, and marketing automation. -
-https://www.sendinblue.com/about/ - -## Sendinblue Handler Implementation - -This handler was implemented using [sib-api-v3-sdk](https://github.com/sendinblue/APIv3-python-library), the Python SDK for Sendinblue. - -## Sendinblue Handler Initialization - -The Sendinblue handler is initialized with the following parameters: - -- `api_key`: a required Sendinblue API key to use for authentication - -Read about creating a Sendinblue API key [here](https://developers.sendinblue.com/docs). - -## Implemented Features - -- [x] Sendinblue Email Campaigns Table for a given Repository - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -## TODO - -- [ ] Support INSERT, UPDATE and DELETE for Email Campaigns table -- [ ] Sendinblue Contacts table -- [ ] Sendinblue Companies table -- [ ] Sendinblue Conversations table -- [ ] Sendinblue Deals table -- [ ] Many more - -## Example Usage - -The first step is to create a database with the new `sendinblue` engine by passing in the required `api_key` parameter: - -~~~~sql -CREATE DATABASE sib_datasource -WITH ENGINE = 'sendinblue', -PARAMETERS = { - "api_key": "xkeysib-..." -}; -~~~~ - -Use the established connection to query your database: - -~~~~sql -SELECT * FROM sib_datasource.email_campaigns -~~~~ - -Run more advanced queries: - -~~~~sql -SELECT id, name -FROM sib_datasource.email_campaigns -WHERE status = 'sent' -ORDER BY name -LIMIT 5 -~~~~ diff --git a/mindsdb/integrations/handlers/sendinblue_handler/__about__.py b/mindsdb/integrations/handlers/sendinblue_handler/__about__.py deleted file mode 100644 index d4f0f95f892..00000000000 --- a/mindsdb/integrations/handlers/sendinblue_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Sendinblue handler" -__package_name__ = "mindsdb_sendinblue_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Sendinblue" -__author__ = "Minura Punchihewa" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/sendinblue_handler/__init__.py b/mindsdb/integrations/handlers/sendinblue_handler/__init__.py deleted file mode 100644 index 61e0aab853f..00000000000 --- a/mindsdb/integrations/handlers/sendinblue_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .sendinblue_handler import SendinblueHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Sendinblue" -name = "sendinblue" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/sendinblue_handler/icon.svg b/mindsdb/integrations/handlers/sendinblue_handler/icon.svg deleted file mode 100644 index 4de5f88af28..00000000000 --- a/mindsdb/integrations/handlers/sendinblue_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sendinblue_handler/requirements.txt b/mindsdb/integrations/handlers/sendinblue_handler/requirements.txt deleted file mode 100644 index 6f0b49768ef..00000000000 --- a/mindsdb/integrations/handlers/sendinblue_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sib_api_v3_sdk -urllib3>=2.6.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py b/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py deleted file mode 100644 index 81f27a9ba6d..00000000000 --- a/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py +++ /dev/null @@ -1,95 +0,0 @@ -import sib_api_v3_sdk - -from mindsdb.integrations.handlers.sendinblue_handler.sendinblue_tables import EmailCampaignsTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class SendinblueHandler(APIHandler): - """ - The Sendinblue handler implementation. - """ - - name = 'sendinblue' - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - email_campaigns_data = EmailCampaignsTable(self) - self._register_table("email_campaigns", email_campaigns_data) - - def connect(self): - """ - Set up the connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - if self.is_connected is True: - return self.connection - - configuration = sib_api_v3_sdk.Configuration() - configuration.api_key['api-key'] = self.connection_data['api_key'] - - self.connection = sib_api_v3_sdk.ApiClient(configuration) - - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - - try: - connection = self.connect() - api_instance = sib_api_v3_sdk.AccountApi(connection) - api_instance.get_account() - response.success = True - except Exception as e: - logger.error('Error connecting to Sendinblue!') - response.error_message = str(e) - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py b/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py deleted file mode 100644 index 139a4a99865..00000000000 --- a/mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py +++ /dev/null @@ -1,276 +0,0 @@ -import sib_api_v3_sdk -import pandas as pd - - -from typing import List, Dict, Text, Any -from mindsdb.utilities import log -from mindsdb.integrations.libs.api_handler import APITable - -from mindsdb_sql_parser import ast -from sib_api_v3_sdk.rest import ApiException - - -from mindsdb.integrations.utilities.handlers.query_utilities import ( - SELECTQueryParser, - SELECTQueryExecutor, - UPDATEQueryExecutor, - UPDATEQueryParser, - DELETEQueryParser, - DELETEQueryExecutor, - INSERTQueryParser, -) - -logger = log.getLogger(__name__) - - -class EmailCampaignsTable(APITable): - """The Sendinblue Email Campaigns Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the Sendinblue "GET /emailCampaigns" API endpoint. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Sendinblue Email Campaigns matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, 'email_campaigns', self.get_columns() - ) - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - email_campaigns_df = pd.json_normalize( - self.get_email_campaigns(limit=result_limit) - ) - - select_statement_executor = SELECTQueryExecutor( - email_campaigns_df, selected_columns, where_conditions, order_by_conditions - ) - email_campaigns_df = select_statement_executor.execute_query() - - return email_campaigns_df - - def get_columns(self) -> List[str]: - return pd.json_normalize(self.get_email_campaigns(limit=1)).columns.tolist() - - def get_email_campaigns(self, **kwargs): - connection = self.handler.connect() - email_campaigns_api_instance = sib_api_v3_sdk.EmailCampaignsApi(connection) - email_campaigns = email_campaigns_api_instance.get_email_campaigns(**kwargs) - return [email_campaign for email_campaign in email_campaigns.campaigns] - - def delete(self, query: ast.Delete) -> None: - """ - Deletes an email campaign from Sendinblue. - - Parameters - ---------- - query : ast.Delete - Given SQL DELETE query - - Returns - ------- - None - - Raises - ------ - RuntimeError - If an error occurs when calling Sendinblue's API - """ - # this parses the DELETE statement to extract where conditions - delete_statement_parser = DELETEQueryParser(query) - where_conditions = delete_statement_parser.parse_query() - # this retrieves the current list of email campaigns and normalize the data into a DataFrame - email_campaigns_df = pd.json_normalize(self.get_email_campaigns()) - # this execute the delete query to filter out the campaigns to be deleted - delete_query_executor = DELETEQueryExecutor( - email_campaigns_df, where_conditions - ) - # this gets the updated DataFrame after executing delete conditions - email_campaigns_df = delete_query_executor.execute_query() - campaign_ids = email_campaigns_df['id'].tolist() - self.delete_email_campaigns(campaign_ids) - - def delete_email_campaigns(self, campaign_ids: List[Text]) -> None: - # this establish a connection to Sendinblue API - connection = self.handler.connect() - email_campaigns_api_instance = sib_api_v3_sdk.EmailCampaignsApi(connection) - for campaign_id in campaign_ids: - try: - email_campaigns_api_instance.delete_email_campaign(campaign_id) - logger.info(f'Email Campaign {campaign_id} deleted') - except ApiException as e: - logger.error( - f"Exception when calling EmailCampaignsApi->delete_email_campaign: {e}\n" - ) - raise RuntimeError( - f"Failed to execute the delete command for Email Campaign {campaign_id}" - ) from e - - def update(self, query: 'ast.Update') -> None: - """ - Updates data in Sendinblue "PUT /emailCampaigns/{campaignId}" API endpoint. - - Parameters - ---------- - query : ast.Update - Given SQL UPDATE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - RuntimeError - If an error occurs when calling Sendinblue's API - """ - # this parse the UPDATE statement to extract the new values and the conditions for the update - update_statement_parser = UPDATEQueryParser(query) - values_to_update, where_conditions = update_statement_parser.parse_query() - - email_campaigns_df = pd.json_normalize(self.get_email_campaigns()) - update_query_executor = UPDATEQueryExecutor( - email_campaigns_df, where_conditions - ) - # this retrieves the current list of email campaigns - email_campaigns_df = update_query_executor.execute_query() - # this extracts the IDs of the campaigns that have been updated - campaign_ids = email_campaigns_df['id'].tolist() - - self.update_email_campaigns(campaign_ids, values_to_update) - - def update_email_campaigns( - self, campaign_ids: List[int], values_to_update: Dict - ) -> None: - # this establish a connection to Sendinblue API - - connection = self.handler.connect() - email_campaigns_api_instance = sib_api_v3_sdk.EmailCampaignsApi(connection) - - for campaign_id in campaign_ids: - try: - email_campaigns_api_instance.update_email_campaign( - campaign_id, values_to_update - ) - logger.info(f'Email Campaign {campaign_id} updated') - except ApiException as e: - logger.error( - f"Exception when calling EmailCampaignsApi->update_email_campaign: {e}\n" - ) - raise RuntimeError( - f"Failed to execute the update command for Email Campaign {campaign_id}" - ) from e - - def insert(self, query: 'ast.Insert') -> None: - """ - Inserts new email campaigns into Sendinblue. - - Parameters - ---------- - query : ast.Insert - The SQL INSERT query to be parsed and executed. - - Raises - ------ - ValueError - If the necessary sender information is incomplete or incorrectly formatted. - Exception - For any unexpected errors during the email campaign creation. - """ - # this defines columns that are supported and mandatory for an INSERT operation. - supported_columns = [ - 'name', 'subject', 'sender_name', 'sender_email', - 'html_content', 'scheduled_at', 'recipients_lists', 'tag' - ] - mandatory_columns = ['name', 'subject', 'sender_name', 'sender_email', 'html_content'] - - # this Parse the INSERT query to extract data. - insert_statement_parser = INSERTQueryParser( - query, supported_columns=supported_columns, - mandatory_columns=mandatory_columns, all_mandatory=True - ) - email_campaigns_data = insert_statement_parser.parse_query() - - # this processes each campaign data extracted from the query. - for email_campaign_data in email_campaigns_data: - # this extracts and format sender information. - sender_info = {} - if 'sender_name' in email_campaign_data: - sender_info['name'] = email_campaign_data.pop('sender_name') - if 'sender_email' in email_campaign_data and email_campaign_data['sender_email'] is not None: - sender_info['email'] = email_campaign_data.pop('sender_email') - if 'sender_id' in email_campaign_data and email_campaign_data['sender_id'] is not None: - sender_info['id'] = email_campaign_data.pop('sender_id') - - # this validates sender information. - if not sender_info.get('name') or (not sender_info.get('email') and not sender_info.get('id')): - raise ValueError("Sender information is incomplete or incorrectly formatted.") - - email_campaign_data['sender'] = sender_info - - # this creates each email campaign. - self.create_email_campaign(email_campaign_data) - - def create_email_campaign(self, email_campaign_data: Dict[str, Any]) -> None: - """ - Creates a new email campaign in Sendinblue. - - Parameters - ---------- - email_campaign_data : Dict[str, Any] - The data for the email campaign to be created. - - Raises - ------ - Exception - For any errors during the email campaign creation process. - """ - # this establish a connection to the Sendinblue API. - api_session = self.handler.connect() - email_campaigns_api_instance = sib_api_v3_sdk.EmailCampaignsApi(api_session) - - # this logs the data for the email campaign being created. - logger.info(f"Email campaign data before creating the object: {email_campaign_data}") - - try: - # this creates the email campaign object and send it to Sendinblue. - email_campaign = sib_api_v3_sdk.CreateEmailCampaign(**email_campaign_data) - logger.info(f"Email campaign object after creation: {email_campaign}") - - # this executes the API call to create the campaign. - created_campaign = email_campaigns_api_instance.create_email_campaign(email_campaign) - - # this checks and log the response from the API. - if 'id' not in created_campaign.to_dict(): - logger.error('Email campaign creation failed') - else: - logger.info(f'Email Campaign {created_campaign.to_dict()["id"]} created') - except ApiException as e: - # this handles API exceptions and log the detailed response. - logger.error(f"Exception when calling EmailCampaignsApi->create_email_campaign: {e}") - if hasattr(e, 'body'): - logger.error(f"Sendinblue API response body: {e.body}") - raise Exception(f'Failed to create Email Campaign with data: {email_campaign_data}') from e - except Exception as e: - # this handles any other unexpected exceptions. - logger.error(f"Unexpected error occurred: {e}") - raise Exception(f'Unexpected error during Email Campaign creation: {e}') from e diff --git a/mindsdb/integrations/handlers/serpstack_handler/README.md b/mindsdb/integrations/handlers/serpstack_handler/README.md deleted file mode 100644 index 6db2284f49b..00000000000 --- a/mindsdb/integrations/handlers/serpstack_handler/README.md +++ /dev/null @@ -1,120 +0,0 @@ -# Serpstack Handler - -Serpstack handler for MindsDB connects to the Serpstack API and pulls data into MindsDB. - ---- - -## Table of Contents - -- [Serpstack Handler](#serpstack-handler) - - [Table of Contents](#table-of-contents) - - [About Serpstack](#about-serpstack) - - [Serpstack Handler Implementation](#serpstack-handler-implementation) - - [Serpstack Handler Initialisation](#serpstack-handler-initialisation) - - [How to Get Your Serpstack Access Key](#how-to-get-your-serpstack-access-key) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About Serpstack - -Serpstack is a real-time search engine results API that provides accurate and reliable search data. It allows users to perform keyword searches and retrieve various types of search results including organic results, images, videos, news, and shopping results etc. - -## Serpstack Handler Implementation - -This handler was implemented using the Serpstack API. The API provides a simple and efficient way to access search engine results programmatically. - -## Serpstack Handler Initialisation - -The Serpstack handler is initialised with the following parameter: - -- `access_key`: a required API access key for the Serpstack API - -## How to Get Your Serpstack Access Key - -1. Sign up for an account on [Serpstack](https://serpstack.com). -2. Choose the plan that suits your needs. -3. Once the account is created, an access key will be generated for you. This key is required to authenticate API requests. - -## Implemented Features - -- Fetch organic search results based on a query. -- Fetch image search results based on a query. -- Fetch video search results based on a query. -- Fetch news search results based on a query. -- Fetch shopping search results based on a query. - -## TODO - -- Potentially create more tables for: - - Knowledge graph - - Inline tweets - - Related searches - - Questions - -## Example Usage - -### Create the Serpstack Database - -```sql -CREATE DATABASE my_serpstack -WITH - ENGINE = 'serpstack', - PARAMETERS = { - "access_key": "YOUR_ACCESS_KEY" - }; -``` - -After setting up the Serpstack handler, the user can use SQL queries to fetch data from search engines using Serpstack. Note that the `query` parameter is required in order to make searches. If no results are found for a given query, a table filled with 'No results found' will be returned instead. - -### Fetch Organic Search Results - -```sql -SELECT * -FROM my_serpstack.organic_results -WHERE query = 'KFC'; -``` - -### Fetch Image Search Results - -```sql -SELECT * -FROM my_serpstack.image_results -WHERE query = 'Dinosaurs'; -``` - -### Fetch Video Search Results - -```sql -SELECT * -FROM my_serpstack.video_results -WHERE query = 'NBA Finals'; -``` - -### Fetch News Search Results - -```sql -SELECT * -FROM my_serpstack.news_results -WHERE query = 'Euros 2024'; -``` - -### Fetch Shopping Search Results - -```sql -SELECT * -FROM my_serpstack.shopping_results -WHERE query = 'Nvidia 4090'; -``` - -### Example with Additional Parameters - -You can include additional request parameters in your SQL queries. Refer to the [Serpstack documentation](https://serpstack.com/documentation) under **HTTP GET Request Parameters** for a full list of available parameters. - -```sql -SELECT * -FROM my_serpstack.organic_results -WHERE query = 'McDonalds' AND gl = 'no' AND page = '2'; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/serpstack_handler/__about__.py b/mindsdb/integrations/handlers/serpstack_handler/__about__.py deleted file mode 100644 index 6c77916b05d..00000000000 --- a/mindsdb/integrations/handlers/serpstack_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Serpstack handler' -__package_name__ = 'mindsdb_serpstack_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Serpstack API" -__author__ = 'Anders Ooi' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2024 - mindsdb' diff --git a/mindsdb/integrations/handlers/serpstack_handler/__init__.py b/mindsdb/integrations/handlers/serpstack_handler/__init__.py deleted file mode 100644 index f06de8ab1d9..00000000000 --- a/mindsdb/integrations/handlers/serpstack_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .serpstack_handler import SerpstackHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Serpstack" -name = "serpstack" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path" -] diff --git a/mindsdb/integrations/handlers/serpstack_handler/icon.svg b/mindsdb/integrations/handlers/serpstack_handler/icon.svg deleted file mode 100644 index 110bbdeb732..00000000000 --- a/mindsdb/integrations/handlers/serpstack_handler/icon.svg +++ /dev/null @@ -1,86 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/serpstack_handler/serpstack_handler.py b/mindsdb/integrations/handlers/serpstack_handler/serpstack_handler.py deleted file mode 100644 index 48b7199b6df..00000000000 --- a/mindsdb/integrations/handlers/serpstack_handler/serpstack_handler.py +++ /dev/null @@ -1,123 +0,0 @@ -import requests - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.libs.api_handler import APIHandler - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response -) - -from .serpstack_tables import ( - OrganicResultsTable, ImageResultsTable, - VideoResultsTable, NewsResultsTable, ShoppingResultsTable -) - - -logger = log.getLogger(__name__) - - -class SerpstackHandler(APIHandler): - """A class for handling connections and interactions with the Serpstack API. - Attributes: - api_key (str): API access key for the Serpstack API. - is_connected (bool): Whether or not the API client is connected to Serpstack. - """ - - name = 'serpstack' - - def __init__(self, name: str = None, **kwargs): - super().__init__(name) - - connection_data = kwargs.get('connection_data', {}) - self.connection_data = connection_data - - self.access_key = None - self.base_url = None - self.is_connected = False - - if 'access_key' in self.connection_data: - self.access_key = self.connection_data['access_key'] - - # register tables - organic_results = OrganicResultsTable(self) - self._register_table('organic_results', organic_results) - - image_results = ImageResultsTable(self) - self._register_table('image_results', image_results) - - video_results = VideoResultsTable(self) - self._register_table('video_results', video_results) - - news_results = NewsResultsTable(self) - self._register_table('news_results', news_results) - - shopping_results = ShoppingResultsTable(self) - self._register_table('shopping_results', shopping_results) - - def connect(self): - """ Sets up connection and returns the base URL to be used""" - - if self.is_connected: - return self.base_url - - if not self.access_key: - logger.error("No access key provided for Serpstack API") - return None - - try: - url = f"https://api.serpstack.com/search?access_key={self.access_key}" - api_request = requests.get(url) - api_response = api_request.json() - # error 105 means that user is on a free plan - if api_response['error']['code'] == 105: - self.base_url = "http://api.serpstack.com/search" - # error 310 means that missing search query, which means that user can use https - elif api_response['error']['code'] == 310: - self.base_url = "https://api.serpstack.com/search" - # any other error suggests issues with the account - else: - logger.error(f"Failed to connect to Serpstack API: {api_response['error']['info']}") - return None - - self.is_connected = True - return self.base_url - - except Exception as e: - logger.error(f"Failed to connect to Serpstack API: {str(e)}") - return None - - def check_connection(self) -> StatusResponse: - """ Checks connection to Serpstack API""" - response = StatusResponse(False) - - try: - self.connect() - response.success = True - response.copy_storage = True - except Exception as e: - response.error_message = ( - f"Failed to connect to Serpstack API: {str(e)}" - ) - logger.error(response.error_message) - response.success = False - - self.is_connected = response.success - - return response - - def native_query(self, query: str = None) -> Response: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/serpstack_handler/serpstack_tables.py b/mindsdb/integrations/handlers/serpstack_handler/serpstack_tables.py deleted file mode 100644 index b9e1def3fff..00000000000 --- a/mindsdb/integrations/handlers/serpstack_handler/serpstack_tables.py +++ /dev/null @@ -1,260 +0,0 @@ -import pandas as pd -import requests -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - - -class BaseResultsTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Selects data from the results table and returns it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - base_url = self.handler.connect() - - params = {'access_key': self.handler.access_key} - conditions = extract_comparison_conditions(query.where) - params.update({condition[1]: condition[2] for condition in conditions if condition[0] == '='}) - - if 'query' not in params: - raise ValueError('Query is missing in the SQL query') - if 'type' not in params and hasattr(self, 'default_type'): - params['type'] = self.default_type - try: - api_response = requests.get(base_url, params=params) - api_response.raise_for_status() # raises HTTPError for bad responses - api_result = api_response.json() - except requests.exceptions.HTTPError as e: - raise SystemError(f"HTTP error occurred: {e.response.status_code} - {e.response.reason}") - except requests.exceptions.ConnectionError as e: - raise SystemError(f"Connection error occurred: {str(e)}") - except requests.exceptions.Timeout as e: - raise SystemError(f"Request timeout: {str(e)}") - except requests.exceptions.RequestException as e: - raise SystemError(f"Request exception occurred: {str(e)}") - except ValueError as e: - raise SystemError(f"Failed to parse JSON response: {str(e)}") - - results = api_result.get(self.results_key, []) - processed_results = [self.extract_data(result) for result in results] - - if len(processed_results) == 0: - columns = self.get_columns() - empty_data = {col: ["No results found"] for col in columns} - return pd.DataFrame(empty_data, columns=columns) - - result_df = pd.DataFrame(processed_results) - result_df = self.filter_columns(result_df, query) - return result_df - - def extract_data(self, data): - """ - Extracts the required data from the result. - - Args: - data (dict): The result data. - - Returns: - dict: A dictionary containing the extracted data. - """ - raise NotImplementedError("Subclasses must implement this method.") - - def filter_columns(self, result: pd.DataFrame, query: ast.Select = None): - """ - Filters the columns of the result DataFrame. - - Args: - result (pandas.DataFrame): The result DataFrame. - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the filtered data. - """ - columns = [] - if query is not None: - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - else: - columns = self.get_columns() - - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - result = result[columns] - - if query is not None and query.limit is not None: - result = result.head(query.limit.value) - - return result - - -class OrganicResultsTable(BaseResultsTable): - results_key = 'organic_results' - - def extract_data(self, organic): - return { - 'position': organic.get('position'), - 'title': organic.get('title'), - 'url': organic.get('url'), - 'domain': organic.get('domain'), - 'displayed_url': organic.get('displayed_url'), - 'snippet': organic.get('snippet'), - 'cached_page_url': organic.get('cached_page_url'), - 'related_pages_url': organic.get('related_pages_url'), - 'prerender': organic.get('prerender'), - 'sitelinks': self._extract_sitelinks(organic.get('sitelinks')), - 'rich_snippet': self._extract_rich_snippet(organic.get('rich_snippet')) - } - - def _extract_sitelinks(self, sitelinks): - if not sitelinks: - return None - return { - 'inline': [{'title': link['title'], 'url': link['url']} for link in sitelinks.get('inline', [])], - 'expanded': [{'title': link['title'], 'url': link['url']} for link in sitelinks.get('expanded', [])] - } - - def _extract_rich_snippet(self, rich_snippet): - if not rich_snippet: - return None - snippet_type = 'top' if 'top' in rich_snippet else 'bottom' - return { - 'detected_extensions': rich_snippet.get(snippet_type, {}).get('detected_extensions', []), - 'extensions': rich_snippet.get(snippet_type, {}).get('extensions', []) - } - - def get_columns(self): - return [ - 'position', - 'title', - 'url', - 'domain', - 'displayed_url', - 'snippet', - 'cached_page_url', - 'related_pages_url', - 'prerender', - 'sitelinks', - 'rich_snippet' - ] - - -class ImageResultsTable(BaseResultsTable): - results_key = 'image_results' - default_type = 'images' - - def extract_data(self, image): - return { - 'position': image.get('position'), - 'title': image.get('title'), - 'width': image.get('width'), - 'height': image.get('height'), - 'image_url': image.get('image_url'), - 'type': image.get('type'), - 'url': image.get('url'), - 'source': image.get('source') - } - - def get_columns(self): - return [ - 'position', - 'title', - 'width', - 'height', - 'image_url', - 'type', - 'url', - 'source' - ] - - -class VideoResultsTable(BaseResultsTable): - results_key = 'video_results' - default_type = 'videos' - - def extract_data(self, video): - return { - 'position': video.get('position'), - 'title': video.get('title'), - 'url': video.get('url'), - 'displayed_url': video.get('displayed_url'), - 'uploaded': video.get('uploaded'), - 'snippet': video.get('snippet'), - 'length': video.get('length') - } - - def get_columns(self): - return [ - 'position', - 'title', - 'url', - 'displayed_url', - 'uploaded', - 'snippet', - 'length' - ] - - -class NewsResultsTable(BaseResultsTable): - results_key = 'news_results' - default_type = 'news' - - def extract_data(self, news): - return { - 'position': news.get('position'), - 'title': news.get('title'), - 'url': news.get('url'), - 'source_name': news.get('source_name'), - 'uploaded': news.get('uploaded'), - 'uploaded_utc': news.get('uploaded_utc'), - 'snippet': news.get('snippet'), - 'thumbnail_url': news.get('thumbnail_url') - } - - def get_columns(self): - return [ - 'position', - 'title', - 'url', - 'source_name', - 'uploaded', - 'uploaded_utc', - 'snippet', - 'thumbnail_url' - ] - - -class ShoppingResultsTable(BaseResultsTable): - results_key = 'shopping_results' - default_type = 'shopping' - - def extract_data(self, shopping): - return { - 'position': shopping.get('position'), - 'title': shopping.get('title'), - 'url': shopping.get('url') - } - - def get_columns(self): - return [ - 'position', - 'title', - 'url' - ] diff --git a/mindsdb/integrations/handlers/sharepoint_handler/README.md b/mindsdb/integrations/handlers/sharepoint_handler/README.md deleted file mode 100644 index 738cb2e4d89..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/README.md +++ /dev/null @@ -1,110 +0,0 @@ -# Sharepoint Handler - -Sharepoint handler for MindsDB provides interfaces to connect to Sharepoint via graph APIs and pull data into MindsDB. - ---- - -## Table of Contents - -- [Sharepoint Handler](#Sharepoint-handler) - - [Table of Contents](#table-of-contents) - - [About Sharepoint](#about-sharepoint) - - [Sharepoint Handler Implementation](#sharepoint-handler-implementation) - - [Sharepoint Handler Initialization](#sharepoint-handler-initialization) - - [Implemented Features](#implemented-features) - - [Some useful definitions](#some-useful-definitions) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About Sharepoint - -SharePoint in Microsoft 365 empowers teamwork with dynamic and productive team sites for every project team, department, and division. Share files, data, news, and resources. Customize your site to streamline your team’s work. - -## Sharepoint Handler Implementation - -This handler was implemented using the [Microsoft Graph API](https://learn.microsoft.com/en-us/graph/use-the-api) endpoint. - -Graph API is a REST API endpoint that provides a simple and easy-to-use interface to access many microsoft tools including Sharepoint. - -## Sharepoint Handler Initialization - -The Sharepoint handler is initialized with the following parameters: - -- `clientId`: (required) Microsoft App client ID -- `clientSecret`: (required) client secret associated with the App -- `tenantId`: (required) GUID of the tenant in which the App has been created - -## How to get your credentials. - -1. Visit Microsoft Entra admin center and register a new App -2. Go to API permissions and grant all the permissions related to sharepoint sites and resources -3. Go to Certificates & secrets tab of the app and create a new client secret -4. Now go to the App overview page where you will find client-ID and tenant-ID of your App. - -## Implemented Features - -- Fetch sites associated with the account and ability to update the metadata associated with a site (deletion and creation of sites has not been implemented) -- Fetch lists associated with the account and ability to create more lists, update fields associated with lists and deletion of lists -- Fetch site columns associated with the account and ability to create more site columns, update fields associated with site columns and deletion of site columns -- Fetch list items associated with the account and ability to create more list items, update fields associated with list items and deletion of list items - - -## Some useful definitions - -### Sites: -SharePoint-sites are essentially containers for information. The way you store and organize things in SharePoint is by Sites. - -### Lists: -A list is a collection of data that you can share with your team members and people who you've provided access to. You'll find a number of ready-to-use list templates to provide a good starting point for organizing list items. - -https://support.microsoft.com/en-us/office/introduction-to-lists-0a1c3ace-def0-44af-b225-cfa8d92c52d7 - -### Site columns -A Site Column is a template of a configured column. By creating a Site Column, you can reuse it anywhere else in the site and not have to manually rebuild its configuration at each reuse. - -When creating a new column in a list or library, you have a choice to either "Create column" or "Add from existing site columns". Selecting the latter will add a replica of the Site Column to the location you are working. - -https://learn.microsoft.com/en-us/microsoft-365/community/what-is-site-column - -### List items -A SharePoint list can be considered as a collection of items. The list items can be a variety of things, such as contacts, calendars, announcements, and issues-tracking. - -https://support.microsoft.com/en-us/office/introduction-to-lists-0a1c3ace-def0-44af-b225-cfa8d92c52d7 - -### Difference between site column and list columns -The main difference between site column and list columns is the scope of use. -That is a list column will only be available to that particular list/library, and not outside that boundary. -If you wish to use that column outside that list/library, you will have to recreate it at the new location. - -Site columns on the other hand, are created at the site level, and available to reuse from the site they're created in (as the starting point). - -https://learn.microsoft.com/en-us/microsoft-365/community/list-column-or-site-column-which-one-to-choose - -## TODO - -- Update and delete a site which is functionality that is not yet supported by Graph API -- Add other tables like list columns and other components that are part of a sharepoint site -- Replace the REST calls with an SDK. Currently, the Microsoft Graph SDK is under development/preview mode. In the future, we should replace the REST calls with library's methods. - -## Example Usage -``` -CREATE DATABASE sharepoint_test -With - ENGINE = 'sharepoint', - PARAMETERS = { - "clientId":"YOUR_CLIENT_ID", - "clientSecret":"YOUR_CLIENT_SECRET", - "tenantId":"YOUR_TENANT_ID" - }; -``` - -After setting up the Sharepoint Handler, you can use SQL queries to fetch data from Sharepoint -and perform CRUD operations on it: - -Example shows how to fetch all the lists associated with the account: -```sql -SELECT * -FROM sharepoint_test.lists -``` diff --git a/mindsdb/integrations/handlers/sharepoint_handler/__about__.py b/mindsdb/integrations/handlers/sharepoint_handler/__about__.py deleted file mode 100644 index 49537068f23..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Sharepoint handler" -__package_name__ = "mindsdb_sharepoint_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Sharepoint" -__author__ = "Abhijit Pal" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/sharepoint_handler/__init__.py b/mindsdb/integrations/handlers/sharepoint_handler/__init__.py deleted file mode 100644 index 8854ce77ae7..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .sharepoint_handler import SharepointHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Microsoft Sharepoint" -name = "sharepoint" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/sharepoint_handler/icon.svg b/mindsdb/integrations/handlers/sharepoint_handler/icon.svg deleted file mode 100644 index f024ce4a95d..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/icon.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_api.py b/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_api.py deleted file mode 100644 index 4651b4e8db0..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_api.py +++ /dev/null @@ -1,579 +0,0 @@ -import ast -from datetime import datetime, timezone -from typing import Text, List, Dict, Any - -from mindsdb.integrations.handlers.sharepoint_handler.utils import ( - bearer_token_request, - get_an_entity, - delete_an_entity, - update_an_entity, - create_an_entity, -) - - -class SharepointAPI: - def __init__( - self, client_id: str = None, client_secret: str = None, tenant_id: str = None - ): - self.client_id = client_id - self.client_secret = client_secret - self.tenant_id = tenant_id - self.bearer_token = None - self.is_connected = False - self.expiration_time = datetime.now(timezone.utc).timestamp() - - def get_bearer_token(self) -> None: - """ - Generates new bearer token for the credentials - - Returns - None - """ - response = bearer_token_request( - client_id=self.client_id, - tenant_id=self.tenant_id, - client_secret=self.client_secret, - ) - self.bearer_token = response["access_token"] - self.expiration_time = int(response["expires_on"]) - self.is_connected = True - - def check_bearer_token_validity(self) -> bool: - """ - Provides information whether a valid bearer token is available or not. Returns true if available - otherwise false - - Returns - bool - """ - if ( - self.is_connected - and datetime.now(timezone.utc).astimezone().timestamp() - < self.expiration_time - ): - return True - else: - return False - - def disconnect(self) -> None: - """ - Removes bearer token from the sharepoint API class (makes it null) - - Returns - None - """ - self.bearer_token = None - self.is_connected = False - - def get_all_sites(self, limit: int = None) -> List[Dict[Text, Any]]: - """ - Gets all sites associated with the account - - limit: limits the number of site information to be returned - - Returns - response: metadata information corresponding to all sites - """ - url = "https://graph.microsoft.com/v1.0/sites?search=*" - response = get_an_entity(url=url, bearer_token=self.bearer_token) - if limit: - response = response[:limit] - return response - - def update_sites( - self, site_dict: List[Dict[Text, Text]], values_to_update: Dict[Text, Any] - ) -> None: - """ - Updates the given sites (site_dict) with the provided values (values_to_update) - Calls the function update_a_site for every site - site_dict: A dictionary containing site ids of the sites which are to be updated - values_to_update: a dictionary which will be used to update the fields of the sites - - Returns - None - """ - for site_entry in site_dict: - self.update_a_site( - site_id=site_entry["siteId"], - values_to_update=values_to_update, - ) - - def update_a_site(self, site_id: str, values_to_update: Dict[Text, Any]) -> None: - """ - Updates a site with given values - site_id: GUID of the site - values_to_update: a dictionary values which will be used to update the properties of the site - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/" - update_an_entity( - url=url, values_to_update=values_to_update, bearer_token=self.bearer_token - ) - - def get_lists_by_site( - self, site_id: str, limit: int = None - ) -> List[Dict[Text, Any]]: - """ - Gets lists' information corresponding to a site - - site_id: GUID of a site - limit: limits the number of lists for which information is returned - - Returns - response: metadata information/ fields corresponding to lists of the site - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists" - response = get_an_entity(url=url, bearer_token=self.bearer_token) - if limit: - response = response[:limit] - return response - - def get_all_lists(self, limit: int = None) -> List[Dict[Text, Any]]: - """ - Gets all the lists' information assocaited with the account - - limit: puts a limit to the number of lists returned - - Returns - response: returns metadata information regarding all the lists that have been made using that account - """ - sites = self.get_all_sites() - lists = [] - for site in sites: - for list_dict in self.get_lists_by_site(site_id=site["id"].split(",")[1]): - list_dict["siteName"] = site["name"] - list_dict["siteId"] = site["id"].split(",")[1] - lists.append(list_dict) - if limit: - lists = lists[:limit] - return lists - - def delete_lists(self, list_dict: List[Dict[Text, Any]]) -> None: - """ - Deletes lists for the given site ID and list ID - - list_dict: a dictionary values containing the list IDs which are to be deleted and - their corresponding site IDs - - Returns - None - """ - for list_entry in list_dict: - self.delete_a_list(site_id=list_entry["siteId"], list_id=list_entry["id"]) - - def delete_a_list(self, site_id: str, list_id: str) -> None: - """ - Deletes a list, given its list ID and its site ID - - site_id: GUID of the site in which the list is present - list_id: GUID of the list which is to be deleted - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}" - delete_an_entity(url=url, bearer_token=self.bearer_token) - - def update_lists( - self, list_dict: List[Dict[Text, Text]], values_to_update: Dict[Text, Any] - ) -> None: - """ - Updates the given lists (list_dict) with the provided values (values_to_update) - Calls the function update_a_list for every list - list_dict: A dictionary containing ids of the list which are to be updated and also their site IDs - values_to_update: a dictionary which will be used to update the fields of the lists - - Returns - None - """ - for list_entry in list_dict: - self.update_a_list( - site_id=list_entry["siteId"], - list_id=list_entry["id"], - values_to_update=values_to_update, - ) - - def update_a_list( - self, site_id: str, list_id: str, values_to_update: Dict[Text, Any] - ) -> None: - """ - Updates a list with given values - list_id: GUID of the list - site_id: GUID of the site in which the list is present - values_to_update: a dictionary values which will be used to update the properties of the list - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/" - update_an_entity( - url=url, bearer_token=self.bearer_token, values_to_update=values_to_update - ) - - def create_lists(self, data: List[Dict[Text, Any]]) -> None: - """ - Creates lists with the information provided in the data parameter - calls create_a_list for each entry of list metadata dictionary - - data: parameter which contains information such as the site IDs where the lists would be created - and their metadata information which will be used to create them - - Returns - None - """ - for entry in data: - self.create_a_list( - site_id=entry["siteId"], - column=entry.get("column"), - display_name=entry["displayName"], - list_template=entry["list"], - ) - - def create_a_list( - self, site_id: str, list_template: str, display_name: str, column: str = None - ) -> None: - """ - Creates a list with metadata information provided in the params - - site_id: GUID of the site where the list is to be created - list_template: a string which contains the list template information (type of list) - eg.- "{'template': 'documentLibrary'}" - display_name: the display name of the given list, which will be displayed in the site - column: specifies the list of columns that should be created for the list - eg.- "[{'name': 'Author', 'text': { }},{'name': 'PageCount', 'number': { }}]" - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/" - payload = {} - if column: - column = ast.literal_eval(column) - payload["column"] = column - payload["displayName"] = display_name - payload["list"] = ast.literal_eval(list_template) - create_an_entity(url=url, payload=payload, bearer_token=self.bearer_token) - - def get_site_columns_by_site( - self, site_id: str, limit: int = None - ) -> List[Dict[Text, Any]]: - """ - Gets columns' information corresponding to a site - - site_id: GUID of a site - limit: limits the number of columns for which information is returned - - Returns - response: metadata information/ fields corresponding to columns of the site - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/columns/" - response = get_an_entity(url=url, bearer_token=self.bearer_token) - if limit: - response = response[:limit] - return response - - def get_all_site_columns(self, limit: int = None) -> List[Dict[Text, Any]]: - """ - Gets all the columns' information associated with the account - - limit: puts a limit to the number of columns returned - - Returns - response: returns metadata information regarding all the columns that have been made using that account - """ - sites = self.get_all_sites() - site_columns = [] - for site in sites: - for site_column_dict in self.get_site_columns_by_site( - site_id=site["id"].split(",")[1] - ): - site_column_dict["siteName"] = site["name"] - site_column_dict["siteId"] = site["id"].split(",")[1] - site_columns.append(site_column_dict) - if limit: - site_columns = site_columns[:limit] - return site_columns - - def update_site_columns( - self, - site_column_dict: List[Dict[Text, Text]], - values_to_update: Dict[Text, Any], - ) -> None: - """ - Updates the given columns (site_column_dict) with the provided values (values_to_update) - Calls the function update_a_site_column for every column - - site_column_dict: A dictionary containing ids of the column which are to be updated and - also their site IDs - values_to_update: a dictionary which will be used to update the fields of the columns - - Returns - None - """ - for site_column_entry in site_column_dict: - self.update_a_site_column( - site_id=site_column_entry["siteId"], - column_id=site_column_entry["id"], - values_to_update=values_to_update, - ) - - def update_a_site_column( - self, site_id: str, column_id: str, values_to_update: Dict[Text, Any] - ): - """ - Updates a column with given values - - column_id: GUID of the column - site_id: GUID of the site in which the column is present - values_to_update: a dictionary values which will be used to update the properties of the column - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/columns/{column_id}" - update_an_entity( - url=url, values_to_update=values_to_update, bearer_token=self.bearer_token - ) - - def delete_site_columns(self, column_dict: List[Dict[Text, Any]]) -> None: - """ - Deletes columns for the given site ID and column ID - - column_dict: a dictionary values containing the column IDs which are to be deleted and - their corresponding site IDs - - Returns - None - """ - for column_entry in column_dict: - self.delete_a_site_columns( - site_id=column_entry["siteId"], column_id=column_entry["id"] - ) - - def delete_a_site_columns(self, site_id: str, column_id: str) -> None: - """ - Deletes a column, given its column ID and its site ID - - site_id: GUID of the site in which the column is present - column_id: GUID of the column which is to be deleted - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/columns/{column_id}" - delete_an_entity(url=url, bearer_token=self.bearer_token) - - def create_site_columns(self, data: List[Dict[Text, Any]]) -> None: - """ - Creates columns with the information provided in the data parameter - calls create_a_site_column for each entry of column metadata dictionary - - data: parameter which contains information such as the site IDs where the columns would be created - and their metadata information which will be used to create them - - Returns - None - """ - for entry in data: - self.create_a_site_column( - site_id=entry["siteId"], - enforce_unique_values=entry.get("enforceUniqueValues"), - hidden=entry.get("hidden"), - indexed=entry.get("indexed"), - name=entry["name"], - text=entry.get("text"), - ) - - def create_a_site_column( - self, - site_id: str, - enforce_unique_values: bool, - hidden: bool, - indexed: bool, - name: str, - text: str = None, - ) -> None: - """ - Creates a list with metadata information provided in the params - - site_id: GUID of the site where the column is to be created - enforced_unique_values: if true, no two list items may have the same value for this column - hidden: specifies whether the column is displayed in the user interface - name: the API-facing name of the column as it appears in the fields on a listItem. - text: details regarding the text values in the column - - Returns - None - """ - - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/columns/" - payload = {} - if text: - text = ast.literal_eval(text) - payload["text"] = text - payload["name"] = name - if enforce_unique_values is not None: - payload["enforceUniqueValues"] = enforce_unique_values - if hidden is not None: - payload["hidden"] = hidden - if indexed is not None: - payload["indexed"] = indexed - create_an_entity(url=url, payload=payload, bearer_token=self.bearer_token) - - def get_items_by_sites_and_lists( - self, site_id: str, list_id: str, limit: int = None - ) -> List[Dict[Text, Any]]: - """ - Gets items' information corresponding to a site and a list - - site_id: GUID of a site - list_id: GUID of a list - limit: limits the number of columns for which information is returned - - Returns - response: metadata information/ fields corresponding to list-items of the site - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items?expand=fields&select=*" - response = get_an_entity(url=url, bearer_token=self.bearer_token) - if limit: - response = response[:limit] - return response - - def get_all_items(self, limit: int = None) -> List[Dict[Text, Any]]: - """ - Gets all the items' information associated with the account - - limit: puts a limit to the number of items returned - - Returns - response: returns metadata information regarding all the items that are associated with that account - """ - sites = self.get_all_sites() - items = [] - for site in sites: - site_id = site["id"].split(",")[1] - for sharepoint_list in self.get_lists_by_site(site_id=site_id): - for item_dict in self.get_items_by_sites_and_lists( - site_id=site["id"].split(",")[1], list_id=sharepoint_list["id"] - ): - item_dict["siteName"] = site["name"] - item_dict["siteId"] = site["id"].split(",")[1] - item_dict["listId"] = sharepoint_list["id"] - item_dict["list_name"] = sharepoint_list["displayName"] - items.append(item_dict) - if limit: - items = items[:limit] - return items - - def update_items( - self, item_dict: List[Dict[Text, Text]], values_to_update: Dict[Text, Any] - ) -> None: - """ - Updates the given items (item_dict) with the provided values (values_to_update) - Calls the function update_a_item for every column - - item_dict: A dictionary containing ids of the list-items which are to be updated and - also their site IDs - values_to_update: a dictionary which will be used to update the fields of the items - - Returns - None - """ - for item_entry in item_dict: - self.update_an_item( - site_id=item_entry["siteId"], - list_id=item_entry["listId"], - item_id=item_entry["id"], - values_to_update=values_to_update, - ) - - def update_an_item( - self, - site_id: str, - list_id: str, - item_id: str, - values_to_update: Dict[Text, Any], - ): - """ - Updates an item with given values - - item_id: GUID of the column - list_id: GUID of the list - site_id: GUID of the site in which the list is present - values_to_update: a dictionary values which will be used to update the properties of the list-item - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items/{item_id}" - update_an_entity( - url=url, values_to_update=values_to_update, bearer_token=self.bearer_token - ) - - def delete_items(self, item_dict: List[Dict[Text, Any]]) -> None: - """ - Deletes items for the given site ID and list ID - - item_dict: a dictionary values containing the item IDs which are to be deleted, - their corresponding site IDs and their list IDs - - Returns - None - """ - for item_entry in item_dict: - self.delete_an_item( - site_id=item_entry["siteId"], - list_id=item_entry["listId"], - item_id=item_entry["id"], - ) - - def delete_an_item(self, site_id: str, list_id: str, item_id: str) -> None: - """ - Deletes an item, given its item ID, its site ID and its list ID - - list_id: GUID of the list in which the site is present - site_id: GUID of the site in which the list is present - item_id: GUID of the item which is to be deleted - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items/{item_id}" - delete_an_entity(url=url, bearer_token=self.bearer_token) - - def create_items(self, data: List[Dict[Text, Any]]) -> None: - """ - Creates items with the information provided in the data parameter - calls create_an_item for each entry of item metadata dictionary - - data: parameter which contains information such as the site IDs and list IDs where the items - would be created and their metadata information which will be used to create them - - Returns - None - """ - for entry in data: - self.create_an_item( - site_id=entry["siteId"], - list_id=entry["listId"], - fields=entry.get("fields"), - ) - - def create_an_item(self, site_id: str, list_id: str, fields: str) -> None: - """ - Creates an item with metadata information provided in the params - - site_id: GUID of the site where the list id present - list_id: GUID of the list where the item is to be created - fields: The values of the columns set on this list item. - - Returns - None - """ - url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items/" - payload = {} - if fields: - payload["fields"] = ast.literal_eval(fields) - create_an_entity(url=url, payload=payload, bearer_token=self.bearer_token) diff --git a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_handler.py b/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_handler.py deleted file mode 100644 index 1672cde8f3d..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_handler.py +++ /dev/null @@ -1,147 +0,0 @@ -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.sharepoint_handler.sharepoint_api import ( - SharepointAPI, -) -from mindsdb.integrations.handlers.sharepoint_handler.sharepoint_tables import ( - ListsTable, - SitesTable, - ListItemsTable, - SiteColumnsTable, -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - -from mindsdb.utilities import log -from collections import OrderedDict - -logger = log.getLogger(__name__) - - -class SharepointHandler(APIHandler): - """ - The Sharepoint handler implementation. - """ - - name = "sharepoint" - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - - if not ( - self.connection_data["clientId"] - and ( - self.connection_data["tenantId"] - and self.connection_data["clientSecret"] - ) - ): - raise Exception( - "client params and tenant id is required for Sharepoint connection!" - ) - - self.connection = None - self.is_connected = False - self._client = None - lists_data = ListsTable(self) - self._register_table("lists", lists_data) - - sites_data = SitesTable(self) - self._register_table("sites", sites_data) - - site_columns_data = SiteColumnsTable(self) - self._register_table("siteColumns", site_columns_data) - - list_items_data = ListItemsTable(self) - self._register_table("listItems", list_items_data) - - def connect(self): - """ - Set up the context connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - if self.is_connected is True: - return self.connection - self.connection = SharepointAPI( - tenant_id=self.connection_data["tenantId"], - client_id=self.connection_data["clientId"], - client_secret=self.connection_data["clientSecret"], - ) - self.connection.get_bearer_token() - self.is_connected = True - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - - try: - connection = self.connect() - response.success = connection.check_bearer_token_validity() - except Exception as e: - logger.error("Error connecting to Sharepoint! " + str(e)) - response.error_message = str(e) - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) - - -connection_args = OrderedDict( - clientId={ - "type": ARG_TYPE.STR, - "description": "Client Id of the App", - "required": True, - "label": "Client ID", - }, - clientSecret={ - "type": ARG_TYPE.PWD, - "description": "Client Secret of the App", - "required": True, - "label": "Client Secret", - }, - tenantId={ - "type": ARG_TYPE.STR, - "description": "Tenant Id of the tenant of the App", - "required": True, - "label": "Tenant ID", - }, -) - -connection_args_example = OrderedDict( - clientId="xxxx-xxxx-xxxx-xxxx", - clientSecret="", - tenantId="xxxx-xxxx-xxxx-xxxx", -) diff --git a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_tables.py b/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_tables.py deleted file mode 100644 index 04e9a0804a5..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/sharepoint_tables.py +++ /dev/null @@ -1,581 +0,0 @@ -import pandas as pd -from typing import Text, List, Dict, Any - -from mindsdb_sql_parser import ast -from mindsdb.integrations.libs.api_handler import APITable - -from mindsdb.integrations.utilities.handlers.query_utilities.select_query_utilities import ( - SELECTQueryParser, - SELECTQueryExecutor, -) -from mindsdb.integrations.utilities.handlers.query_utilities.delete_query_utilities import ( - DELETEQueryParser, - DELETEQueryExecutor, -) - -from mindsdb.integrations.utilities.handlers.query_utilities.update_query_utilities import ( - UPDATEQueryParser, - UPDATEQueryExecutor, -) - -from mindsdb.integrations.utilities.handlers.query_utilities import INSERTQueryParser - - -class SitesTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Sharepoint Sites data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Sharepoint Sites matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - select_statement_parser = SELECTQueryParser(query, "sites", self.get_columns()) - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - sites_df = pd.json_normalize(self.get_sites(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - sites_df, selected_columns, where_conditions, order_by_conditions - ) - sites_df = select_statement_executor.execute_query() - - return sites_df - - def update(self, query: ast.Update) -> None: - """Updates data in the Sharepoint "PUT /lists" API endpoint. - - Parameters - ---------- - query : ast.Update - Given SQL UPDATE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - update_statement_parser = UPDATEQueryParser(query) - values_to_update, where_conditions = update_statement_parser.parse_query() - - sites_df = pd.json_normalize(self.get_sites()) - - update_query_executor = UPDATEQueryExecutor(sites_df, where_conditions) - - sites_df = update_query_executor.execute_query() - - sites_ids = sites_df[["siteId"]].to_dict(orient="records") - - self.update_sites(sites_ids, values_to_update) - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_sites(limit=1)).columns.tolist() - - def get_sites(self, **kwargs) -> List[Dict]: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - site_data = client.get_all_sites(**kwargs) - return site_data - - def update_sites(self, site_ids: List[dict], values_to_update: dict) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.update_sites(site_ids, values_to_update) - - -class ListsTable(APITable): - def insert(self, query: ast.Insert) -> None: - """Inserts data into the Sharepoint "POST /lists" API endpoint. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - insert_statement_parser = INSERTQueryParser( - query, - supported_columns=["displayName", "columns", "list", "siteId"], - mandatory_columns=["displayName", "list", "siteId"], - all_mandatory=False, - ) - lists_data = insert_statement_parser.parse_query() - self.create_lists(lists_data) - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Sharepoint lists data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Sharepoint lists matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - select_statement_parser = SELECTQueryParser(query, "lists", self.get_columns()) - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - lists_df = pd.json_normalize(self.get_lists(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - lists_df, selected_columns, where_conditions, order_by_conditions - ) - lists_df = select_statement_executor.execute_query() - - return lists_df - - def update(self, query: ast.Update) -> None: - """Updates data in the Sharepoint "PUT /lists" API endpoint. - - Parameters - ---------- - query : ast.Update - Given SQL UPDATE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - update_statement_parser = UPDATEQueryParser(query) - values_to_update, where_conditions = update_statement_parser.parse_query() - - lists_df = pd.json_normalize(self.get_lists()) - - update_query_executor = UPDATEQueryExecutor(lists_df, where_conditions) - - lists_df = update_query_executor.execute_query() - - list_ids = lists_df[["id", "siteId"]].to_dict(orient="records") - - self.update_lists(list_ids, values_to_update) - - def delete(self, query: ast.Delete) -> None: - """ - Deletes data from the Sharepoint "DELETE /lists" API endpoint. - - Parameters - ---------- - query : ast.Delete - Given SQL DELETE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - delete_statement_parser = DELETEQueryParser(query) - where_conditions = delete_statement_parser.parse_query() - - lists_df = pd.json_normalize(self.get_lists()) - - delete_query_executor = DELETEQueryExecutor(lists_df, where_conditions) - - lists_df = delete_query_executor.execute_query() - - list_ids = lists_df[["id", "siteId"]].to_dict(orient="records") - self.delete_lists(list_ids) - - def get_lists(self, **kwargs) -> List[Dict]: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - lists_data = client.get_all_lists(**kwargs) - return lists_data - - def delete_lists(self, list_ids: List[dict]) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.delete_lists(list_ids) - - def update_lists(self, list_ids: List[dict], values_to_update: dict) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.update_lists(list_ids, values_to_update) - - def create_lists(self, lists_data: List[Dict[Text, Any]]) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.create_lists(data=lists_data) - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_lists(limit=1)).columns.tolist() - - -class SiteColumnsTable(APITable): - def insert(self, query: ast.Insert) -> None: - """Inserts data into the Sharepoint "POST /columns" API endpoint. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - insert_statement_parser = INSERTQueryParser( - query, - supported_columns=[ - "text", - "name", - "indexed", - "enforceUniqueValues", - "hidden", - "siteId", - ], - mandatory_columns=["name", "siteId"], - all_mandatory=False, - ) - site_columns_data = insert_statement_parser.parse_query() - self.create_site_columns(site_columns_data) - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Sharepoint columns data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Sharepoint Columns matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - select_statement_parser = SELECTQueryParser( - query, "siteColumns", self.get_columns() - ) - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - site_columns_df = pd.json_normalize(self.get_site_columns(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - site_columns_df, - selected_columns, - where_conditions, - order_by_conditions, - ) - site_columns_df = select_statement_executor.execute_query() - - return site_columns_df - - def update(self, query: ast.Update) -> None: - """Updates data in the Sharepoint "PUT /columns" API endpoint. - - Parameters - ---------- - query : ast.Update - Given SQL UPDATE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - update_statement_parser = UPDATEQueryParser(query) - values_to_update, where_conditions = update_statement_parser.parse_query() - - site_columns_df = pd.json_normalize(self.get_site_columns()) - - update_query_executor = UPDATEQueryExecutor(site_columns_df, where_conditions) - - site_columns_df = update_query_executor.execute_query() - - site_columns_ids = site_columns_df[["id", "siteId"]].to_dict(orient="records") - - self.update_site_columns(site_columns_ids, values_to_update) - - def delete(self, query: ast.Delete) -> None: - """ - Deletes data from the Sharepoint "DELETE /columns" API endpoint. - - Parameters - ---------- - query : ast.Delete - Given SQL DELETE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - delete_statement_parser = DELETEQueryParser(query) - where_conditions = delete_statement_parser.parse_query() - - site_columns_df = pd.json_normalize(self.get_site_columns()) - - delete_query_executor = DELETEQueryExecutor(site_columns_df, where_conditions) - - site_columns_df = delete_query_executor.execute_query() - - site_columns_ids = site_columns_df[["id", "siteId"]].to_dict(orient="records") - self.delete_site_columns(site_columns_ids) - - def get_site_columns(self, **kwargs) -> List[Dict]: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - site_columns_data = client.get_all_site_columns(**kwargs) - return site_columns_data - - def delete_site_columns(self, sharepoint_column_ids: List[dict]) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.delete_site_columns(sharepoint_column_ids) - - def update_site_columns( - self, sharepoint_column_ids: List[dict], values_to_update: dict - ) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.update_site_columns(sharepoint_column_ids, values_to_update) - - def create_site_columns( - self, sharepoint_column_data: List[Dict[Text, Any]] - ) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.create_site_columns(data=sharepoint_column_data) - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_site_columns(limit=1)).columns.tolist() - - -class ListItemsTable(APITable): - def insert(self, query: ast.Insert) -> None: - """Inserts data into the Sharepoint "POST /items" API endpoint. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - insert_statement_parser = INSERTQueryParser( - query, - supported_columns=["fields", "listId", "siteId"], - mandatory_columns=["listId", "siteId"], - all_mandatory=False, - ) - list_items_data = insert_statement_parser.parse_query() - self.create_list_items(list_items_data) - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Sharepoint items data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Sharepoint items matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - select_statement_parser = SELECTQueryParser( - query, "listItems", self.get_columns() - ) - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - list_items_df = pd.json_normalize(self.get_list_items(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - list_items_df, selected_columns, where_conditions, order_by_conditions - ) - list_items_df = select_statement_executor.execute_query() - - return list_items_df - - def update(self, query: ast.Update) -> None: - """Updates data in the Sharepoint "PUT /items" API endpoint. - - Parameters - ---------- - query : ast.Update - Given SQL UPDATE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - update_statement_parser = UPDATEQueryParser(query) - values_to_update, where_conditions = update_statement_parser.parse_query() - - list_items_df = pd.json_normalize(self.get_list_items()) - - update_query_executor = UPDATEQueryExecutor(list_items_df, where_conditions) - - list_items_df = update_query_executor.execute_query() - - list_items_ids = list_items_df[["id", "siteId", "listId"]].to_dict( - orient="records" - ) - - self.update_list_items(list_items_ids, values_to_update) - - def delete(self, query: ast.Delete) -> None: - """ - Deletes data from the Sharepoint "DELETE /items" API endpoint. - - Parameters - ---------- - query : ast.Delete - Given SQL DELETE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - delete_statement_parser = DELETEQueryParser(query) - where_conditions = delete_statement_parser.parse_query() - - list_items_df = pd.json_normalize(self.get_list_items()) - - delete_query_executor = DELETEQueryExecutor(list_items_df, where_conditions) - - list_items_df = delete_query_executor.execute_query() - - list_items_ids = list_items_df[["id", "siteId", "listId"]].to_dict( - orient="records" - ) - self.delete_list_items(list_items_ids) - - def get_list_items(self, **kwargs) -> List[Dict]: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - list_items_data = client.get_all_items(**kwargs) - return list_items_data - - def delete_list_items(self, list_item_ids: List[dict]) -> None: - if not self.handler.connnection.check_connection(): - self.handler.connect() - client = self.handler.connection - client.delete_items(list_item_ids) - - def update_list_items( - self, list_items_ids: List[dict], values_to_update: dict - ) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.update_items(item_dict=list_items_ids, values_to_update=values_to_update) - - def create_list_items(self, list_items_data: List[Dict[Text, Any]]) -> None: - if not self.handler.connection.check_bearer_token_validity(): - self.handler.connect() - client = self.handler.connection - client.create_items(data=list_items_data) - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_list_items(limit=1)).columns.tolist() diff --git a/mindsdb/integrations/handlers/sharepoint_handler/tests/__init__.py b/mindsdb/integrations/handlers/sharepoint_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/sharepoint_handler/tests/test_sharepoint_handler.py b/mindsdb/integrations/handlers/sharepoint_handler/tests/test_sharepoint_handler.py deleted file mode 100644 index 4048e5fa08b..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/tests/test_sharepoint_handler.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -import unittest -from mindsdb.integrations.handlers.sharepoint_handler import Handler as SharepointHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class SharepointHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "clientId": os.environ.get('CLIENT_ID'), - "clientSecret": os.environ.get('CLIENT_SECRET'), - "tenantId": os.environ.get('TENANT_ID'), - } - } - cls.handler = SharepointHandler('test_sharepoint_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_2_select_sites_query(self): - query = "SELECT * FROM test_sharepoint_handler.sites" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_3_select_lists_query(self): - query = "SELECT * FROM test_sharepoint_handler.lists" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_select_siteColumns_query(self): - query = "SELECT * FROM test_sharepoint_handler.siteColumns" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_select_listItems_query(self): - query = "SELECT * FROM test_sharepoint_handler.listItems" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('test_sharepoint_handler.siteColumns') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/sharepoint_handler/utils.py b/mindsdb/integrations/handlers/sharepoint_handler/utils.py deleted file mode 100644 index 9187581bd49..00000000000 --- a/mindsdb/integrations/handlers/sharepoint_handler/utils.py +++ /dev/null @@ -1,163 +0,0 @@ -import json -from typing import Dict, List, Text, Any - -import requests -from requests import Response - - -def create_an_entity(url: str, payload: Dict[Text, Any], bearer_token: str) -> None: - """ - Makes a POST request to the given url - Creates an entity for the given url, bearer token and payload - - url: URL to which the get request is made - bearer_token: authentication token for the request - payload: a dictionary which provides the metadata information regarding the entity being created - - Returns - None - """ - payload = json.dumps(payload, indent=2) - headers = { - "Authorization": f"Bearer {bearer_token}", - "Content-Type": "application/json", - } - getresponse( - request_type="POST", url=url, headers=headers, payload=payload, files=[] - ) - - -def get_an_entity(url: str, bearer_token: str) -> Any: - """ - Makes a GET request to the given url - Gets the entity for the given url and bearer token - - url: URL to which the get request is made - bearer_token: authentication token for the request - - Returns - Dictionary or list of dictionaries containing information/metadata corresponding to the entities - """ - payload = {} - headers = { - "Authorization": f"Bearer {bearer_token}", - "Content-Type": "application/json", - } - response = getresponse( - request_type="GET", url=url, headers=headers, payload=payload, files=[] - ) - response = response.json()["value"] - return response - - -def update_an_entity( - url: str, values_to_update: Dict[Text, Any], bearer_token: str -) -> None: - """ - Makes a PATCH request to given url with the given values_to_update and bearer_token - updates the entity with the provided values - - url: url provided by the user - values_to_update: values that would be used to update the entity, would be passed in the payload - Mostly would be a dictionary mapping fields to values - bearer_token: authentication token passed in the header to make the request - - Returns - None - """ - payload = values_to_update - payload = json.dumps(payload, indent=2) - headers = { - "Authorization": f"Bearer {bearer_token}", - "Content-Type": "application/json", - } - getresponse( - request_type="PATCH", url=url, headers=headers, payload=payload, files=[] - ) - - -def delete_an_entity(url: str, bearer_token: str): - """ - Makes a DELETE request to the given url - - url: url string provided to which the request would be made - bearer_token: authorization token which will be used to execute the request - - Returns - None - """ - payload = {} - headers = { - "Authorization": f"Bearer {bearer_token}", - "Content-Type": "application/json", - } - getresponse( - request_type="DELETE", url=url, headers=headers, payload=payload, files=[] - ) - - -def bearer_token_request(tenant_id: str, client_id: str, client_secret: str) -> Any: - """ - Sends a request to login.microsoftonline.com for the given tenant to generate a bearer token - which is then used in making graph api call - - tenant_id: tenant ID is a globally unique identifier (GUID) for your organization - that is different from your organization or domain name - client_id: client ID is a globally unique identifier (GUID) for your app registered in Entra - client_secret: client secret is the password of the service principal or the app. - - Returns - response: Dictionary containing bearer token and other information - """ - url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" - - payload = { - "client_id": client_id, - "client_secret": client_secret, - "redirect_uri": "http://localhost", - "grant_type": "client_credentials", - "resource": "https://graph.microsoft.com", - } - files = [] - headers = {} - - response = getresponse( - request_type="POST", url=url, headers=headers, payload=payload, files=files - ) - response = response.json() - return response - - -def getresponse( - url: str, - payload: Dict[Text, Any], - files: List[Any], - headers: Dict[Text, Any], - request_type: str, -) -> Response: - """ - Makes a standard HTTP request based on the params provided and returns the response. - May raise an error if the response code does not indicate success - - url: url string provided to which the request would be made - payload: the payload which supply additional info regarding the request - files: the files that are needed to be passed in the request - headers: additional information regarding the request - may also indicate the type of content passed in the payload - request_type: the request performs different actions based on the request type - DELETE/POST/PATCH/GET - - Returns - response: may return based on the response code - """ - response = requests.request( - request_type, url, headers=headers, data=payload, files=files - ) - status_code = response.status_code - - if 400 <= status_code <= 499: - raise Exception("Client error: " + response.text) - - if 500 <= status_code <= 599: - raise Exception("Server error: " + response.text) - return response diff --git a/mindsdb/integrations/handlers/sheets_handler/README.md b/mindsdb/integrations/handlers/sheets_handler/README.md deleted file mode 100644 index 39ec52944c7..00000000000 --- a/mindsdb/integrations/handlers/sheets_handler/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# Google Sheets Handler - -This is the implementation of the Google Sheets handler for MindsDB. - -## Google Sheets -Google Sheets is a spreadsheet program included as part of the free, web-based Google Docs Editors suite offered by Google. -https://en.wikipedia.org/wiki/Google_Sheets - -## Implementation -This handler was implemented using `duckdb`, a library that allows SQL queries to be executed on `pandas` DataFrames. - -In essence, when querying a particular sheet, the entire sheet will first be pulled into a `pandas` DataFrame using the Google Visualization API. Once this is done, SQL queries can be run on the DataFrame using `duckdb`. - -Note: Since the entire sheet needs to be pulled into memory first (DataFrame), it is recommended to be somewhat careful when querying large datasets so as not to overload your machine. - -The documentation for the Google Visualization API is available here, -
-https://developers.google.com/chart/interactive/docs/reference - -The required arguments to establish a connection are, -* `spreadsheet_id`: the unique ID of the Google Sheet. -* `sheet_name`: the name of the sheet within the Google Sheet. - -## Usage -In order to make use of this handler and connect to a Google Sheet in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE sheets_datasource -WITH -engine='sheets', -parameters={ - "spreadsheet_id": "12wgS-1KJ9ymUM-6VYzQ0nJYGitONxay7cMKLnEE2_d0", - "sheet_name": "iris" -}; -~~~~ - -Now, you can use this established connection to query your table as follows, -~~~~sql -SELECT * FROM sheets_datasource.example_tbl -~~~~ - -and for columns that contain special characters as follows, - -~~~~sql -SELECT `column name` FROM sheets_datasource.example_tbl -~~~~ - -The name of the table will be the name of the relevant sheet, provided as an input to the `sheet_name` parameter. - -At the moment, only `SELECT` queries are allowed to be executed through `duckdb`. This, however, has no restriction on running machine learning algorithms against your data in Airtable using `CREATE PREDICTOR` statements. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sheets_handler/__about__.py b/mindsdb/integrations/handlers/sheets_handler/__about__.py deleted file mode 100644 index 10d096e69c9..00000000000 --- a/mindsdb/integrations/handlers/sheets_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Google Sheets handler' -__package_name__ = 'mindsdb_sheets_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Google Sheets" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/sheets_handler/__init__.py b/mindsdb/integrations/handlers/sheets_handler/__init__.py deleted file mode 100644 index 4532b9b5afa..00000000000 --- a/mindsdb/integrations/handlers/sheets_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .sheets_handler import SheetsHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Google Sheets' -name = 'sheets' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/sheets_handler/connection_args.py b/mindsdb/integrations/handlers/sheets_handler/connection_args.py deleted file mode 100644 index 186c0776f7c..00000000000 --- a/mindsdb/integrations/handlers/sheets_handler/connection_args.py +++ /dev/null @@ -1,20 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - spreadsheet_id={ - 'type': ARG_TYPE.STR, - 'description': 'The unique ID of the Google Sheet.' - }, - sheet_name={ - 'type': ARG_TYPE.STR, - 'description': 'The name of the sheet within the Google Sheet.' - } -) - -connection_args_example = OrderedDict( - spreadsheet_id='12wgS-1KJ9ymUM-6VYzQ0nJYGitONxay7cMKLnEE2_d0', - sheet_name='iris' -) diff --git a/mindsdb/integrations/handlers/sheets_handler/icon.svg b/mindsdb/integrations/handlers/sheets_handler/icon.svg deleted file mode 100644 index 8b4ba3c3c9b..00000000000 --- a/mindsdb/integrations/handlers/sheets_handler/icon.svg +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sheets_handler/sheets_handler.py b/mindsdb/integrations/handlers/sheets_handler/sheets_handler.py deleted file mode 100644 index 06494f9a367..00000000000 --- a/mindsdb/integrations/handlers/sheets_handler/sheets_handler.py +++ /dev/null @@ -1,441 +0,0 @@ -from typing import Optional - -import pandas as pd -from pandas.api import types as pd_types -import duckdb - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities import log -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -logger = log.getLogger(__name__) - - -def _pandas_dtype_to_sql_type(dtype) -> str: - """Convert pandas dtype to SQL data type string. - - Args: - dtype: pandas dtype object - - Returns: - str: SQL data type string (VARCHAR, INTEGER, DECIMAL, etc.) - """ - # Handle string dtypes - if pd_types.is_string_dtype(dtype): - return "VARCHAR" - - # Handle integer dtypes - if pd_types.is_integer_dtype(dtype): - return "INTEGER" - - # Handle float/numeric dtypes - if pd_types.is_float_dtype(dtype) or pd_types.is_numeric_dtype(dtype): - return "DECIMAL" - - # Handle boolean dtypes - if pd_types.is_bool_dtype(dtype): - return "BOOLEAN" - - # Handle datetime dtypes - if pd_types.is_datetime64_any_dtype(dtype): - return "DATETIME" - - # Handle date dtypes - if pd_types.is_date_dtype(dtype): - return "DATE" - - # Default to VARCHAR for object and unknown types - return "VARCHAR" - - -def _infer_data_type(value) -> str: - """Infer SQL data type from Python value. - - Args: - value: Python value to infer type from - - Returns: - str: SQL data type string (VARCHAR, INTEGER, DECIMAL, etc.) - """ - if value is None: - return "VARCHAR" - elif isinstance(value, bool): - return "BOOLEAN" - elif isinstance(value, int): - return "INTEGER" - elif isinstance(value, float): - return "DECIMAL" - elif isinstance(value, str): - # Check if it looks like a timestamp - if "T" in value and ("Z" in value or "+" in value or "-" in value[-6:]): - return "TIMESTAMP" - # Check if it looks like a date - try: - pd.to_datetime(value) - if len(value) == 10: # Just date, no time - return "DATE" - return "DATETIME" - except (ValueError, TypeError): - pass - return "VARCHAR" - elif pd_types.is_datetime64_any_dtype(type(value)) or isinstance(value, pd.Timestamp): - return "DATETIME" - else: - return "VARCHAR" - - -def _infer_data_type_from_samples(values) -> str: - """Infer data type from multiple sample values for better accuracy. - - Args: - values: List of sample values from a column - - Returns: - str: SQL data type string - """ - non_null_values = [v for v in values if v is not None and pd.notna(v)] - - if not non_null_values: - return "VARCHAR" - - # Analyze types across all samples - type_counts = {} - for value in non_null_values: - inferred_type = _infer_data_type(value) - type_counts[inferred_type] = type_counts.get(inferred_type, 0) + 1 - - # Return the most common type - if type_counts: - return max(type_counts.items(), key=lambda x: x[1])[0] - - return "VARCHAR" - - -def _map_type(data_type: str) -> MYSQL_DATA_TYPE: - """Map SQL data types to MySQL types. - - Args: - data_type (str): The SQL data type name - - Returns: - MYSQL_DATA_TYPE: The corresponding MySQL data type - """ - if data_type is None: - return MYSQL_DATA_TYPE.VARCHAR - - data_type_upper = data_type.upper() - - type_map = { - "VARCHAR": MYSQL_DATA_TYPE.VARCHAR, - "TEXT": MYSQL_DATA_TYPE.TEXT, - "INTEGER": MYSQL_DATA_TYPE.INT, - "INT": MYSQL_DATA_TYPE.INT, - "BIGINT": MYSQL_DATA_TYPE.BIGINT, - "DECIMAL": MYSQL_DATA_TYPE.DECIMAL, - "FLOAT": MYSQL_DATA_TYPE.FLOAT, - "DOUBLE": MYSQL_DATA_TYPE.DOUBLE, - "BOOLEAN": MYSQL_DATA_TYPE.BOOL, - "BOOL": MYSQL_DATA_TYPE.BOOL, - "DATE": MYSQL_DATA_TYPE.DATE, - "DATETIME": MYSQL_DATA_TYPE.DATETIME, - "TIMESTAMP": MYSQL_DATA_TYPE.DATETIME, - "TIME": MYSQL_DATA_TYPE.TIME, - } - - return type_map.get(data_type_upper, MYSQL_DATA_TYPE.VARCHAR) - - -def _cast_column_to_type(series: pd.Series, sql_type: str) -> pd.Series: - """Cast a pandas Series to the appropriate type based on SQL data type. - - Args: - series: pandas Series to cast - sql_type: SQL data type string (VARCHAR, INTEGER, etc.) - - Returns: - pandas Series with appropriate dtype - """ - sql_type_upper = sql_type.upper() if sql_type else "VARCHAR" - - try: - if sql_type_upper in ("INTEGER", "INT", "BIGINT"): - # Try to convert to integer, handling NaN values - return pd.to_numeric(series, errors="coerce").astype("Int64") - elif sql_type_upper in ("DECIMAL", "FLOAT", "DOUBLE"): - # Convert to float - return pd.to_numeric(series, errors="coerce").astype("float64") - elif sql_type_upper in ("BOOLEAN", "BOOL"): - # Convert to boolean - return series.astype("boolean") - elif sql_type_upper in ("DATE", "DATETIME", "TIMESTAMP"): - # Convert to datetime - return pd.to_datetime(series, errors="coerce") - else: - # VARCHAR, TEXT, or unknown - keep as string - return series.astype("string") - except Exception as e: - logger.warning(f"Error casting column to {sql_type}: {e}, keeping original type") - return series - - -class SheetsHandler(DatabaseHandler): - """ - This handler handles connection and execution of queries against the Excel Sheet. - TODO: add authentication for private sheets - """ - - name = "sheets" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.renderer = SqlalchemyRender("postgresql") - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - self._column_types_cache = None # Cache for column types - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - url = f"https://docs.google.com/spreadsheets/d/{self.connection_data['spreadsheet_id']}/gviz/tq?tqx=out:csv&sheet={self.connection_data['sheet_name']}" - try: - self.sheet = pd.read_csv(url, on_bad_lines="skip") - except pd.errors.EmptyDataError: - error_msg = ( - f"Google Sheet '{self.connection_data['sheet_name']}' " - f"(ID: {self.connection_data['spreadsheet_id']}) is empty or has no columns. " - f"Please ensure the sheet contains data." - ) - logger.error(error_msg) - raise ValueError(error_msg) - except Exception as e: - error_msg = ( - f"Error reading Google Sheet '{self.connection_data['sheet_name']}' " - f"(ID: {self.connection_data['spreadsheet_id']}): {str(e)}" - ) - logger.error(error_msg) - raise - - self.connection = duckdb.connect() - self.connection.register(self.connection_data["sheet_name"], self.sheet) - self.is_connected = True - # Clear column types cache when reconnecting - self._column_types_cache = None - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to the Google Sheet with ID {self.connection_data['spreadsheet_id']}, {e}!") - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - connection = self.connect() - try: - result_df = connection.execute(query).fetchdf() - if not result_df.empty: - # Get column types and cast result columns accordingly - column_types = self._get_column_types() - - # Cast each column to its inferred type - for column_name in result_df.columns: - if column_name in column_types: - sql_type = column_types[column_name] - logger.debug(f"Casting column '{column_name}' to type '{sql_type}'") - result_df[column_name] = _cast_column_to_type(result_df[column_name], sql_type) - - response = Response(RESPONSE_TYPE.TABLE, result_df) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error( - f"Error running query: {query} on the Google Sheet with ID {self.connection_data['spreadsheet_id']}!" - ) - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - query_str = self.renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - response = Response( - RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([self.connection_data["sheet_name"]], columns=["table_name"]) - ) - - return response - - def _get_column_types(self) -> dict: - """Get or infer column types for the sheet. - - Returns: - dict: Mapping of column names to SQL data types - """ - # Return cached types if available - if self._column_types_cache is not None: - return self._column_types_cache - - # Ensure we're connected and have the sheet data - if not hasattr(self, "sheet") or self.sheet is None: - self.connect() - - # Sample 3 rows to infer data types from actual values - sample_size = min(3, len(self.sheet)) - sample_data = self.sheet.head(sample_size) if not self.sheet.empty else pd.DataFrame() - - logger.debug(f"Sampling {sample_size} rows for column type inference") - logger.debug(f"Sampled data:\n{sample_data.to_string()}") - logger.debug(f"Column names: {list(self.sheet.columns)}") - - # Infer data types from sample values for each column - column_types = {} - for column_name in self.sheet.columns: - if not sample_data.empty: - column_values = sample_data[column_name].tolist() - logger.debug(f"Column '{column_name}' sample values: {column_values}") - inferred_type = _infer_data_type_from_samples(column_values) - logger.debug(f"Column '{column_name}' inferred type: {inferred_type}") - column_types[column_name] = inferred_type - else: - # If no data, fall back to pandas dtype - dtype = self.sheet[column_name].dtype - inferred_type = _pandas_dtype_to_sql_type(dtype) if dtype is not None else "VARCHAR" - logger.debug( - f"Column '{column_name}' has no sample data, using pandas dtype '{dtype}' -> '{inferred_type}'" - ) - column_types[column_name] = inferred_type - - # Cache the types - self._column_types_cache = column_types - return column_types - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns in standard information_schema.columns format. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - # Ensure we're connected and have the sheet data - if not hasattr(self, "sheet") or self.sheet is None: - self.connect() - - # Validate that table_name matches the configured sheet_name - if table_name != self.connection_data.get("sheet_name"): - return Response( - RESPONSE_TYPE.ERROR, - error_message=f"Table '{table_name}' not found. Available table: {self.connection_data.get('sheet_name')}", - ) - - # Get column types - column_types = self._get_column_types() - sql_types = [column_types.get(col, "VARCHAR") for col in self.sheet.columns] - - # Transform to information_schema.columns format (all required fields) - columns_data = [] - for ordinal_position, (column_name, sql_type) in enumerate(zip(self.sheet.columns, sql_types), start=1): - columns_data.append( - { - "COLUMN_NAME": column_name, - "DATA_TYPE": sql_type, - "ORDINAL_POSITION": ordinal_position, - "COLUMN_DEFAULT": None, - "IS_NULLABLE": "YES", # Assume nullable since we can't determine from CSV - "CHARACTER_MAXIMUM_LENGTH": None, - "CHARACTER_OCTET_LENGTH": None, - "NUMERIC_PRECISION": None, - "NUMERIC_SCALE": None, - "DATETIME_PRECISION": None, - "CHARACTER_SET_NAME": None, - "COLLATION_NAME": None, - } - ) - - df = pd.DataFrame(columns_data) - result = Response(RESPONSE_TYPE.TABLE, data_frame=df) - result.to_columns_table_response(map_type_fn=_map_type) - - return result diff --git a/mindsdb/integrations/handlers/sheets_handler/tests/__init__.py b/mindsdb/integrations/handlers/sheets_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/sheets_handler/tests/test_sheets_handler.py b/mindsdb/integrations/handlers/sheets_handler/tests/test_sheets_handler.py deleted file mode 100644 index cacd2c98b56..00000000000 --- a/mindsdb/integrations/handlers/sheets_handler/tests/test_sheets_handler.py +++ /dev/null @@ -1,33 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.sheets_handler.sheets_handler import SheetsHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class SheetsHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "spreadsheet_id": "12wgS-1KJ9ymUM-6VYzQ0nJYGitONxay7cMKLnEE2_d0", - "sheet_name": "iris" - } - cls.handler = SheetsHandler('test_sheets_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM iris" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - columns = self.handler.get_columns() - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/shopify_handler/__init__.py b/mindsdb/integrations/handlers/shopify_handler/__init__.py index a3057dbc5f6..b0ec9e5f3a0 100644 --- a/mindsdb/integrations/handlers/shopify_handler/__init__.py +++ b/mindsdb/integrations/handlers/shopify_handler/__init__.py @@ -15,7 +15,7 @@ name = "shopify" type = HANDLER_TYPE.DATA icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ "Handler", diff --git a/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py b/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py index 5e876751566..055deb1a0f7 100644 --- a/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +++ b/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py @@ -35,6 +35,8 @@ CompanyContactsTable, ) from mindsdb.integrations.libs.api_handler import MetaAPIHandler +from mindsdb.integrations.libs.passthrough import PassthroughMixin +from mindsdb.integrations.libs.passthrough_types import PassthroughRequest from mindsdb.integrations.libs.response import ( HandlerStatusResponse as StatusResponse, HandlerResponse as Response, @@ -51,13 +53,37 @@ logger = log.getLogger(__name__) -class ShopifyHandler(MetaAPIHandler): +class ShopifyHandler(MetaAPIHandler, PassthroughMixin): """ The Shopify handler implementation. """ name = "shopify" + # REST passthrough configuration. Shopify sends the Admin API token in + # `X-Shopify-Access-Token`, not `Authorization: Bearer`, so we override + # the default auth header. v1 requires the caller to pre-supply the + # access token in connection_data β€” the existing client_id/client_secret + # OAuth dance runs inside `connect()` and isn't surfaced to the mixin. + _bearer_token_arg = "access_token" + _auth_header_name = "X-Shopify-Access-Token" + _auth_header_format = "{token}" + _auth_mode = "custom" + _base_url_default = None + # Version-less path β€” Shopify redirects this to the current stable + # Admin API version, so the probe survives quarterly API releases. + _test_request = PassthroughRequest(method="GET", path="/admin/shop.json") + + def _build_base_url(self) -> str | None: + data = self._get_connection_data() + shop = data.get("shop_url") + if not shop: + return None + shop = str(shop) + if not shop.startswith(("http://", "https://")): + shop = f"https://{shop}" + return shop.rstrip("/") + def __init__(self, name: str, **kwargs): """ Initialize the handler. diff --git a/mindsdb/integrations/handlers/singlestore_handler/README.md b/mindsdb/integrations/handlers/singlestore_handler/README.md deleted file mode 100644 index 2507b121413..00000000000 --- a/mindsdb/integrations/handlers/singlestore_handler/README.md +++ /dev/null @@ -1,42 +0,0 @@ -## Implementation - -This handler is implemented by extending the MySQLHandler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - -There are several optional arguments that can be used as well. - -* `ssl` is the `ssl` parameter value that indicates whether SSL is enabled (`True`) or disabled (`False`). -* `ssl_ca` is the SSL Certificate Authority. -* `ssl_cert` stores SSL certificates. -* `ssl_key` stores SSL keys. - -## Usage - -In order to make use of this handler and connect to the SingleStore database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE singlestore_datasource -WITH - ENGINE = 'singlestore', - PARAMETERS = { - "host": "127.0.0.1", - "port": 3306, - "database": "singlestore", - "user": "root", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM singlestore_datasource.example_table; -``` diff --git a/mindsdb/integrations/handlers/singlestore_handler/__about__.py b/mindsdb/integrations/handlers/singlestore_handler/__about__.py deleted file mode 100644 index 5331cf6a310..00000000000 --- a/mindsdb/integrations/handlers/singlestore_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB SingleStore handler' -__package_name__ = 'mindsdb_singlestore_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for SingleStore" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/singlestore_handler/__init__.py b/mindsdb/integrations/handlers/singlestore_handler/__init__.py deleted file mode 100644 index 20eb42c98bb..00000000000 --- a/mindsdb/integrations/handlers/singlestore_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .singlestore_handler import SingleStoreHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'SingleStore' -name = 'singlestore' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/singlestore_handler/icon.svg b/mindsdb/integrations/handlers/singlestore_handler/icon.svg deleted file mode 100644 index 288499760e8..00000000000 --- a/mindsdb/integrations/handlers/singlestore_handler/icon.svg +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/singlestore_handler/requirements.txt b/mindsdb/integrations/handlers/singlestore_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/singlestore_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py b/mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py deleted file mode 100644 index c1516194a48..00000000000 --- a/mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py +++ /dev/null @@ -1,13 +0,0 @@ -from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler -from .__about__ import __version__ as version - - -class SingleStoreHandler(MySQLHandler): - """ - This handler handles connection and execution of the SingleStore statements. - """ - name = 'singlestore' - - def __init__(self, name, **kwargs): - kwargs['conn_attrs'] = {'mindsdb', 'MindsDB', version} - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/singlestore_handler/tests/__init__.py b/mindsdb/integrations/handlers/singlestore_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py b/mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py deleted file mode 100644 index a4ff05afdfd..00000000000 --- a/mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py +++ /dev/null @@ -1,42 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.mysql_handler.mysql_handler import MySQLHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class MySQLHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "localhost", - "port": "3306", - "user": "root", - "password": "", - "database": "test", - "ssl": False - } - cls.handler = MySQLHandler('test_singlestore_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.check_connection() - - def test_1_native_query_show_dbs(self): - dbs = self.handler.native_query("SHOW DATABASES;") - assert dbs['type'] is not RESPONSE_TYPE.ERROR - - def test_2_get_tables(self): - tbls = self.handler.get_tables() - assert tbls['type'] is not RESPONSE_TYPE.ERROR - - def test_5_drop_table(self): - res = self.handler.native_query("DROP TABLE IF EXISTS test_mdb") - assert res['type'] is not RESPONSE_TYPE.ERROR - - def test_4_create_table(self): - res = self.handler.native_query("CREATE TABLE IF NOT EXISTS test_mdb (test_col INT)") - assert res['type'] is not RESPONSE_TYPE.ERROR - - def test_7_select_query(self): - query = "SELECT * FROM test_mdb WHERE 'id'='a'" - result = self.handler.native_query(query) - assert result['type'] is RESPONSE_TYPE.TABLE diff --git a/mindsdb/integrations/handlers/slack_handler/README.md b/mindsdb/integrations/handlers/slack_handler/README.md deleted file mode 100644 index cd0c2e0bb04..00000000000 --- a/mindsdb/integrations/handlers/slack_handler/README.md +++ /dev/null @@ -1,268 +0,0 @@ ---- -title: Slack -sidebarTitle: Slack ---- - -This documentation describes the integration of MindsDB with [Slack](https://slack.com/), a cloud-based collaboration platform. -The integration allows MindsDB to access data from Slack and enhance Slack with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Slack to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Slack. - -## Connection - -Establish a connection to Slack from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/slack_handler) as an engine. - -```sql -CREATE DATABASE slack_datasource -WITH ENGINE = 'slack', -PARAMETERS = { - "token": "values", -- required parameter - "app_token": "values" -- optional parameter -}; -``` - -The Slack handler is initialized with the following parameters: - -* `token` is a Slack bot token to use for authentication. -* `app_token` is a Slack app token to use for authentication. - - -Please note that `app_token` is an optional parameter. Without providing it, you need to integrate an app into a Slack channel. - - -### Method 1: Chatbot responds in direct messages to a Slack app - -One way to connect Slack is to use both bot and app tokens. By following the instructions below, you'll set up the Slack app and be able to message this Slack app directly to chat with the bot. - - -If you want to use Slack in the [`CREATE CHATBOT`](/agents/chatbot) syntax, use this method of connecting Slack to MindsDB. - - - -Here is how to set up a Slack app and generate both a Slack bot token and a Slack app token: - - 1. Follow [this link](https://api.slack.com/apps) and sign in with your Slack account. - 2. Create a new app `From scratch` or select an existing app. - - Please note that the following instructions support apps created `From scratch`. - - For apps created `From an app manifest`, please follow the [Slack docs here](https://api.slack.com/reference/manifests). - 3. Go to *Basic Information* under *Settings*. - - Under *App-Level Tokens*, click on *Generate Token and Scopes*. - - Name the token `socket` and add the `connections:write` scope. - - **Copy and save the `xapp-...` token - you'll need it to publish the chatbot.** - 4. Go to *Socket Mode* under *Settings* and toggle the button to *Enable Socket Mode*. - 5. Go to *OAuth & Permissions* under *Features*. - - Add the following *Bot Token Scopes*: - - app_mentions:read - - channels:history - - channels:read - - chat:write - - groups:history - - groups:read (optional) - - im:history - - im:read - - im:write - - mpim:read (optional) - - users.profile:read - - users:read (optional) - - In the *OAuth Tokens for Your Workspace* section, click on *Install to Workspace* and then *Allow*. - - **Copy and save the `xoxb-...` token - you'll need it to publish the chatbot.** - 6. Go to *App Home* under *Features* and click on the checkbox to *Allow users to send Slash commands and messages from the messages tab*. - 7. Go to *Event Subscriptions* under *Features*. - - Toggle the button to *Enable Events*. - - Under *Subscribe to bot events*, click on *Add Bot User Event* and add `app_mention` and `message.im`. - - Click on *Save Changes*. - 8. Now you can use tokens from points 3 and 5 to initialize the Slack handler in MindsDB. - - - -This connection method enables you to chat directly with an app via Slack. - -Alternatively, you can connect an app to the Slack channel: - - Go to the channel where you want to use the bot. - - Right-click on the channel and select *View Channel Details*. - - Select *Integrations*. - - Click on *Add an App*. - - -Here is how to connect Slack to MindsDB: - -```sql -CREATE DATABASE slack_datasource -WITH - ENGINE = 'slack', - PARAMETERS = { - "token": "xoxb-...", - "app_token": "xapp-..." - }; -``` - -It comes with the `conversations` and `messages` tables. - -### Method 2: Chatbot responds on a defined Slack channel - -Another way to connect to Slack is to use the bot token only. By following the instructions below, you'll set up the Slack app and integrate it into one of the channels from which you can directly chat with the bot. - - -Here is how to set up a Slack app and generate a Slack bot token: - - 1. Follow [this link](https://api.slack.com/apps) and sign in with your Slack account. - 2. Create a new app `From scratch` or select an existing app. - - Please note that the following instructions support apps created `From scratch`. - - For apps created `From an app manifest`, please follow the [Slack docs here](https://api.slack.com/reference/manifests). - 3. Go to the *OAuth & Permissions* section. - 4. Under the *Scopes* section, add the *Bot Token Scopes* necessary for your application. You can add more later as well. - - channels:history - - channels:read - - chat:write - - groups:read - - im:read - - mpim:read - - users:read - 5. Install the bot in your workspace. - 6. Under the *OAuth Tokens for Your Workspace* section, copy the the *Bot User OAuth Token* value. - 7. Open your Slack application and add the App/Bot to one of the channels: - - Go to the channel where you want to use the bot. - - Right-click on the channel and select *View Channel Details*. - - Select *Integrations*. - - Click on *Add an App*. - 8. Now you can use the token from step 6 to initialize the Slack handler in MindsDB and use the channel name to query and write messages. - - -Here is how to connect Slack to MindsDB: - -```sql -CREATE DATABASE slack_datasource -WITH - ENGINE = 'slack', - PARAMETERS = { - "token": "xoxb-..." - }; -``` - -## Usage - - -The following usage applies when **Connection Method 2** was used to connect Slack. - -See the usage for **Connection Method 1** [via the `CREATE CHATBOT` syntax](/sql/tutorials/create-chatbot). - - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM slack_datasource.table_name -LIMIT 10; -``` - -## Supported Tables - -The Slack integration supports the following tables: - -### `conversations` Table - -The `conversations` virtual table is used to query conversations (channels, DMs, and groups) in the connected Slack workspace. - -```sql --- Retrieve all conversations in the workspace -SELECT * -FROM slack_datasource.conversations; - --- Retrieve a specific conversation using its ID -SELECT * -FROM slack_datasource.conversations -WHERE id = ""; - --- Retrieve a specific conversation using its name -SELECT * -FROM slack_datasource.conversations -WHERE name = ""; -``` - -### `messages` Table - -The `messages` virtual table is used to query, post, update, and delete messages in specific conversations within the connected Slack workspace. - -```sql --- Retrieve all messages from a specific conversation --- channel_id is a required parameter and can be found in the conversations table -SELECT * -FROM slack_datasource.messages -WHERE channel_id = ""; - --- Post a new message --- channel_id and text are required parameters -INSERT INTO slack_datasource.messages (channel_id, text) -VALUES("", "Hello from SQL!"); - --- Update a bot-posted message --- channel_id, ts, and text are required parameters -UPDATE slack_datasource.messages -SET text = "Updated message content" -WHERE channel_id = "" AND ts = ""; - --- Delete a bot-posted message --- channel_id and ts are required parameters -DELETE FROM slack_datasource.messages -WHERE channel_id = "" AND ts = ""; -``` - - -You can also find the channel ID by right-clicking on the conversation in Slack, selecting 'View conversation details' or 'View channel details,' and copying the channel ID from the bottom of the 'About' tab. - - -### `threads` Table - -The `threads` virtual table is used to query and post messages in threads within the connected Slack workspace. - -```sql --- Retrieve all messages in a specific thread --- channel_id and thread_ts are required parameters --- thread_ts is the timestamp of the parent message and can be found in the messages table -SELECT * -FROM slack_datasource.threads -WHERE channel_id = "" AND thread_ts = ""; - --- Post a message to a thread -INSERT INTO slack_datasource.threads (channel_id, thread_ts, text) -VALUES("", "", "Replying to the thread!"); -``` - -### `users` Table - -The `users` virtual table is used to query user information in the connected Slack workspace. - -```sql --- Retrieve all users in the workspace -SELECT * -FROM slack_datasource.users; - --- Retrieve a specific user by name -SELECT * -FROM slack_datasource.users -WHERE name = "John Doe"; -``` - -## Rate Limit Considerations - -The Slack API enforces rate limits on data retrieval. Therefore, when querying the above tables, by default, the first 1000 (999 for `messages`) records are returned. - -To retrieve more records, use the `LIMIT` clause in your SQL queries. For example: - -```sql -SELECT * -FROM slack_datasource.conversations -LIMIT 2000; -``` - -When using the LIMIT clause to query additional records, you may encounter Slack API rate limits. - -## Next Steps - -Follow [this tutorial](use-cases/ai_agents/build_ai_agents) to build an AI agent with MindsDB. diff --git a/mindsdb/integrations/handlers/slack_handler/__about__.py b/mindsdb/integrations/handlers/slack_handler/__about__.py deleted file mode 100644 index 2daf56c510f..00000000000 --- a/mindsdb/integrations/handlers/slack_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Slack Handler' -__package_name__ = 'mindsdb_slack_handler' -__version__ = '0.0.2' -__description__ = 'MindsDB handler for Slack' -__author__ = 'Tarun Chawla' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/slack_handler/__init__.py b/mindsdb/integrations/handlers/slack_handler/__init__.py deleted file mode 100644 index be1e820efb1..00000000000 --- a/mindsdb/integrations/handlers/slack_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description - -try: - from .slack_handler import SlackHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Slack" -name = "slack" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY - -__all__ = [ - "Handler", - "version", - "name", - "type", - "support_level", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/slack_handler/connection_args.py b/mindsdb/integrations/handlers/slack_handler/connection_args.py deleted file mode 100644 index 6e7f6bad93c..00000000000 --- a/mindsdb/integrations/handlers/slack_handler/connection_args.py +++ /dev/null @@ -1,26 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - token={ - 'type': ARG_TYPE.STR, - 'description': 'The bot token for the Slack app.', - 'secret': True, - 'required': True, - 'label': 'Token', - }, - app_token={ - 'type': ARG_TYPE.PWD, - 'description': 'The app token for the Slack app.', - 'secret': True, - 'required': False, - 'label': 'App Token' - } -) - -connection_args_example = OrderedDict( - token='xapp-A111-222-xyz', - app_token='xoxb-111-222-xyz' -) diff --git a/mindsdb/integrations/handlers/slack_handler/icon.svg b/mindsdb/integrations/handlers/slack_handler/icon.svg deleted file mode 100644 index 9803b60eef9..00000000000 --- a/mindsdb/integrations/handlers/slack_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/slack_handler/requirements.txt b/mindsdb/integrations/handlers/slack_handler/requirements.txt deleted file mode 100644 index 826b5c2cd2d..00000000000 --- a/mindsdb/integrations/handlers/slack_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -slack_sdk==3.30.0 diff --git a/mindsdb/integrations/handlers/slack_handler/slack_handler.py b/mindsdb/integrations/handlers/slack_handler/slack_handler.py deleted file mode 100644 index 10dca335385..00000000000 --- a/mindsdb/integrations/handlers/slack_handler/slack_handler.py +++ /dev/null @@ -1,351 +0,0 @@ -from copy import deepcopy -import datetime as dt -import os -import threading -from typing import Any, Callable, Dict, List, Text - -import pandas as pd -from slack_sdk import WebClient -from slack_sdk.errors import SlackApiError -from slack_sdk.socket_mode import SocketModeClient -from slack_sdk.socket_mode.response import SocketModeResponse -from slack_sdk.socket_mode.request import SocketModeRequest - -from mindsdb.integrations.handlers.slack_handler.slack_tables import ( - SlackConversationsTable, - SlackMessagesTable, - SlackThreadsTable, - SlackUsersTable -) -from mindsdb.integrations.libs.api_handler import APIChatHandler, FuncParser -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE -) -from mindsdb.utilities.config import Config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class SlackHandler(APIChatHandler): - """ - This handler handles the connection and execution of SQL statements on Slack. - Additionally, it allows the setup of a real-time connection to the Slack API using the Socket Mode for chat-bots. - """ - - def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the SAP HANA database. - kwargs(Any): Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - # If the parameters are not provided, check the environment variables and the handler configuration. - handler_config = Config().get('slack_handler', {}) - - for key in ['token', 'app_token']: - if key not in self.connection_data: - if f'SLACK_{key.upper()}' in os.environ: - self.connection_data[key] = os.environ[f'SLACK_{key.upper()}'] - elif key in handler_config: - self.connection_data[key] = handler_config[key] - - self.web_connection = None - self._socket_connection = None - self.is_connected = False - - self._register_table('conversations', SlackConversationsTable(self)) - self._register_table('messages', SlackMessagesTable(self)) - self._register_table('threads', SlackThreadsTable(self)) - self._register_table('users', SlackUsersTable(self)) - - def connect(self) -> WebClient: - """ - Establishes a connection to the Slack API using the WebClient. - - Returns: - WebClient: The WebClient object to interact with the Slack API. - """ - if self.is_connected is True: - return self.web_connection - - # Check if the mandatory connection parameter (token) is available. - if 'token' not in self.connection_data: - raise ValueError('Required parameter (token) must be provided.') - - try: - self.web_connection = WebClient(token=self.connection_data['token']) - self.is_connected = True - return self.web_connection - except Exception as unknown_error: - logger.error(f'Unknown error connecting to Slack API: {unknown_error}') - raise - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Slack API, both for the WebClient and the Socket Mode. - - Raises: - SlackApiError: If an error occurs while connecting to the Slack API. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - - try: - web_connection = self.connect() - # Check the status of the web connection. - web_connection.auth_test() - - # Check the status of the socket connection if the app_token is provided. - if 'app_token' in self.connection_data: - _socket_connection = SocketModeClient( - app_token=self.connection_data['app_token'], - web_client=web_connection - ) - _socket_connection.connect() - _socket_connection.disconnect() - - response.success = True - except (SlackApiError, ValueError) as known_error: - logger.error(f'Connection check to the Slack API failed, {known_error}!') - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f'Connection check to the Slack API failed due to an unknown error, {unknown_error}!') - response.error_message = str(unknown_error) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: Text = None) -> Response: - """ - Executes native Slack SDK methods as specified in the query string. - - Args: - query: The query string containing the method name and parameters. - - Returns: - Response: A response object containing the result of the query. - """ - method_name, params = FuncParser().from_string(query) - - df = self._call_slack_api(method_name, params) - - return Response( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - - def _call_slack_api(self, method_name: Text = None, params: Dict = None) -> List[Dict]: - """ - Calls the Slack SDK method with the specified method name and parameters. - - Args: - method_name (Text): The name of the method to call. - params (Dict): The parameters to pass to the method. - - Raises: - SlackApiError: If an error occurs while calling the Slack SDK method - - Returns: - List[Dict]: The result from running the Slack SDK method. - """ - web_connection = self.connect() - method = getattr(web_connection, method_name) - - items = [] - try: - response = method(**params) - response_data = deepcopy(response.data) - - # Get only the data items from the response data. - items.extend(self._extract_data_from_response(response_data)) - - # If the response contains a cursor, fetch the next page of results. - if 'response_metadata' in response and 'next_cursor' in response['response_metadata']: - while response['response_metadata']['next_cursor']: - response = method(cursor=response['response_metadata']['next_cursor'], **params) - response_data = deepcopy(response.data) - - # Get only the data items from the response data. - items.extend(self._extract_data_from_response(response_data)) - except SlackApiError as slack_error: - error = f"Error calling method '{method_name}' with params '{params}': {slack_error.response['error']}" - logger.error(error) - raise - - if items: - df = pd.DataFrame(items) - - return df - - def _extract_data_from_response(self, response_data: Dict) -> List[Dict]: - """ - Extracts the data items from the response object. - - Args: - response_data (Dict): The response object containing the data items. - - Raises: - ValueError: If the response data could not be parsed. - - Returns: - List[Dict]: The data items extracted from the response object. - """ - # Remove the metadata from the response. - for key in ['ok', 'response_metadata', 'cache_ts', 'latest', 'pin_count', 'has_more']: - if key in response_data: - response_data.pop(key) - - # If the response contains only one key, return the value of that key as a list. - if len(response_data) == 1: - key = list(response_data.keys())[0] - if isinstance(response_data[key], list): - return response_data[key] - - else: - return [response_data[key]] - - # Otherwise, raise an error. - raise ValueError('Response data could not be parsed.') - - def get_chat_config(self) -> Dict: - """ - Returns the chat configuration for the Slack handler. - - Returns: - Dict: The chat configuration. - """ - return { - 'polling': { - 'type': 'realtime', - }, - 'memory': { - 'type': 'handler', - }, - 'tables': [ - { - 'chat_table': { - 'name': 'messages', - 'chat_id_col': 'channel_id', - 'username_col': 'user', - 'text_col': 'text', - 'time_col': 'created_at', - } - }, - { - 'chat_table': { - 'name': 'threads', - 'chat_id_col': ['channel_id', 'thread_ts'], - 'username_col': 'user', - 'text_col': 'text', - 'time_col': 'thread_ts', - } - } - ] - } - - def get_my_user_name(self) -> Text: - """ - Gets the name of the bot user. - - Returns: - Text: The name of the bot user. - """ - web_connection = self.connect() - user_info = web_connection.auth_test().data - return user_info['bot_id'] - - def subscribe(self, stop_event: threading.Event, callback: Callable, table_name: Text = 'messages', - columns: List = None, **kwargs: Any) -> None: - """ - Subscribes to the Slack API using the Socket Mode for real-time responses to messages. - - Args: - stop_event (threading.Event): The event to stop the subscription. - callback (Callable): The callback function to process the messages. - table_name (Text): The name of the table to subscribe to. - kwargs: Arbitrary keyword arguments. - """ - if table_name not in ['messages', 'threads']: - raise RuntimeError(f'Table {table_name} is not supported for subscription.') - - # Raise an error if columns are provided. - # Since Slack subscriptions depend on events and not changes to the virtual tables, columns are not supported. - if columns: - raise RuntimeError('Columns are not supported for Slack subscriptions.') - - self._socket_connection = SocketModeClient( - # This app-level token will be used only for establishing a connection. - app_token=self.connection_data['app_token'], # xapp-A111-222-xyz - # The WebClient for performing Web API calls in listeners. - web_client=WebClient(token=self.connection_data['token']), # xoxb-111-222-xyz - ) - - def _process_websocket_message(client: SocketModeClient, request: SocketModeRequest) -> None: - """ - Pre-processes the incoming WebSocket message from the Slack API and calls the callback function to process the message. - - Args: - client (SocketModeClient): The client object to send the response. - request (SocketModeRequest): The request object containing the payload. - """ - # Acknowledge the request. - response = SocketModeResponse(envelope_id=request.envelope_id) - client.send_socket_mode_response(response) - - # Ignore requests that are not events. - if request.type != 'events_api': - return - - # Ignore duplicate requests. - if request.retry_attempt is not None and request.retry_attempt > 0: - return - - payload_event = request.payload['event'] - # Avoid responding to events other than direct messages and app mentions. - if payload_event['type'] not in ('message', 'app_mention'): - return - - # Avoid responding to unrelated events like message_changed, message_deleted, etc. - if 'subtype' in payload_event: - return - - # Avoid responding to messages from the bot. - if 'bot_id' in payload_event: - return - - key = { - 'channel_id': payload_event['channel'], - } - - row = { - 'text': payload_event['text'], - 'user': payload_event['user'], - 'created_at': dt.datetime.fromtimestamp(float(payload_event['ts'])).strftime('%Y-%m-%d %H:%M:%S') - } - - # Add thread_ts to the key and row if it is a thread message. This is used to identify threads. - # This message should be handled via the threads table. - if 'thread_ts' in payload_event: - key['thread_ts'] = payload_event['thread_ts'] - - callback(row, key) - - self._socket_connection.socket_mode_request_listeners.append(_process_websocket_message) - self._socket_connection.connect() - - stop_event.wait() - - self._socket_connection.close() diff --git a/mindsdb/integrations/handlers/slack_handler/slack_tables.py b/mindsdb/integrations/handlers/slack_handler/slack_tables.py deleted file mode 100644 index 1a88ab0eda3..00000000000 --- a/mindsdb/integrations/handlers/slack_handler/slack_tables.py +++ /dev/null @@ -1,718 +0,0 @@ -import datetime as dt -from typing import Any, Dict, List, Text - -from mindsdb_sql_parser.ast import Delete, Insert, Update -import pandas as pd -from slack_sdk.errors import SlackApiError - -from mindsdb.integrations.libs.api_handler import APIResource -from mindsdb.integrations.utilities.sql_utils import ( - extract_comparison_conditions, - FilterCondition, - FilterOperator, - SortColumn, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class SlackConversationsTable(APIResource): - """ - This is the table abstraction for interacting with conversations via the Slack API. - """ - - def list(self, conditions: List[FilterCondition] = None, limit: int = None, **kwargs: Any) -> pd.DataFrame: - """ - Retrieves a list of Slack conversations based on the specified conditions. - If no channel ID(s) are provided, all channels are retrieved as follows: - - If the provided limit is greater than 1000, no limit to the API call is provided and the results are paginated until the limit is reached. - - Otherwise, the provided limit or a default limit of 1000 is used when making the API call. - - Therefore, if a user is to retrieve more than 1000 channels, the limit should be set to a value greater than 1000. - - The above is designed to prevent rate limiting by the Slack API. - - Args: - conditions (List[FilterCondition]): The conditions to filter the conversations. - limit (int): The limit of the conversations to return. - kwargs(Any): Arbitrary keyword arguments. - - Raises: - ValueError: - - If an unsupported operator is used for the column 'id'. - - If the channel ID(s) provided are not found. - SlackApiError: If an error occurs when getting the channels from the Slack API. - - Returns: - pd.DataFrame: The list of conversations. - """ - channels = [] - for condition in conditions: - value = condition.value - op = condition.op - - # Handle the column 'id'. - if condition.column == "id": - if op not in [FilterOperator.EQUAL, FilterOperator.IN]: - raise ValueError(f"Unsupported operator '{op}' for column 'id'") - - if op == FilterOperator.EQUAL: - try: - channels = [self.get_channel(value)] - condition.applied = True - except ValueError: - raise - - if op == FilterOperator.IN: - try: - channels = self._get_channels(value if isinstance(value, list) else [value]) - condition.applied = True - except ValueError: - raise - - # If no channel ID(s) are provided, get all channels with the specified limit. - if not channels: - channels = self._get_all_channels(limit) - - for channel in channels: - channel["created_at"] = dt.datetime.fromtimestamp(channel["created"]) - channel["updated_at"] = dt.datetime.fromtimestamp(channel["updated"] / 1000) - - return pd.DataFrame(channels, columns=self.get_columns()) - - def get_channel(self, channel_id: Text) -> Dict: - """ - Gets the channel data for the specified channel id. - - Args: - channel_id (Text): The channel id. - - Raises: - ValueError: If the channel ID is not found. - - Returns: - Dict: The channel data. - """ - client = self.handler.connect() - - try: - response = client.conversations_info(channel=channel_id) - except SlackApiError as slack_error: - logger.error(f"Error getting channel '{channel_id}': {slack_error.response['error']}") - raise ValueError(f"Channel '{channel_id}' not found") - - return response["channel"] - - def _get_channels(self, channel_ids: List[Text]) -> List[Dict]: - """ - Gets the channel data for multiple channel ids. - As it is unlikely that a large number of channels will be provided, the API rate limits are ignored here. - - Args: - channel_ids (List[Text]): The list of channel ids. - - Raises: - ValueError: If a channel ID is not found. - - Returns: - List[Dict]: The list of channel data. - """ - channels = [] - for channel_id in channel_ids: - channel = self.get_channel(channel_id) - channels.append(channel) - - return channels - - def _get_all_channels(self, limit: int = None) -> List[Dict]: - """ - Gets the list of channels with a limit. - If the provided limit is greater than 1000, no limit to the API call is provided and the results are paginated until the limit is reached. - Otherwise, the provided limit or a default limit of 1000 is used when making the API call. - - Args: - limit (int): The limit of channels to return. - - Raises: - SlackApiError: If an error occurs when getting the channels from the Slack API. - - Returns: - List[Dict]: The list of channels. - """ - client = self.handler.connect() - - try: - # If the limit is greater than 1000, paginate the results until the limit is reached. - if limit and limit > 1000: - response = client.conversations_list() - channels = response["channels"] - - # Paginate the results until the limit is reached. - while response["response_metadata"]["next_cursor"]: - response = client.conversations_list(cursor=response["response_metadata"]["next_cursor"]) - channels.extend(response["channels"]) - if len(channels) >= limit: - break - - channels = channels[:limit] - # Otherwise, use the provided limit or a default limit of 1000. - else: - response = client.conversations_list(limit=limit if limit else 1000) - channels = response["channels"] - except SlackApiError as slack_error: - logger.error(f"Error getting channels: {slack_error.response['error']}") - raise - - return channels - - def get_columns(self) -> List[str]: - """ - Retrieves the attributes (columns) of the Slack conversations. - - Returns: - List[str]: The list of columns. - """ - return [ - "id", - "name", - "is_channel", - "is_group", - "is_im", - "is_mpim", - "is_private", - "is_archived", - "is_general", - "is_shared", - "is_ext_shared", - "is_org_shared", - "creator", - "created_at", - "updated_at", - ] - - -class SlackMessagesTable(APIResource): - """ - This is the table abstraction for interacting with messages via the Slack API. - """ - - def list( - self, conditions: List[FilterCondition] = None, limit: int = None, sort: List[SortColumn] = None, **kwargs: Any - ) -> pd.DataFrame: - """ - Retrieves a list of messages from Slack conversations based on the specified conditions. - - `channel_id` is a required parameter to retrieve messages from a conversation. - - Messages are retrieved as follows for a given conversation: - - If the provided limit is greater than 999, no limit to the API call is provided and the results are paginated until the limit is reached. - - Otherwise, the provided limit or a default limit of 999 is used when making the API call. - - Therefore, if a user is to retrieve more than 999 messages, the limit should be set to a value greater than 999. - The above is dependent on the other parameters provided in the conditions. - - The above is designed to prevent rate limiting by the Slack API. - - Args: - conditions (List[FilterCondition]): The conditions to filter the messages. - limit (int): The limit of the messages to return. - sort (List[SortColumn]): The columns to sort the messages by. - kwargs (Any): Arbitrary keyword arguments. - - Raises: - ValueError: - - If the 'channel_id' parameter is not provided. - - If an unsupported operator is used for the column 'channel_id'. - - If the channel ID provided is not found. - SlackApiError: If an error occurs when getting the messages from the Slack API. - - Returns: - pd.DataFrame: The list of messages. - """ - client = self.handler.connect() - - # Build the parameters for the call to the Slack API. - params = {} - for condition in conditions: - value = condition.value - op = condition.op - - # Handle the column 'channel_id'. - if condition.column == "channel_id": - if op != FilterOperator.EQUAL: - raise ValueError(f"Unsupported operator '{op}' for column 'channel_id'") - - # Check if the provided channel exists. - try: - channel = SlackConversationsTable(self.handler).get_channel(value) - params["channel"] = value - condition.applied = True - except SlackApiError: - raise ValueError(f"Channel '{value}' not found") - - # Handle the column 'created_at'. - elif condition.column == "created_at" and value is not None: - date = dt.datetime.fromisoformat(value).replace(tzinfo=dt.timezone.utc) - if op == FilterOperator.GREATER_THAN: - params["oldest"] = date.timestamp() + 1 - elif op == FilterOperator.GREATER_THAN_OR_EQUAL: - params["oldest"] = date.timestamp() - elif op == FilterOperator.LESS_THAN_OR_EQUAL: - params["latest"] = date.timestamp() - else: - continue - condition.applied = True - - if "channel" not in params: - raise ValueError("To retrieve data from Slack, you need to provide the 'channel_id' parameter.") - - # Retrieve the messages from the Slack API. - try: - # If the limit is greater than 999, paginate the results until the limit is reached. - if limit and limit > 999: - params["limit"] = 999 - response = client.conversations_history(**params) - messages = response["messages"] - - # Paginate the results until the limit is reached. response_metadata may be None. - while response.get("response_metadata", {}).get("next_cursor"): - response = client.conversations_history( - cursor=response["response_metadata"]["next_cursor"], **params - ) - messages.extend(response["messages"]) - if len(messages) >= limit: - break - - messages = messages[:limit] - # Otherwise, use the provided limit or a default limit of 999. - else: - params["limit"] = limit if limit else 999 - response = client.conversations_history(**params) - messages = response["messages"] - except SlackApiError as slack_error: - logger.error(f"Error getting messages: {slack_error.response['error']}") - raise - - result = pd.DataFrame(messages, columns=self.get_columns()) - - result = result[result["text"].notnull()] - - # Add the channel ID and name to the result. - result["channel_id"] = params["channel"] - result["channel_name"] = channel["name"] if "name" in channel else None - - # Translate the time stamp into a 'created_at' field. - result["created_at"] = pd.to_datetime(result["ts"].astype(float), unit="s").dt.strftime("%Y-%m-%d %H:%M:%S") - - # Sort the messages by the specified columns. - if sort: - result.sort_values(by=[col.column for col in sort], ascending=[col.ascending for col in sort], inplace=True) - - return result - - def insert(self, query: Insert): - """ - Executes an INSERT SQL query represented by an ASTNode object and posts a message to a Slack channel. - - Args: - query (Insert): An ASTNode object representing the SQL query to be executed. - - Raises: - ValueError: If the 'channel_id' or 'text' parameters are not provided. - SlackApiError: If an error occurs when posting the message to the Slack channel. - """ - client = self.handler.connect() - - # Get column names and values from the query. - columns = [col.name for col in query.columns] - for row in query.values: - params = dict(zip(columns, row)) - - # Check if required parameters are provided. - if "channel_id" not in params or "text" not in params: - raise ValueError( - "To insert data into Slack, you need to provide the 'channel_id' and 'text' parameters." - ) - - try: - client.chat_postMessage(channel=params["channel_id"], text=params["text"]) - except SlackApiError as slack_error: - logger.error( - f"Error posting message to Slack channel '{params['channel_id']}': {slack_error.response['error']}" - ) - raise - - def update(self, query: Update): - """ - Executes an UPDATE SQL query represented by an ASTNode object and updates a message in a Slack channel. - - Args: - query (Update): An ASTNode object representing the SQL query to be executed. - - Raises: - ValueError: - - If the 'channel_id', 'ts', or 'text' parameters are not provided. - - If an unsupported operator is used for the columns. - - If an unsupported column is used. - - If the channel ID provided is not found. - SlackApiError: If an error occurs when updating the message in the Slack channel. - """ - client = self.handler.connect() - - conditions = extract_comparison_conditions(query.where) - - # Build the parameters for the call to the Slack API. - params = {} - # Extract the parameters from the conditions. - for op, arg1, arg2 in conditions: - # Handle the column 'channel_id'. - if arg1 == "channel_id": - # Check if the provided channel exists. - try: - SlackConversationsTable(self.handler).get_channel(arg2) - params["channel"] = arg2 - except SlackApiError as slack_error: - logger.error(f"Error getting channel '{arg2}': {slack_error.response['error']}") - raise ValueError(f"Channel '{arg2}' not found") - - # Handle the column'ts'. - elif arg1 == "ts": - if op == "=": - params[arg1] = str(arg2) - else: - raise ValueError(f"Unsupported operator '{op}' for column '{arg1}'") - - else: - raise ValueError(f"Unsupported column '{arg1}'") - - # Extract the update columns and values. - for col, val in query.update_columns.items(): - if col == "text": - params[col] = str(val).strip("'") - else: - raise ValueError(f"Unsupported column '{col}'") - - # Check if required parameters are provided. - if "channel" not in params or "ts" not in params or "text" not in params: - raise ValueError( - "To update a message in Slack, you need to provide the 'channel', 'ts', and 'text' parameters." - ) - - try: - client.chat_update(channel=params["channel"], ts=params["ts"], text=params["text"].strip()) - except SlackApiError as slack_error: - logger.error( - f"Error updating message in Slack channel '{params['channel']}' with timestamp '{params['ts']}': {slack_error.response['error']}" - ) - raise - - def delete(self, query: Delete): - """ - Executes a DELETE SQL query represented by an ASTNode object and deletes a message from a Slack channel. - - Args: - query (Delete): An ASTNode object representing the SQL query to be executed. - - Raises: - ValueError: - - If the 'channel_id' or 'ts' parameters are not provided. - - If an unsupported operator is used for the columns. - - If an unsupported column is used. - - If the channel ID provided is not found. - SlackApiError: If an error occurs when deleting the message from the Slack channel. - """ - client = self.handler.connect() - - conditions = extract_comparison_conditions(query.where) - - # Build the parameters for the call to the Slack API. - params = {} - for op, arg1, arg2 in conditions: - # Handle the column 'channel_id'. - if arg1 == "channel_id": - # Check if the provided channel exists. - try: - SlackConversationsTable(self.handler).get_channel(arg2) - params["channel"] = arg2 - except SlackApiError as slack_error: - logger.error(f"Error getting channel '{arg2}': {slack_error.response['error']}") - raise ValueError(f"Channel '{arg2}' not found") - - # Handle the columns 'ts'. - elif arg1 == "ts": - if op == "=": - params["ts"] = float(arg2) - else: - raise NotImplementedError(f"Unknown op: {op}") - - else: - raise ValueError(f"Unsupported column '{arg1}'") - - # Check if required parameters are provided. - if "channel" not in params or "ts" not in params: - raise ValueError("To delete a message from Slack, you need to provide the 'channel' and 'ts' parameters.") - - try: - client.chat_delete(channel=params["channel"], ts=params["ts"]) - - except SlackApiError as slack_error: - logger.error( - f"Error deleting message in Slack channel '{params['channel']}' with timestamp '{params['ts']}': {slack_error.response['error']}" - ) - raise - - def get_columns(self) -> List[Text]: - """ - Retrieves the attributes (columns) of the Slack messages. - - Returns: - List[str]: The list of columns. - """ - return [ - "channel_id", - "channel", - "client_msg_id", - "type", - "subtype", - "ts", - "created_at", - "user", - "text", - "attachments", - "files", - "reactions", - "thread_ts", - "reply_count", - "reply_users_count", - "latest_reply", - "reply_users", - ] - - -class SlackThreadsTable(APIResource): - """ - This is the table abstraction for interacting with threads in Slack conversations. - """ - - def list( - self, conditions: List[FilterCondition] = None, limit: int = None, sort: List[SortColumn] = None, **kwargs: Any - ) -> pd.DataFrame: - """ - Retrieves a list of messages in a thread based on the specified conditions. - - `channel_id` and `thread_ts` are required parameters to retrieve messages from a thread. - - Messages are retrieved as follows for a given thread: - - If the provided limit is greater than 1000, no limit to the API call is provided and the results are paginated until the limit is reached. - - Otherwise, the provided limit or a default limit of 1000 is used when making the API call. - - Therefore, if a user is to retrieve more than 1000 messages, the limit should be set to a value greater than 1000. - - The above is designed to prevent rate limiting by the Slack API. - - Args: - conditions (List[FilterCondition]): The conditions to filter the messages. - limit (int): The limit of the messages to return. - sort (List[SortColumn]): The columns to sort the messages by. - kwargs (Any): Arbitrary keyword arguments. - - Raises: - ValueError: - - If the 'channel_id' or 'thread_ts' parameters are not provided. - - If an unsupported operator is used for the columns. - - If an unsupported column is used. - - If the channel ID provided is not found. - SlackApiError: If an error occurs when getting the messages from the Slack API. - - Returns: - pd.DataFrame: The messages in the thread. - """ - client = self.handler.connect() - - # Build the parameters for the call to the Slack API. - params = {} - for condition in conditions: - value = condition.value - op = condition.op - - # Handle the column 'channel_id'. - if condition.column == "channel_id": - if op != FilterOperator.EQUAL: - raise ValueError(f"Unsupported operator '{op}' for column 'channel_id'") - - # Check if the provided channel exists. - try: - channel = SlackConversationsTable(self.handler).get_channel(value) - params["channel"] = value - condition.applied = True - except SlackApiError as slack_error: - logger.error(f"Error getting channel '{value}': {slack_error.response['error']}") - raise ValueError(f"Channel '{value}' not found") - - # Handle the column 'thread_ts'. - elif condition.column == "thread_ts": - if op != FilterOperator.EQUAL: - raise ValueError(f"Unsupported operator '{op}' for column 'thread_ts'") - - params["ts"] = value - - if "channel" not in params or "ts" not in params: - raise ValueError( - "To retrieve data from Slack, you need to provide the 'channel_id' and 'thread_ts' parameters." - ) - - # Retrieve the messages from the Slack API. - try: - # If the limit is greater than 1000, paginate the results until the limit is reached. - if limit and limit > 1000: - response = client.conversations_replies(**params) - messages = response["messages"] - - # Paginate the results until the limit is reached. - while response["response_metadata"]["next_cursor"]: - response = client.conversations_replies(cursor=response["response_metadata"]["next_cursor"]) - messages.extend(response["messages"]) - if len(messages) >= limit: - break - - messages = messages[:limit] - # Otherwise, use the provided limit or a default limit of 1000. - else: - params["limit"] = limit if limit else 1000 - response = client.conversations_replies(**params) - messages = response["messages"] - except SlackApiError as slack_error: - logger.error(f"Error getting messages: {slack_error.response['error']}") - raise - - result = pd.DataFrame(messages, columns=self.get_columns()) - - result = result[result["text"].notnull()] - - # Add the channel ID and name to the result. - result["channel_id"] = params["channel"] - result["channel_name"] = channel["name"] if "name" in channel else None - - # Sort the messages by the specified columns. - if sort: - result.sort_values(by=[col.column for col in sort], ascending=[col.ascending for col in sort], inplace=True) - - return result - - def insert(self, query: Insert): - """ - Executes an INSERT SQL query represented by an ASTNode object and posts a message to a Slack thread. - - Args: - query (Insert): An ASTNode object representing the SQL query to be executed. - - Raises: - ValueError: If the 'channel_id', 'text', or 'thread_ts' parameters are not provided. - """ - client = self.handler.connect() - - # Get column names and values from the query. - columns = [col.name for col in query.columns] - for row in query.values: - params = dict(zip(columns, row)) - - # Check if required parameters are provided. - if "channel_id" not in params or "text" not in params or "thread_ts" not in params: - raise ValueError( - "To insert data into Slack, you need to provide the 'channel_id', 'text', and 'thread_ts' parameters." - ) - - try: - client.chat_postMessage( - channel=params["channel_id"], text=params["text"], thread_ts=params["thread_ts"] - ) - except SlackApiError as slack_error: - logger.error( - f"Error posting message to Slack channel '{params['channel_id']}': {slack_error.response['error']}" - ) - raise - - def get_columns(self) -> List[Text]: - """ - Retrieves the attributes (columns) of the Slack threads. - - Returns: - List[Text]: The list of columns. - """ - return [ - "channel_id", - "channel_name", - "type", - "user", - "text", - "ts", - "client_msg_id", - "thread_ts", - "parent_user_id", - "reply_count", - "reply_users_count", - "latest_reply", - "reply_users", - ] - - -class SlackUsersTable(APIResource): - """ - This is the table abstraction for interacting with users in Slack. - """ - - def list(self, conditions: List[FilterCondition] = None, limit: int = None, **kwargs: Any) -> pd.DataFrame: - """ - Retrieves a list of users based on the specified conditions. - Users are retrieved as follows: - - If the provided limit is greater than 1000, no limit to the API call is provided and the results are paginated until the limit is reached. - - Otherwise, the provided limit or a default limit of 1000 is used when making the API call. - - Therefore, if a user is to retrieve more than 1000 users, the limit should be set to a value greater than 1000. - - The above is designed to prevent rate limiting by the Slack API. - - Args: - conditions (List[FilterCondition]): The conditions to filter the users. - limit (int): The limit of the users to return. - kwargs (Any): Arbitrary keyword arguments. - - Raises: - SlackApiError: If an error occurs when getting the users from the Slack API. - """ - client = self.handler.connect() - - # Retrieve the users from the Slack API. - try: - # If the limit is greater than 1000, paginate the results until the limit is reached. - if limit and limit > 1000: - response = client.users_list() - users = response["members"] - - # Paginate the results until the limit is reached. - while response["response_metadata"]["next_cursor"]: - response = client.users_list(cursor=response["response_metadata"]["next_cursor"]) - users.extend(response["members"]) - if len(users) >= limit: - break - - users = users[:limit] - # Otherwise, use the provided limit or a default limit of 1000. - else: - response = client.users_list(limit=limit if limit else 1000) - users = response["members"] - except SlackApiError as slack_error: - logger.error(f"Error getting users: {slack_error.response['error']}") - raise - - return pd.DataFrame(users, columns=self.get_columns()) - - def get_columns(self) -> List[Text]: - """ - Retrieves the attributes (columns) of the Slack users. - - Returns: - List[Text]: The list of columns. - """ - return ["id", "name", "real_name"] diff --git a/mindsdb/integrations/handlers/snowflake_handler/requirements.txt b/mindsdb/integrations/handlers/snowflake_handler/requirements.txt index 706f9cd675f..b267c6e302d 100644 --- a/mindsdb/integrations/handlers/snowflake_handler/requirements.txt +++ b/mindsdb/integrations/handlers/snowflake_handler/requirements.txt @@ -1,2 +1,2 @@ -snowflake-connector-python[pandas]==3.15.0 -snowflake-sqlalchemy==1.7.0 +snowflake-connector-python[pandas]==4.4.0 +snowflake-sqlalchemy==1.9.0 diff --git a/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py b/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py index 91e20c74e50..04898c3df63 100644 --- a/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +++ b/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py @@ -1,24 +1,28 @@ -import psutil +from typing import Any, Optional, List, Generator + import pandas from pandas import DataFrame from pandas.api import types as pd_types from snowflake.sqlalchemy import snowdialect from snowflake import connector from snowflake.connector.errors import NotSupportedError -from snowflake.connector.cursor import SnowflakeCursor, ResultMetadata -from typing import Any, Optional, List +from snowflake.connector.cursor import ResultMetadata from mindsdb_sql_parser.ast.base import ASTNode from mindsdb_sql_parser.ast import Select, Identifier -from mindsdb.utilities import log from mindsdb.integrations.libs.base import MetaDatabaseHandler +from mindsdb.utilities import log from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender +from mindsdb.utilities.types.column import Column from mindsdb.integrations.libs.response import ( HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, + TableResponse, + OkResponse, + ErrorResponse, + DataHandlerResponse, ) + from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE from .auth_types import ( @@ -50,9 +54,9 @@ def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE: types_map = { ("NUMBER", "DECIMAL", "DEC", "NUMERIC"): MYSQL_DATA_TYPE.DECIMAL, ("INT , INTEGER , BIGINT , SMALLINT , TINYINT , BYTEINT"): MYSQL_DATA_TYPE.INT, - ("FLOAT", "FLOAT4", "FLOAT8"): MYSQL_DATA_TYPE.FLOAT, + ("FLOAT", "FLOAT4", "FLOAT8", "FIXED"): MYSQL_DATA_TYPE.FLOAT, ("DOUBLE", "DOUBLE PRECISION", "REAL"): MYSQL_DATA_TYPE.DOUBLE, - ("VARCHAR"): MYSQL_DATA_TYPE.VARCHAR, + ("VARCHAR",): MYSQL_DATA_TYPE.VARCHAR, ("CHAR", "CHARACTER", "NCHAR"): MYSQL_DATA_TYPE.CHAR, ("STRING", "TEXT", "NVARCHAR"): MYSQL_DATA_TYPE.TEXT, ("NVARCHAR2", "CHAR VARYING", "NCHAR VARYING"): MYSQL_DATA_TYPE.VARCHAR, @@ -61,9 +65,11 @@ def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE: ("TIMESTAMP_NTZ", "DATETIME"): MYSQL_DATA_TYPE.DATETIME, ("DATE",): MYSQL_DATA_TYPE.DATE, ("TIME",): MYSQL_DATA_TYPE.TIME, - ("TIMESTAMP_LTZ"): MYSQL_DATA_TYPE.DATETIME, - ("TIMESTAMP_TZ"): MYSQL_DATA_TYPE.DATETIME, - ("VARIANT", "OBJECT", "ARRAY", "MAP", "GEOGRAPHY", "GEOMETRY", "VECTOR"): MYSQL_DATA_TYPE.VARCHAR, + ("TIMESTAMP_LTZ",): MYSQL_DATA_TYPE.DATETIME, + ("TIMESTAMP_TZ",): MYSQL_DATA_TYPE.DATETIME, + ("OBJECT", "ARRAY"): MYSQL_DATA_TYPE.JSON, + ("VECTOR",): MYSQL_DATA_TYPE.VECTOR, + ("VARIANT", "MAP", "GEOGRAPHY", "GEOMETRY", "VECTOR"): MYSQL_DATA_TYPE.VARCHAR, } for db_types_list, mysql_data_type in types_map.items(): @@ -74,100 +80,85 @@ def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE: return MYSQL_DATA_TYPE.VARCHAR -def _make_table_response(result: DataFrame, cursor: SnowflakeCursor) -> Response: - """Build response from result and cursor. - NOTE: Snowflake return only 'general' type in description, so look on result's - DF types and use types from description only if DF type is 'object' +def _get_columns(description: list[ResultMetadata], sample: pandas.DataFrame = None) -> list[Column]: + """Get columns from Snowflake cursor description. Args: - result (DataFrame): result of the query. - cursor (SnowflakeCursor): cursor object. + description (list[ResultMetadata]): cursor description metadata. + sample (pandas.DataFrame): data sample Returns: - Response: response object. + list[Column]: list of columns with mapped MySQL types. """ - description: list[ResultMetadata] = cursor.description - mysql_types: list[MYSQL_DATA_TYPE] = [] + result = [] for column in description: - column_dtype = result[column.name].dtype - description_column_type = connector.constants.FIELD_ID_TO_NAME.get(column.type_code) - if description_column_type in ("OBJECT", "ARRAY"): - mysql_types.append(MYSQL_DATA_TYPE.JSON) - continue - if description_column_type == "VECTOR": - mysql_types.append(MYSQL_DATA_TYPE.VECTOR) - continue - if pd_types.is_integer_dtype(column_dtype): - column_dtype_name = column_dtype.name - if column_dtype_name in ("int8", "Int8"): - mysql_types.append(MYSQL_DATA_TYPE.TINYINT) - elif column_dtype in ("int16", "Int16"): - mysql_types.append(MYSQL_DATA_TYPE.SMALLINT) - elif column_dtype in ("int32", "Int32"): - mysql_types.append(MYSQL_DATA_TYPE.MEDIUMINT) - elif column_dtype in ("int64", "Int64"): - mysql_types.append(MYSQL_DATA_TYPE.BIGINT) - else: - mysql_types.append(MYSQL_DATA_TYPE.INT) - continue - if pd_types.is_float_dtype(column_dtype): - column_dtype_name = column_dtype.name - if column_dtype_name in ("float16", "Float16"): # Float16 does not exists so far - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) - elif column_dtype_name in ("float32", "Float32"): - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) - elif column_dtype_name in ("float64", "Float64"): - mysql_types.append(MYSQL_DATA_TYPE.DOUBLE) - else: - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) - continue - if pd_types.is_bool_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.BOOLEAN) - continue - if pd_types.is_datetime64_any_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.DATETIME) - series = result[column.name] - # snowflake use pytz.timezone - if series.dt.tz is not None and getattr(series.dt.tz, "zone", "UTC") != "UTC": - series = series.dt.tz_convert("UTC") - result[column.name] = series.dt.tz_localize(None) - continue - - if pd_types.is_object_dtype(column_dtype): - if description_column_type == "TEXT": - # we can also check column.internal_size, if == 16777216 then it is TEXT, else VARCHAR(internal_size) - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - continue - elif description_column_type == "BINARY": - # if column.internal_size == 8388608 then BINARY, else VARBINARY(internal_size) - mysql_types.append(MYSQL_DATA_TYPE.BINARY) - continue - elif description_column_type == "DATE": - mysql_types.append(MYSQL_DATA_TYPE.DATE) - continue - elif description_column_type == "TIME": - mysql_types.append(MYSQL_DATA_TYPE.TIME) - continue - - if description_column_type == "FIXED": - if column.scale == 0: - mysql_types.append(MYSQL_DATA_TYPE.INT) - else: - # It is NUMBER, DECIMAL or NUMERIC with scale > 0 - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) - continue - elif description_column_type == "REAL": - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) - continue - - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - - df = DataFrame( - result, - columns=[column.name for column in description], - ) - - return Response(RESPONSE_TYPE.TABLE, data_frame=df, affected_rows=None, mysql_types=mysql_types) + mysql_type = None + sf_type_name = connector.constants.FIELD_ID_TO_NAME.get(column.type_code) + if sf_type_name is None: + logger.warning(f"Snowflake handler: unknown type code: {column.type_code}") + mysql_type = MYSQL_DATA_TYPE.VARCHAR + + if sample is not None: + column_dtype = sample[column.name].dtype + + if pd_types.is_integer_dtype(column_dtype): + column_dtype_name = column_dtype.name + if column_dtype_name in ("int8", "Int8"): + mysql_type = MYSQL_DATA_TYPE.TINYINT + elif column_dtype in ("int16", "Int16"): + mysql_type = MYSQL_DATA_TYPE.SMALLINT + elif column_dtype in ("int32", "Int32"): + mysql_type = MYSQL_DATA_TYPE.MEDIUMINT + elif column_dtype in ("int64", "Int64"): + mysql_type = MYSQL_DATA_TYPE.BIGINT + else: + mysql_type = MYSQL_DATA_TYPE.INT + + elif pd_types.is_float_dtype(column_dtype): + column_dtype_name = column_dtype.name + if column_dtype_name in ("float16", "Float16"): # Float16 does not exists so far + mysql_type = MYSQL_DATA_TYPE.FLOAT + elif column_dtype_name in ("float32", "Float32"): + mysql_type = MYSQL_DATA_TYPE.FLOAT + elif column_dtype_name in ("float64", "Float64"): + mysql_type = MYSQL_DATA_TYPE.DOUBLE + else: + mysql_type = MYSQL_DATA_TYPE.FLOAT + + elif pd_types.is_bool_dtype(column_dtype): + mysql_type = MYSQL_DATA_TYPE.BOOLEAN + + elif pd_types.is_datetime64_any_dtype(column_dtype): + mysql_type = MYSQL_DATA_TYPE.DATETIME + series = sample[column.name] + # snowflake use pytz.timezone + if series.dt.tz is not None and getattr(series.dt.tz, "zone", "UTC") != "UTC": + series = series.dt.tz_convert("UTC") + sample[column.name] = series.dt.tz_localize(None) + + elif pd_types.is_object_dtype(column_dtype): + if sf_type_name == "TEXT": + # we can also check column.internal_size, if == 16777216 then it is TEXT, else VARCHAR(internal_size) + mysql_type = MYSQL_DATA_TYPE.TEXT + elif sf_type_name == "BINARY": + # if column.internal_size == 8388608 then BINARY, else VARBINARY(internal_size) + mysql_type = MYSQL_DATA_TYPE.BINARY + elif sf_type_name == "DATE": + mysql_type = MYSQL_DATA_TYPE.DATE + elif sf_type_name == "TIME": + mysql_type = MYSQL_DATA_TYPE.TIME + elif sf_type_name == "FIXED": + if getattr(column, "scale", None) == 0: + mysql_type = MYSQL_DATA_TYPE.INT + else: + # It is NUMBER, DECIMAL or NUMERIC with scale > 0 + mysql_type = MYSQL_DATA_TYPE.FLOAT + + if mysql_type is None: + mysql_type = _map_type(sf_type_name) + + result.append(Column(name=column.name, type=mysql_type, original_type=sf_type_name)) + return result class SnowflakeHandler(MetaDatabaseHandler): @@ -176,6 +167,7 @@ class SnowflakeHandler(MetaDatabaseHandler): """ name = "snowflake" + stream_response = True _auth_types = { "key_pair": KeyPairAuthType(), @@ -269,92 +261,84 @@ def check_connection(self) -> StatusResponse: return response - def native_query(self, query: str) -> Response: - """ - Executes a SQL query on the Snowflake account and returns the result. + def native_query(self, query: str, stream: bool = True, **kwargs) -> TableResponse | OkResponse | ErrorResponse: + """Executes a SQL query on the Snowflake account and returns the result. Args: query (str): The SQL query to be executed. + stream (bool): If True - return TableResponse with generator inside. Returns: - Response: A response object containing the result of the query or an error message. + DataHandlerResponse: A response object containing the result of the query or an error message. """ + generator = self._execute_fetch_batches(query) + try: + response: TableResponse = next(generator) + response.data_generator = generator + if stream is False: + response.fetchall() + except StopIteration as e: + response = e.value + if isinstance(response, DataHandlerResponse) is False: + raise + + return response + + def _execute_fetch_batches( + self, query: str + ) -> Generator[TableResponse | pandas.DataFrame, None, OkResponse | ErrorResponse]: + """Execute a SQL query and yield results in batches. - need_to_close = self.is_connected is False + Args: + query (str): The SQL query to execute. + + Yields: + TableResponse: First yield β€” response with column metadata and affected row count. + pandas.DataFrame: Subsequent yields β€” batches of query results. + Returns: + OkResponse: For DML statements (INSERT/DELETE/UPDATE) with affected row count. + ErrorResponse: If an exception occurs during query execution. + """ connection = self.connect() - with connection.cursor(connector.DictCursor) as cur: + with connection.cursor(connector.DictCursor) as cursor: try: - cur.execute(query) + cursor.execute(query) try: try: - batches_iter = cur.fetch_pandas_batches() + batches_iter = cursor.fetch_pandas_batches() except ValueError: # duplicated columns raises ValueError raise NotSupportedError() - - batches = [] - memory_estimation_check_done = False - batches_rowcount = 0 - total_rowcount = cur.rowcount or 0 + try: + sample_df = next(batches_iter) + except StopIteration: + sample_df = None + columns = _get_columns(cursor.description, sample=sample_df) + yield TableResponse(data=sample_df, affected_rows=cursor.rowcount, columns=columns) for batch_df in batches_iter: - batches.append(batch_df) - # region check the size of first batch (if it is big enough) to get an estimate of the full - # dataset size. If it does not fit in memory - raise an error. - # NOTE batch size cannot be set on client side. Also, Snowflake will download - # 'CLIENT_PREFETCH_THREADS' count of chunks in parallel (by default 4), therefore this check - # can not work in some cases. - batches_rowcount += len(batch_df) - if memory_estimation_check_done is False and batches_rowcount > 1000: - memory_estimation_check_done = True - available_memory_kb = psutil.virtual_memory().available >> 10 - batches_size_kb = sum( - [(x.memory_usage(index=True, deep=True).sum() >> 10) for x in batches] - ) - rest_rowcount = total_rowcount - batches_rowcount - rest_estimated_size_kb = int((rest_rowcount / batches_rowcount) * batches_size_kb) - # for pd.concat required at least x2 memory - max_allowed_memory_kb = available_memory_kb / 2.4 - if max_allowed_memory_kb < rest_estimated_size_kb: - error_message = ( - "The query result is too large to fit into available memory. " - f"The dataset contains {total_rowcount} rows with an estimated size " - f"of {rest_estimated_size_kb} KB, but only {max_allowed_memory_kb:.0f} KB " - f"of memory is allowed fot the dataset. Please narrow down the query by adding filters " - f"or a LIMIT clause to reduce the result set size." - ) - logger.error(error_message) - raise MemoryError(error_message) - # endregion - if len(batches) > 0: - response = _make_table_response(result=pandas.concat(batches, ignore_index=True), cursor=cur) - else: - response = Response(RESPONSE_TYPE.TABLE, DataFrame([], columns=[x[0] for x in cur.description])) + yield batch_df except NotSupportedError: # Fallback for CREATE/DELETE/UPDATE. These commands returns table with single column, # but it cannot be retrieved as pandas DataFrame. - result = cur.fetchall() + result = cursor.fetchall() match result: case ( [{"number of rows inserted": affected_rows}] | [{"number of rows deleted": affected_rows}] | [{"number of rows updated": affected_rows, "number of multi-joined rows updated": _}] ): - response = Response(RESPONSE_TYPE.OK, affected_rows=affected_rows) + response = OkResponse(affected_rows=affected_rows) case list(): - response = Response( - RESPONSE_TYPE.TABLE, DataFrame(result, columns=[x[0] for x in cur.description]) - ) + response = TableResponse(data=DataFrame(result, columns=[x[0] for x in cursor.description])) case _: # Looks like SnowFlake always returns something in response, so this is suspicious logger.warning("Snowflake did not return any data in response.") - response = Response(RESPONSE_TYPE.OK) + response = OkResponse() + return response except Exception as e: logger.error(f"Error running query: {query} on {self.connection_data.get('database')}, {e}!") - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e)) - - if need_to_close is True: - self.disconnect() + return ErrorResponse(error_code=0, error_message=str(e)) if memory_pool is not None and memory_pool.backend_name == "jemalloc": # This reduce memory consumption, but will slow down next query slightly. @@ -362,9 +346,7 @@ def native_query(self, query: str) -> Response: # and next query processing time may be even lower. memory_pool.release_unused() - return response - - def query(self, query: ASTNode) -> Response: + def query(self, query: ASTNode) -> DataHandlerResponse: """ Executes a SQL query represented by an ASTNode and retrieves the data. @@ -372,7 +354,7 @@ def query(self, query: ASTNode) -> Response: query (ASTNode): An ASTNode representing the SQL query to be executed. Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. + DataHandlerResponse: The response from the `native_query` method, containing the result of the SQL query execution. """ query_str = self.renderer.get_string(query, with_failback=True) @@ -381,7 +363,7 @@ def query(self, query: ASTNode) -> Response: return self.lowercase_columns(result, query) def lowercase_columns(self, result, query): - if not isinstance(query, Select) or result.data_frame is None: + if not isinstance(query, Select) or not isinstance(result, TableResponse): return result quoted_columns = [] @@ -394,20 +376,19 @@ def lowercase_columns(self, result, query): if column.is_quoted[-1]: quoted_columns.append(column.parts[-1]) - rename_columns = {} - for col in result.data_frame.columns: - if col.isupper() and col not in quoted_columns: - rename_columns[col] = col.lower() - if rename_columns: - result.data_frame = result.data_frame.rename(columns=rename_columns) + for col in result.columns: + col_name = col.alias or col.name + if col_name.isupper() and col_name not in quoted_columns: + col.alias = col_name.lower() + return result - def get_tables(self) -> Response: + def get_tables(self) -> DataHandlerResponse: """ Retrieves a list of all non-system tables and views in the current schema of the Snowflake account. Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. + DataHandlerResponse: A response object containing the list of tables and views. """ query = """ @@ -418,7 +399,7 @@ def get_tables(self) -> Response: """ return self.native_query(query) - def get_columns(self, table_name) -> Response: + def get_columns(self, table_name) -> DataHandlerResponse: """ Retrieves column details for a specified table in the Snowflake account. @@ -426,7 +407,7 @@ def get_columns(self, table_name) -> Response: table_name (str): The name of the table for which to retrieve column information. Returns: - Response: A response object containing the column details, formatted as per the `Response` class. + DataHandlerResponse: A response object containing the column details. Raises: ValueError: If the 'table_name' is not a valid string. @@ -458,7 +439,7 @@ def get_columns(self, table_name) -> Response: return result - def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response: + def meta_get_tables(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: """ Retrieves metadata information about the tables in the Snowflake database to be stored in the data catalog. @@ -466,7 +447,7 @@ def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response: table_names (list): A list of table names for which to retrieve metadata information. Returns: - Response: A response object containing the metadata information, formatted as per the `Response` class. + DataHandlerResponse: A response object containing the metadata information. """ query = """ SELECT @@ -493,7 +474,7 @@ def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response: result.data_frame["ROW_COUNT"] = result.data_frame["ROW_COUNT"].astype("Int64") return result - def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response: + def meta_get_columns(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: """ Retrieves column metadata for the specified tables (or all tables if no list is provided). @@ -501,7 +482,7 @@ def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response: table_names (list): A list of table names for which to retrieve column metadata. Returns: - Response: A response object containing the column metadata. + DataHandlerResponse: A response object containing the column metadata. """ query = """ SELECT @@ -529,7 +510,7 @@ def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response: result = self.native_query(query) return result - def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> Response: + def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: """ Retrieves basic column statistics: null %, distinct count. Due to Snowflake limitations, this runs per-table not per-column. @@ -546,11 +527,11 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> columns_result = self.native_query(columns_query) if ( - columns_result.type == RESPONSE_TYPE.ERROR + isinstance(columns_result, ErrorResponse) or columns_result.data_frame is None or columns_result.data_frame.empty ): - return Response(RESPONSE_TYPE.ERROR, error_message="No columns found.") + return ErrorResponse(error_message="No columns found.") columns_df = columns_result.data_frame grouped = columns_df.groupby(["TABLE_SCHEMA", "TABLE_NAME"]) @@ -585,9 +566,13 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> """ try: stats_res = self.native_query(stats_query) - if stats_res.type != RESPONSE_TYPE.TABLE or stats_res.data_frame is None or stats_res.data_frame.empty: + if ( + not isinstance(stats_res, TableResponse) + or stats_res.data_frame is None + or stats_res.data_frame.empty + ): logger.warning( - f"Could not retrieve stats for table {table_name}. Query returned no data or an error: {stats_res.error_message if stats_res.type == RESPONSE_TYPE.ERROR else 'No data'}" + f"Could not retrieve stats for table {table_name}. Query returned no data or an error: {stats_res.error_message if isinstance(stats_res, ErrorResponse) else 'No data'}" ) # Add placeholder stats if query fails or returns empty for _, row in group.iterrows(): @@ -646,11 +631,11 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> ) if not all_stats: - return Response(RESPONSE_TYPE.TABLE, data_frame=pandas.DataFrame()) + return TableResponse(data=pandas.DataFrame()) - return Response(RESPONSE_TYPE.TABLE, data_frame=pandas.DataFrame(all_stats)) + return TableResponse(data=pandas.DataFrame(all_stats)) - def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Response: + def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: """ Retrieves primary key information for the specified tables (or all tables if no list is provided). @@ -658,7 +643,7 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Resp table_names (list): A list of table names for which to retrieve primary key information. Returns: - Response: A response object containing the primary key information. + DataHandlerResponse: A response object containing the primary key information. """ try: query = """ @@ -666,7 +651,7 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Resp """ response = self.native_query(query) - if response.type == RESPONSE_TYPE.ERROR and response.error_message: + if isinstance(response, ErrorResponse): logger.error(f"Query error in meta_get_primary_keys: {response.error_message}\nQuery:\n{query}") df = response.data_frame @@ -683,9 +668,9 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Resp except Exception as e: logger.error(f"Exception in meta_get_primary_keys: {e!r}") - return Response(RESPONSE_TYPE.ERROR, error_message=f"Exception querying primary keys: {e!r}") + return ErrorResponse(error_message=f"Exception querying primary keys: {e!r}") - def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Response: + def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: """ Retrieves foreign key information for the specified tables (or all tables if no list is provided). @@ -693,7 +678,7 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp table_names (list): A list of table names for which to retrieve foreign key information. Returns: - Response: A response object containing the foreign key information. + DataHandlerResponse: A response object containing the foreign key information. """ try: query = """ @@ -701,7 +686,7 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp """ response = self.native_query(query) - if response.type == RESPONSE_TYPE.ERROR and response.error_message: + if isinstance(response, ErrorResponse): logger.error(f"Query error in meta_get_primary_keys: {response.error_message}\nQuery:\n{query}") df = response.data_frame @@ -712,10 +697,10 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp df = df[["pk_table_name", "pk_column_name", "fk_table_name", "fk_column_name"]] df = df.rename( columns={ - "pk_table_name": "child_table_name", - "pk_column_name": "child_column_name", - "fk_table_name": "parent_table_name", - "fk_column_name": "parent_column_name", + "pk_table_name": "parent_table_name", + "pk_column_name": "parent_column_name", + "fk_table_name": "child_table_name", + "fk_column_name": "child_column_name", } ) @@ -725,7 +710,7 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp except Exception as e: logger.error(f"Exception in meta_get_primary_keys: {e!r}") - return Response(RESPONSE_TYPE.ERROR, error_message=f"Exception querying primary keys: {e!r}") + return ErrorResponse(error_message=f"Exception querying primary keys: {e!r}") def meta_get_handler_info(self, **kwargs: Any) -> str: """ diff --git a/mindsdb/integrations/handlers/solace_handler/README.md b/mindsdb/integrations/handlers/solace_handler/README.md deleted file mode 100644 index 568c6fc562b..00000000000 --- a/mindsdb/integrations/handlers/solace_handler/README.md +++ /dev/null @@ -1,85 +0,0 @@ -# Solace Handler - -## Run solace locally (optional): - -Reference: https://solace.com/products/event-broker/software/getting-started/ - -1. Run solace app in docker -Here you can choose different user/password (instead of admin) -```bash -docker run -d -p 8080:8080 -p 55555:55555 -p 8008:8008 -p 1883:1883 -p 8000:8000 -p 5672:5672 -p 9000:9000 -p 2222:2222 --shm-size=2g --env username_admin_globalaccesslevel=admin --env username_admin_password=admin --name=solace solace/solace-pubsub-standard -``` - -2. Solace admin panel will be run on: http://localhost:8080/ - -3. Test Publisher/Subscriber using JS client in "3. Run Samples" on [get started](https://solace.com/products/event-broker/software/getting-started/) page - - -## Client description - -Pip module: https://pypi.org/project/solace-pubsubplus/ - -Client api reference: https://docs.solace.com/API-Developer-Online-Ref-Documentation/python/source/rst/solace.messaging.html - -Client usage samples: https://github.com/SolaceSamples/solace-samples-python/tree/main - - -## Connect to the Mindsdb to Solace - -Command parameters: -```sql -CREATE DATABASE my_solace -WITH - ENGINE = "solace" - PARAMETERS = { - "host": , - "username": , - "password": , - "vpn-name": -- optional, default is "default" - }; -``` - -Example: -```sql -CREATE DATABASE my_solace -WITH - ENGINE = "solace" - PARAMETERS = { - "host": "localhost:55555", - "username": "admin", - "password": "admin" - }; -``` - -## Usage - -Solace is event broker. You can send event or subscribe on event. But can't read past events - -### Publish event: - -Table name is topic name for publishing. -```sql -INSERT INTO my_solace. (, ...) -VALUES (, ...) -``` - -If topic contents slash then it can be replaced by '.' similar to access sql objects -For example to send event to topic `solace/test/my` with dict {'id': 1, 'name': 'me'} use -```sql -INSERT INTO my_solace.solace.test.my ('id', 'name', 'type') -VALUES (1, 'me') -``` - - -### Subscribe on event: - -It is possible to subscribe to incoming events using triggers. -In this example events with topic `solace/test` will be inserted to my_pg.log table: -```sql -create trigger my_trigger -on my_solace.solace.test -( - insert into my_pg.log - select * from TABLE_DELTA -) -``` diff --git a/mindsdb/integrations/handlers/solace_handler/__about__.py b/mindsdb/integrations/handlers/solace_handler/__about__.py deleted file mode 100644 index 218984459dd..00000000000 --- a/mindsdb/integrations/handlers/solace_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Solace handler' -__package_name__ = 'mindsdb_solace_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for the Solace event broker" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/solace_handler/__init__.py b/mindsdb/integrations/handlers/solace_handler/__init__.py deleted file mode 100644 index 697ad937451..00000000000 --- a/mindsdb/integrations/handlers/solace_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .solace_handler import SolaceHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Solace' -name = 'solace' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/solace_handler/icon.svg b/mindsdb/integrations/handlers/solace_handler/icon.svg deleted file mode 100644 index f6a3f33ca8f..00000000000 --- a/mindsdb/integrations/handlers/solace_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/solace_handler/requirements.txt b/mindsdb/integrations/handlers/solace_handler/requirements.txt deleted file mode 100644 index 0868c064846..00000000000 --- a/mindsdb/integrations/handlers/solace_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -solace-pubsubplus diff --git a/mindsdb/integrations/handlers/solace_handler/solace_handler.py b/mindsdb/integrations/handlers/solace_handler/solace_handler.py deleted file mode 100644 index c7206fb1f67..00000000000 --- a/mindsdb/integrations/handlers/solace_handler/solace_handler.py +++ /dev/null @@ -1,169 +0,0 @@ -import pandas as pd - -from solace.messaging.messaging_service import MessagingService, RetryStrategy -from solace.messaging.resources.topic_subscription import TopicSubscription -from solace.messaging.receiver.message_receiver import MessageHandler -from solace.messaging.resources.topic import Topic -from solace.messaging.receiver.inbound_message import InboundMessage - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser import ast - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class SolaceHandler(DatabaseHandler): - - def __init__(self, name: str = None, **kwargs): - """Registers all API tables and prepares the handler for an API connection. - - Args: - name: (str): The handler name to use - """ - super().__init__(name) - args = kwargs.get('connection_data', {}) - - if 'host' not in args: - raise ValueError('Host parameter is required') - - self.connection_args = args - self.messaging_service = None - self.is_connected = False - - def connect(self): - - broker_props = { - "solace.messaging.transport.host": self.connection_args['host'], - "solace.messaging.service.vpn-name": self.connection_args.get('vpn-name', 'default'), - "solace.messaging.authentication.scheme.basic.username": self.connection_args.get('username'), - "solace.messaging.authentication.scheme.basic.password": self.connection_args.get('password') - } - - self.messaging_service = MessagingService\ - .builder()\ - .from_properties(broker_props) \ - .with_reconnection_retry_strategy(RetryStrategy.parametrized_retry(20, 3)) \ - .build() - - self.messaging_service.connect() - - self.direct_publisher = self.messaging_service\ - .create_direct_message_publisher_builder()\ - .build() - self.direct_publisher.start() - - # TODO support persistent_publisher ? - - self.is_connected = True - return self.messaging_service - - def disconnect(self): - - if self.is_connected is False: - return - - self.direct_publisher.terminate() - self.messaging_service.disconnect() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Solace: {e}!') - response.error_message = e - - if response.success is False: - self.is_connected = False - return response - - def native_query(self, query: str = None) -> Response: - ast = parse_sql(query) - return self.query(ast) - - def query(self, query: ast.ASTNode): - if isinstance(query, ast.Insert): - result = self.handle_insert(query) - else: - raise NotImplementedError - return result - - def get_columns(self, table_name: str) -> Response: - df = pd.DataFrame([], columns=['Field']) - df['Type'] = 'str' - - return Response(RESPONSE_TYPE.TABLE, df) - - def get_tables(self) -> Response: - df = pd.DataFrame([], columns=['table_name', 'table_type']) - - return Response(RESPONSE_TYPE.TABLE, df) - - def handle_insert(self, query: ast.Insert): - - message_builder = self.messaging_service.message_builder() - - topic_name = '/'.join(query.table.parts) - - column_names = [col.name for col in query.columns] - for insert_row in query.values: - data = dict(zip(column_names, insert_row)) - - outbound_message = message_builder.build(data) - self.direct_publisher.publish(destination=Topic.of(topic_name), message=outbound_message) - - return Response(RESPONSE_TYPE.OK) - - def subscribe(self, stop_event, callback, table_name, columns=None, **kwargs): - - class MessageHandlerImpl(MessageHandler): - def on_message(self, message: 'InboundMessage'): - # Check if the payload is a dict - payload = message.get_payload_as_dictionary() - if payload is not None: - data = payload - else: - # check as string - payload = message.get_payload_as_string() - if payload is None: - payload = message.get_payload_as_bytes() - if isinstance(payload, bytearray): - payload = payload.decode() - data = {'data': payload} - - if columns is not None: - updated_columns = data.keys() - if not set(columns) & set(updated_columns): - # skip - return - - callback(data) - - table = ast.Identifier(table_name) - topic_name = '/'.join(table.parts) - - topics = [TopicSubscription.of(topic_name)] - direct_receiver = self.messaging_service\ - .create_direct_message_receiver_builder()\ - .with_subscriptions(topics)\ - .build() - - direct_receiver.start() - direct_receiver.receive_async(MessageHandlerImpl()) - - stop_event.wait() - - direct_receiver.terminate() diff --git a/mindsdb/integrations/handlers/solr_handler/README.md b/mindsdb/integrations/handlers/solr_handler/README.md deleted file mode 100644 index 3aca855f674..00000000000 --- a/mindsdb/integrations/handlers/solr_handler/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# Solr Handler - -This is the implementation of the Solr handler for MindsDB. - -## Solr -The Apache Solr β„’ is a highly reliable, scalable and fault tolerant, providing distributed indexing, replication and load-balanced querying, automated failover and recovery, centralized configuration and more. - -## Implementation -This handler was implemented using the `sqlalchemy-solr` library, which provides a Python / SQLAlchemy interface. - -The required arguments to establish a connection are, -* `username`: the username used to authenticate with the Solr server. This parameter is optional. -* `password`: the password to authenticate the user with the Solr server. This parameter is optional. -* `host`: the host name or IP address of the Solr server(. -* `port`: the port number of the Solr server. -* `server_path`: Defaults to solr in case not provided. -* `collection`: Solr Collection name. -* `use_ssl`: Defaults to false in case not provide. true|false -Refer [https://pypi.org/project/sqlalchemy-solr/](https://pypi.org/project/sqlalchemy-solr/) - -## Usage -In order to make use of this handler and connect to Hive in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE solr_datasource -WITH -engine='solr', -parameters={ - "username": "demo_user", - "password": "demo_password", - "host": "127.0.0.1", - "port": "8981", - "server_path": "solr", - "collection": "gettingstarted", - "use_ssl": "false" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM solr_datasource.gettingstarted limit 10000; -~~~~ - -## Requirements -A Solr instance with a Parallel SQL supported up and running. - -There are certain limitations that need to be taken into account when issuing queries to Solr. -Refer [https://solr.apache.org/guide/solr/latest/query-guide/sql-query.html#parallel-sql-queries](https://solr.apache.org/guide/solr/latest/query-guide/sql-query.html#parallel-sql-queries). - -Tip: Don't forget to put limit in the end of the SQL statement diff --git a/mindsdb/integrations/handlers/solr_handler/__about__.py b/mindsdb/integrations/handlers/solr_handler/__about__.py deleted file mode 100644 index 7588d2cbc0d..00000000000 --- a/mindsdb/integrations/handlers/solr_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Solr handler' -__package_name__ = 'mindsdb_solr_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Solr" -__author__ = 'Biswadip Paul' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/solr_handler/__init__.py b/mindsdb/integrations/handlers/solr_handler/__init__.py deleted file mode 100644 index c7301f8f9cf..00000000000 --- a/mindsdb/integrations/handlers/solr_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .solr_handler import SolrHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Apache Solr' -name = 'solr' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/solr_handler/connection_args.py b/mindsdb/integrations/handlers/solr_handler/connection_args.py deleted file mode 100644 index 706c3c4844f..00000000000 --- a/mindsdb/integrations/handlers/solr_handler/connection_args.py +++ /dev/null @@ -1,46 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - username={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Solr server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Solr server.', - 'secret': True - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Solr server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Solr server. Must be an integer.' - }, - server_path={ - 'type': ARG_TYPE.STR, - 'description': 'The server path connecting with the Solr server. Defaults to solr when not provided.' - }, - collection={ - 'type': ARG_TYPE.STR, - 'description': 'The collection name to use for the query in the Solr server.' - }, - use_ssl={ - 'type': ARG_TYPE.BOOL, - 'description': 'The flag to set ssl for the query in the Solr server.Defaults to false.' - } -) - -connection_args_example = OrderedDict( - username="demo_user", - password="demo_password", - host="127.0.0.1", - port=8981, - server_path="solr", - collection="gettingstarted", - use_ssl=False, -) diff --git a/mindsdb/integrations/handlers/solr_handler/icon.svg b/mindsdb/integrations/handlers/solr_handler/icon.svg deleted file mode 100644 index 2c468142129..00000000000 --- a/mindsdb/integrations/handlers/solr_handler/icon.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/solr_handler/requirements.txt b/mindsdb/integrations/handlers/solr_handler/requirements.txt deleted file mode 100644 index a5a42f708cd..00000000000 --- a/mindsdb/integrations/handlers/solr_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sqlalchemy-solr -sqlparse>=0.5.4 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/mindsdb/integrations/handlers/solr_handler/solr_handler.py b/mindsdb/integrations/handlers/solr_handler/solr_handler.py deleted file mode 100644 index 5071f579a76..00000000000 --- a/mindsdb/integrations/handlers/solr_handler/solr_handler.py +++ /dev/null @@ -1,177 +0,0 @@ -from typing import Optional -import pandas as pd - -from sqlalchemy import create_engine - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class SolrHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Solr SQL statements. - """ - - name = 'solr' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - super().__init__(name) - self.parser = parse_sql - self.dialect = 'solr' - - if ('host' not in connection_data) or ('port' not in connection_data) or ('collection' not in connection_data): - raise Exception("The host, port and collection parameter should be provided!") - - optional_parameters = ['use_ssl', 'username', 'password'] - for parameter in optional_parameters: - if parameter not in connection_data: - connection_data[parameter] = None - - if connection_data.get('use_ssl', False): - connection_data['use_ssl'] = True - else: - connection_data['use_ssl'] = False - - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - if self.is_connected is True: - return self.connection - - config = { - 'username': self.connection_data.get('username'), - 'password': self.connection_data.get('password'), - 'host': self.connection_data.get('host'), - 'port': self.connection_data.get('port'), - 'server_path': self.connection_data.get('server_path', 'solr'), - 'collection': self.connection_data.get('collection'), - 'use_ssl': self.connection_data.get('use_ssl') - } - - connection = create_engine("solr://{username}:{password}@{host}:{port}/{server_path}/{collection}/sql?use_ssl={use_ssl}".format(**config)) - self.is_connected = True - self.connection = connection.connect() - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the Solr database - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Solr {self.connection_data["host"]}, {e}!') - response.error_message = str(e) - - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> Response: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - - try: - result = connection.execute(query) - columns = list(result.keys()) - if result: - response = Response( - RESPONSE_TYPE.TABLE, - pd.DataFrame( - result, - columns=columns - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_data["host"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement. - """ - return self.native_query(query.to_string()) - - def get_tables(self) -> Response: - """ - Get a list with all of the tables in Solr - """ - result = {} - result['data_frame'] = pd.DataFrame([self.connection_data.get('collection')]) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return result - - def get_columns(self, table_name) -> Response: - """ - Show details about the table - """ - q = f"select * from {table_name} limit 1" - result = self.native_query(q) - df = pd.DataFrame([[col] for col in result.data_frame.columns]) - result.data_frame = df.rename(columns={df.columns[0]: 'column_name'}) - return result diff --git a/mindsdb/integrations/handlers/solr_handler/tests/__init__.py b/mindsdb/integrations/handlers/solr_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py b/mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py deleted file mode 100644 index 14abd69b72f..00000000000 --- a/mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py +++ /dev/null @@ -1,39 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.solr_handler.solr_handler import SolrHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class SolrHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "username": "demo_user", - "password": "demo_password", - "host": "172.22.0.4", - "port": 8983, - "server_path": "solr", - "collection": "gettingstarted", - "use_ssl": False - } - cls.handler = SolrHandler('test_solr_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.check_connection() - - def test_2_get_tables(self): - tbls = self.handler.get_tables() - assert tbls['type'] is not RESPONSE_TYPE.ERROR - - def test_6_describe_table(self): - described = self.handler.get_columns("gettingstarted") - assert described['type'] is RESPONSE_TYPE.TABLE - - def test_7_select_query(self): - query = "SELECT * FROM gettingstarted WHERE id='apple' limit 1000" - result = self.handler.query(query) - assert result['type'] is RESPONSE_TYPE.TABLE - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/sqlany_handler/README.md b/mindsdb/integrations/handlers/sqlany_handler/README.md deleted file mode 100644 index deed1b39948..00000000000 --- a/mindsdb/integrations/handlers/sqlany_handler/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# SAP SQL Anywhere Handler - -This is the implementation of the SAP SQL Anywhere handler for MindsDB. - -## SAP SQL Anywhere - - -SAP SQL Anywhere Embedded Database for Application Software -enables secure, reliable data management for servers where no DBA is available and synchronization for tens of thousands of mobile devices, Internet of Things (IoT) systems, and remote environments. [Read more](https://www.sap.com/products/technology-platform/sql-anywhere.html). - -## Implementation - -This handler was implemented using `sqlanydb` - the Python driver for SAP SQL Anywhere. - -The required arguments to establish a connection are, - -* `host`: the host name or IP address of the SAP SQL Anywhere instance -* `port`: the port number of the SAP SQL Anywhere instance -* `user`: specifies the user name -* `password`: specifies the password for the user -* `database`: sets the current database -* `server`: sets the current server - -## Usage - -Based on the current connected database we have a table called `TEST` that was created using -the following SQL statements: - -~~~~sql -CREATE TABLE TEST -( - ID INTEGER NOT NULL, - NAME NVARCHAR(1), - DESCRIPTION NVARCHAR(1) -); - -CREATE UNIQUE INDEX TEST_ID_INDEX - ON TEST (ID); - -ALTER TABLE TEST - ADD CONSTRAINT TEST_PK - PRIMARY KEY (ID); - -INSERT INTO TEST -VALUES (1, 'h', 'w'); -~~~~ - -In order to make use of this handler and connect to the SAP SQL Anywhere database in MindsDB, the following syntax can be used: - -~~~~sql -CREATE DATABASE sap_sqlany_trial -WITH ENGINE = 'sqlany', -PARAMETERS = { - "user": "DBADMIN", - "password": "password", - "host": "localhost", - "port": "55505", - "server": "TestMe", - "database": "MINDSDB" -}; -~~~~ - -**Note**: The above example assumes usage of SAP SQL Anywhere Cloud, which requires the `encrypt` parameter to be set to `true` and uses port `443`. - -Now, you can use this established connection to query your database as follows: - -~~~~sql -SELECT * FROM sap_sqlany_trial.test -~~~~ - -| ID | NAME | DESCRIPTION | -|----|------|-------------| -| 1 | h | w | - -![MindsDB using SAP SQL Anywhere Integration](https://imgur.com/a/sE9uQoL) diff --git a/mindsdb/integrations/handlers/sqlany_handler/__about__.py b/mindsdb/integrations/handlers/sqlany_handler/__about__.py deleted file mode 100644 index 9660683e899..00000000000 --- a/mindsdb/integrations/handlers/sqlany_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB SAP SQL Anywhere handler' -__package_name__ = 'mindsdb_sqlany_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for SAP SQL Anywhere" -__author__ = 'Michael Lantz' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022 - mindsdb' diff --git a/mindsdb/integrations/handlers/sqlany_handler/__init__.py b/mindsdb/integrations/handlers/sqlany_handler/__init__.py deleted file mode 100644 index a9eacd081c8..00000000000 --- a/mindsdb/integrations/handlers/sqlany_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .sqlany_handler import SQLAnyHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'SAP SQL Anywhere' -name = 'sqlany' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/sqlany_handler/connection_args.py b/mindsdb/integrations/handlers/sqlany_handler/connection_args.py deleted file mode 100644 index 919fcd82089..00000000000 --- a/mindsdb/integrations/handlers/sqlany_handler/connection_args.py +++ /dev/null @@ -1,46 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -# For complete list of parameters: https://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.help.sqlanywhere.12.0.1/dbadmin/da-conparm.html -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The IP address/host name of the SAP SQL Anywhere instance host.' - }, - port={ - 'type': ARG_TYPE.STR, - 'description': 'The port number of the SAP SQL Anywhere instance.' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'Specifies the user name.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'Specifies the password for the user.', - 'secret': True - }, - server={ - 'type': ARG_TYPE.STR, - 'description': 'Specifies the name of the server to connect to.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'Specifies the name of the database to connect to.' - }, - encrypt={ - 'type': ARG_TYPE.BOOL, - 'description': 'Enables or disables TLS encryption.' - }, -) - -connection_args_example = OrderedDict( - host='localhost', - port=55505, - user='DBADMIN', - password='password', - serverName='TestMe', - database='MINDSDB' -) diff --git a/mindsdb/integrations/handlers/sqlany_handler/icon.svg b/mindsdb/integrations/handlers/sqlany_handler/icon.svg deleted file mode 100644 index e0e489c013b..00000000000 --- a/mindsdb/integrations/handlers/sqlany_handler/icon.svg +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sqlany_handler/requirements.txt b/mindsdb/integrations/handlers/sqlany_handler/requirements.txt deleted file mode 100644 index f198424b491..00000000000 --- a/mindsdb/integrations/handlers/sqlany_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sqlalchemy-sqlany -sqlanydb diff --git a/mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py b/mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py deleted file mode 100644 index d98e31cfc36..00000000000 --- a/mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py +++ /dev/null @@ -1,180 +0,0 @@ -from pandas import DataFrame - -import sqlanydb -import sqlalchemy_sqlany.base as sqlany_dialect - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class SQLAnyHandler(DatabaseHandler): - """ - This handler handles connection and execution of the SAP SQL Anywhere statements. - """ - - name = 'sqlany' - - def __init__(self, name: str, connection_data: dict, **kwargs): - super().__init__(name) - - self.dialect = 'sqlany' - self.parser = parse_sql - self.connection_data = connection_data - self.renderer = SqlalchemyRender(sqlany_dialect.SQLAnyDialect) - self.host = self.connection_data.get('host') - self.port = self.connection_data.get('port') - self.userid = self.connection_data.get('user') - self.password = self.connection_data.get('password') - self.server = self.connection_data.get('server') - self.databaseName = self.connection_data.get('database') - self.encryption = self.connection_data.get('encrypt', False) - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """ - Handles the connection to a SAP SQL Anywhere database insance. - """ - - if self.is_connected is True: - return self.connection - - if self.port.strip().isnumeric(): - self.host += ":" + self.port.strip() - - if self.encryption: - self.encryption = "SIMPLE" - else: - self.encryption = "NONE" - - connection = sqlanydb.connect( - host=self.host, - userid=self.userid, - password=self.password, - server=self.server, - databaseName=self.databaseName, - encryption=self.encryption - ) - self.is_connected = True - self.connection = connection - return self.connection - - def disconnect(self): - """ - Disconnects from the SAP SQL Anywhere database - """ - - if self.is_connected is True: - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the SAP SQL Anywhere database - :return: success status and error message if error occurs - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - cur = connection.cursor() - cur.execute('SELECT 1 FROM SYS.DUMMY;') - response.success = True - except sqlanydb.Error as e: - logger.error(f'Error connecting to SAP SQL Anywhere {self.host}, {e}!') - response.error_message = e - - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> Response: - """ - Receive SQL query and runs it - :param query: The SQL query to run in SAP SQL Anywhere - :return: returns the records from the current recordset - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cur = connection.cursor() - try: - cur.execute(query) - if not cur.description: - response = Response(RESPONSE_TYPE.OK) - else: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(e) - ) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement with eliminated rows that dont satisfy the WHERE condition - """ - - query_str = self.renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - List all tables in SAP SQL Anywhere in the current schema - """ - - return self.native_query(""" - SELECT USER_NAME(ob.UID) AS SCHEMA_NAME - , st.TABLE_NAME - , st.TABLE_TYPE - FROM SYSOBJECTS ob - INNER JOIN SYS.SYSTABLE st on ob.ID = st.OBJECT_ID - WHERE ob.TYPE='U' AND st.TABLE_TYPE <> 'GBL TEMP' - """) - - def get_columns(self, table_name: str) -> Response: - """ - List all columns in a table in SAP SQL Anywhere in the current schema - :param table_name: the table name for which to list the columns - :return: returns the columns in the table - """ - - return self.renderer.dialect.get_columns(table_name) diff --git a/mindsdb/integrations/handlers/sqlany_handler/tests/test_sqlany_handler.py b/mindsdb/integrations/handlers/sqlany_handler/tests/test_sqlany_handler.py deleted file mode 100644 index 89856bf3150..00000000000 --- a/mindsdb/integrations/handlers/sqlany_handler/tests/test_sqlany_handler.py +++ /dev/null @@ -1,63 +0,0 @@ -import os -import unittest - -from mindsdb.integrations.handlers.sqlany_handler.sqlany_handler import SQLAnyHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -""" -create table TEST -( - ID INTEGER not null, - NAME NVARCHAR(1), - DESCRIPTION NVARCHAR(1) -); - -create unique index TEST_ID_INDEX - on TEST (ID); - -alter table TEST - add constraint TEST_PK - primary key (ID); - -insert into TEST -values (1, 'h', 'w'); -""" - - -class SQLAnyHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": os.environ.get('SQLANY_HOST', 'localhost'), - "port": os.environ.get('SQLANY_PORT', 55505), - "user": "DBA", - "password": os.environ.get('SQLANY_PASSWORD', 'password'), - "server": "TestMe", - "database": "MINDSDB" - } - cls.handler = SQLAnyHandler('test_sqlany_handler', cls.kwargs) - - def test_0_connect(self): - assert self.handler.connect() - - def test_1_check_connection(self): - assert self.handler.check_connection().success is True - - def test_2_get_columns(self): - assert self.handler.get_columns('TEST').resp_type is not RESPONSE_TYPE.ERROR - - def test_3_get_tables(self): - assert self.handler.get_tables().resp_type is not RESPONSE_TYPE.ERROR - - def test_4_select_query(self): - query = 'SELECT * FROM TEST WHERE ID=2' - assert self.handler.query(query).resp_type is RESPONSE_TYPE.TABLE - - def test_5_update_query(self): - query = 'UPDATE TEST SET NAME=\'s\' WHERE ID=1' - assert self.handler.query(query).resp_type is RESPONSE_TYPE.OK - - -if __name__ == "__main__": - unittest.main(failfast=True) diff --git a/mindsdb/integrations/handlers/sqlite_handler/README.md b/mindsdb/integrations/handlers/sqlite_handler/README.md deleted file mode 100644 index db5836911a4..00000000000 --- a/mindsdb/integrations/handlers/sqlite_handler/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# SQLite Handler - -This is the implementation of the SQLite handler for MindsDB. - -## SQLite -SQLite is an in-process library that implements a self-contained, serverless, zero-configuration, transactional SQL database engine. The code for SQLite is in the public domain and is thus free for use for any purpose, commercial or private. SQLite is the most widely deployed database in the world with more applications than we can count, including several high-profile projects. -https://www.sqlite.org/about.html - -## Implementation -This handler was implemented using the standard `sqlite3` library that comes with Python. - -The only required argument to establish a connection is `db_file`. This points to the database file that the connection is to be made to. - -Optionally, this may also be set to `:memory:`, which will create an in-memory database. - -## Usage -If you have local file that need to connect into MindsDB, you have to [deploy MindsDB locally](https://docs.mindsdb.com/setup/self-hosted/pip/source), ways like via Docker or via pip. Then copy the file into the desired folder in source folder. This way MindsDB can successfully access your file. - -In order to make use of this handler and connect to a SQLite database in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE sqlite_datasource -WITH -engine='sqlite', -parameters={ - "db_file":"example.db" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM sqlite_datasource.example_tbl -~~~~ \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sqlite_handler/__about__.py b/mindsdb/integrations/handlers/sqlite_handler/__about__.py deleted file mode 100644 index 5513897e6f1..00000000000 --- a/mindsdb/integrations/handlers/sqlite_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB SQLite handler' -__package_name__ = 'mindsdb_sqlite_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for SQLite" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/sqlite_handler/__init__.py b/mindsdb/integrations/handlers/sqlite_handler/__init__.py deleted file mode 100644 index 9dd2b517ed3..00000000000 --- a/mindsdb/integrations/handlers/sqlite_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .sqlite_handler import SQLiteHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'SQLite' -name = 'sqlite' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/sqlite_handler/connection_args.py b/mindsdb/integrations/handlers/sqlite_handler/connection_args.py deleted file mode 100644 index 006cd49af9a..00000000000 --- a/mindsdb/integrations/handlers/sqlite_handler/connection_args.py +++ /dev/null @@ -1,16 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - db_file={ - 'type': ARG_TYPE.STR, - 'description': 'The database file where the data will be stored. The special path name :memory: can be provided' - ' to create a temporary database in RAM.' - } -) - -connection_args_example = OrderedDict( - db_file='chinook.db' -) diff --git a/mindsdb/integrations/handlers/sqlite_handler/icon.svg b/mindsdb/integrations/handlers/sqlite_handler/icon.svg deleted file mode 100644 index 9de0e827cf8..00000000000 --- a/mindsdb/integrations/handlers/sqlite_handler/icon.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py b/mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py deleted file mode 100644 index d72be9edd7f..00000000000 --- a/mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py +++ /dev/null @@ -1,183 +0,0 @@ -import os -from typing import Optional - -import pandas as pd -import sqlite3 - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - - -logger = log.getLogger(__name__) - - -class SQLiteHandler(DatabaseHandler): - """ - This handler handles connection and execution of the SQLite statements. - """ - - name = "sqlite" - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.parser = parse_sql - self.dialect = "sqlite" - self.connection_data = connection_data - self.kwargs = kwargs - - # SQLite objects created in a thread can only be used in that same thread. - self.thread_safe = False - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> StatusResponse: - """ - Set up the connection required by the handler. - Returns: - HandlerStatusResponse - """ - - if self.is_connected is True: - return self.connection - - self.connection = sqlite3.connect(self.connection_data["db_file"]) - self.is_connected = True - - return self.connection - - def disconnect(self): - """ - Close any existing connections. - """ - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - if not os.path.isfile(self.connection_data["db_file"]): - raise FileNotFoundError( - f"File '{self.connection_data['db_file']}' not found. Use ':memory:' to create an in-memory database if you don't have a file." - ) - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to SQLite {self.connection_data['db_file']}, {e}!") - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """ - Receive raw query and act upon it somehow. - Args: - query (str): query in native format - Returns: - HandlerResponse - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]) - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f"Error running query: {query} on {self.connection_data['db_file']}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> StatusResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INTSERT, DELETE, etc - Returns: - HandlerResponse - """ - renderer = SqlalchemyRender("sqlite") - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> StatusResponse: - """ - Return list of entities that will be accessible as tables. - Returns: - HandlerResponse - """ - - query = "SELECT name from sqlite_master where type= 'table';" - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) - return result - - def get_columns(self, table_name: str) -> StatusResponse: - """ - Returns a list of entity columns. - Args: - table_name (str): name of one of tables returned by self.get_tables() - Returns: - HandlerResponse - """ - - query = f"PRAGMA table_info([{table_name}]);" - result = self.native_query(query) - df = result.data_frame - result.data_frame = df.rename(columns={"name": "column_name", "type": "data_type"}) - return result diff --git a/mindsdb/integrations/handlers/sqlite_handler/tests/__init__.py b/mindsdb/integrations/handlers/sqlite_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/sqlite_handler/tests/test_sqlite_handler.py b/mindsdb/integrations/handlers/sqlite_handler/tests/test_sqlite_handler.py deleted file mode 100644 index df4e2364118..00000000000 --- a/mindsdb/integrations/handlers/sqlite_handler/tests/test_sqlite_handler.py +++ /dev/null @@ -1,32 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.sqlite_handler.sqlite_handler import SQLiteHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class SQLiteHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "db_file": "chinook.db", - } - cls.handler = SQLiteHandler('test_sqlite_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM customers" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_4_get_columns(self): - columns = self.handler.get_columns('customers') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/README.md b/mindsdb/integrations/handlers/sqreamdb_handler/README.md deleted file mode 100644 index be28b93fa95..00000000000 --- a/mindsdb/integrations/handlers/sqreamdb_handler/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# SQreamDB Handler - -This is the implementation of the SQreamDB handler for MindsDB. - -## SQreamDB -A SQL database that empowers organizations to perform complex analytics on a petabyte-scale of data and gain time-sensitive business insights faster and cheaper than from any other solution. - -## Implementation -This handler was implemented using the `pysqream`, a Python library that allows you to use Python code to run SQL commands on SQreamDB Database. - -The required arguments to establish a connection are, -* `user`: username asscociated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which sevice is exposed -* `database`: Database name to be connected -* `service`: Optional: service queue (default: "sqream") -* `use_ssl`: use SSL connection (default: False) -* `clustered`: Optional: Connect through load balancer, or direct to worker (Default: false - direct to worker) - - -## Usage -In order to make use of this handler and connect to SQreamDB in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE sqreamdb_datasource -WITH -engine='sqreamdb', -parameters={ - "user":"master", - "password":"sqream", - "host":"127.0.0.1", - "port":5000, - "database":"sqream" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM sqreamdb_datasource.sampledb; -~~~~ diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/__about__.py b/mindsdb/integrations/handlers/sqreamdb_handler/__about__.py deleted file mode 100644 index d9fa3bd21e9..00000000000 --- a/mindsdb/integrations/handlers/sqreamdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB SQreamDB handler' -__package_name__ = 'mindsdb_sqreamdb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for SQreamDB" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/__init__.py b/mindsdb/integrations/handlers/sqreamdb_handler/__init__.py deleted file mode 100644 index ae4cb4252c5..00000000000 --- a/mindsdb/integrations/handlers/sqreamdb_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .sqreamdb_handler import SQreamDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'SQreamDB' -name = 'sqreamdb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py b/mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py deleted file mode 100644 index 9de67479193..00000000000 --- a/mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py +++ /dev/null @@ -1,48 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the SQreamDB server/database.' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the SQreamDB server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the SQreamDB server.', - 'secret': True - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Specify port to connect SQreamDB server' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'Specify database name to connect SQreamDB server' - }, - service={ - 'type': ARG_TYPE.STR, - 'description': 'Optional: service queue (default: "sqream")' - }, - use_ssl={ - 'type': ARG_TYPE.BOOL, - 'description': 'use SSL connection (default: False)' - }, - clustered={ - 'type': ARG_TYPE.BOOL, - 'description': 'Optional: Connect through load balancer, or direct to worker (Default: false - direct to worker)' - }, -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=5000, - password='sqream', - user='master', - database='sqream' -) diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/icon.svg b/mindsdb/integrations/handlers/sqreamdb_handler/icon.svg deleted file mode 100644 index cef3ffe5143..00000000000 --- a/mindsdb/integrations/handlers/sqreamdb_handler/icon.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/requirements.txt b/mindsdb/integrations/handlers/sqreamdb_handler/requirements.txt deleted file mode 100644 index 3fa27e840dd..00000000000 --- a/mindsdb/integrations/handlers/sqreamdb_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pysqream>=3.2.5 -pysqream_sqlalchemy>=0.8 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py b/mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py deleted file mode 100644 index 23dbdc4a670..00000000000 --- a/mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py +++ /dev/null @@ -1,149 +0,0 @@ -from typing import Optional - -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -import pandas as pd -import pysqream as db - -from pysqream_sqlalchemy.dialect import SqreamDialect - -logger = log.getLogger(__name__) - - -class SQreamDBHandler(DatabaseHandler): - - name = 'sqreamdb' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ Initialize the handler - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - self.connection_data = connection_data - - self.connection = None - self.is_connected = False - - def connect(self): - """ - Handles the connection to a YugabyteSQL database insance. - """ - if self.is_connected is True: - return self.connection - - args = { - "database": self.connection_data.get('database'), - "host": self.connection_data.get('host'), - "port": self.connection_data.get('port'), - "username": self.connection_data.get('user'), - "password": self.connection_data.get('password'), - "clustered": self.connection_data.get('clustered', False), - "use_ssl": self.connection_data.get('use_ssl', False), - "service": self.connection_data.get('service', 'sqream') - } - - connection = db.connect(**args) - - self.is_connected = True - self.connection = connection - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the SQreamDB database - :return: success status and error message if error occurs - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - with connection.cursor() as cur: - cur.execute('select 1;') - response.success = True - except db.Error as e: - logger.error(f'Error connecting to SQreamDB {self.database}, {e}!') - response.error_message = e - - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - dict for mongo, etc) - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - conn = self.connect() - with conn.cursor() as cur: - try: - cur.execute(query) - - if cur.rowcount > 0 and query.upper().startswith('SELECT'): - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - self.connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.database}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - self.connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement - """ - renderer = SqlalchemyRender(SqreamDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - List all tables in SQreamDB stored in 'sqream_catalog' - """ - - query = "SELECT table_name FROM sqream_catalog.tables" - - return self.query(query) - - def get_columns(self, table_name): - query = f"""SELECT column_name, type_name - FROM sqream_catalog.columns - WHERE table_name = '{table_name}'; - """ - return self.query(query) diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/sqreamdb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py b/mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py deleted file mode 100644 index afc5a608028..00000000000 --- a/mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py +++ /dev/null @@ -1,45 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.sqreamdb_handler.sqreamdb_handler import SQreamDBHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class SQreamDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "127.0.0.1", - "port": "5000", - "user": "sqream", - "password": "sqream", - "database": "master" - } - } - cls.handler = SQreamDBHandler('test_sqreamdb_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS LOVE") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_create_table(self): - res = self.handler.query("CREATE TABLE IF NOT EXISTS LOVE (LOVER varchar(20))") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_4_select_query(self): - query = "SELECT * FROM AUTHORS" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_check_connection(self): - self.handler.check_connection() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/starrocks_handler/README.md b/mindsdb/integrations/handlers/starrocks_handler/README.md deleted file mode 100644 index aca5a1d7d4a..00000000000 --- a/mindsdb/integrations/handlers/starrocks_handler/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# StarRocks Handler - -This is the implementation of the StarRocks Handler for MindsDB. - -## StarRocks -StarRocks is the next-generation data platform designed to make data-intensive real-time analytics fast and easy. It delivers query speeds 5 to 10 times faster than other popular solutions. StarRocks can perform real-time analytics well while updating historical records. It can also enhance real-time analytics with historical data from data lakes easily. With StarRocks, you can get rid of the de-normalized tables and get the best performance and flexibility. - - -## Implementation - -This handler was implemented by extending mysql connector. - -The required arguments to establish a connection are: - -* `host`: the host name of the StarRocks connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name - -## Usage - -In order to make use of this handler and connect to a StarRocks server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE starrocks_datasource -WITH ENGINE = "starrocks", -PARAMETERS = { - "user": "root", - "password": "", - "host": "localhost", - "port": 9030, - "database": "starrocks " -} -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM starrocks_datasource.loveU LIMIT 10; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/starrocks_handler/__about__.py b/mindsdb/integrations/handlers/starrocks_handler/__about__.py deleted file mode 100644 index a59a12a2bd9..00000000000 --- a/mindsdb/integrations/handlers/starrocks_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB StarRocks handler' -__package_name__ = 'mindsdb_starrocks_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for StarRocks" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/starrocks_handler/__init__.py b/mindsdb/integrations/handlers/starrocks_handler/__init__.py deleted file mode 100644 index 4efd81b8a0a..00000000000 --- a/mindsdb/integrations/handlers/starrocks_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .starrocks_handler import StarRocksHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'StarRocks' -name = 'starrocks' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/starrocks_handler/icon.svg b/mindsdb/integrations/handlers/starrocks_handler/icon.svg deleted file mode 100644 index 8d5a6f8d5d5..00000000000 --- a/mindsdb/integrations/handlers/starrocks_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/starrocks_handler/requirements.txt b/mindsdb/integrations/handlers/starrocks_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/starrocks_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py b/mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py deleted file mode 100644 index 7c1a3a221c6..00000000000 --- a/mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py +++ /dev/null @@ -1,47 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE -from collections import OrderedDict - - -from mindsdb.integrations.handlers.mysql_handler import Handler as MysqlHandler - - -class StarRocksHandler(MysqlHandler): - """ - This handler handles connection and execution of the StarRocks statements. - """ - name = 'starrocks' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the StarRocks server.' - }, - password={ - 'type': ARG_TYPE.STR, - 'description': 'The password to authenticate the user with the StarRocks server.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the StarRocks server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the StarRocks server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the StarRocks server. Must be an integer.' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=5432, - user='root', - password='', - database='database' -) diff --git a/mindsdb/integrations/handlers/starrocks_handler/tests/__init__.py b/mindsdb/integrations/handlers/starrocks_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py b/mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py deleted file mode 100644 index 7620d137806..00000000000 --- a/mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.starrocks_handler.starrocks_handler import StarRocksHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE - - -class StarRocksHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 9030, - "user": "root", - "password": "", - "database": "starrocks", - } - } - cls.handler = StarRocksHandler('test_starrocks_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_create_table(self): - query = "CREATE Table IF NOT EXISTS Lover(name varchar(101));" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_3_insert(self): - query = "INSERT INTO LOVER VALUES('Shiv Shakti');" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_4_native_query_select(self): - query = "SELECT * FROM LOVER;" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('LOVER') - - query = "DROP Table IF EXISTS Lover;" - self.handler.query(query) - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/strapi_handler/README.md b/mindsdb/integrations/handlers/strapi_handler/README.md deleted file mode 100644 index 5595ce71c99..00000000000 --- a/mindsdb/integrations/handlers/strapi_handler/README.md +++ /dev/null @@ -1,146 +0,0 @@ -# Strapi Handler - -Strapi handler is a MindsDB handler for Strapi. It allows you to query Strapi collections using SQL. - -## What is Strapi ? - -Strapi is the leading open-source Headless CMS. Strapi gives developers the freedom to use their favorite tools and frameworks while allowing editors to easily manage their content and distribute it anywhere. - -## Strapi Handler Initialization - -The Strapi handler is initialized with the following parameters: - -- `host` - the host of the Strapi server -- `port` - the port of the Strapi server -- `api_token` - the api token of the Strapi server -- `plural_api_ids` - the list of plural api ids of the collections - -## Implemented Features - -- `SELECT` - select data from a collection -- `WHERE` - filter data from a collection -- `LIMIT` - limit data from a collection -- `INSERT` - insert data into a collection -- `UPDATE` - update data from a collection - -Note: We can use collection name as table name in SQL. - -## Example Usage - -The first step is to create a database with the new `Strapi` engine. - -```sql -CREATE DATABASE myshop --- display name for database. -WITH ENGINE = 'strapi', --- name of the mindsdb handler -PARAMETERS = { - "host" : "", --- host, it can be an ip or an url. - "port" : "", --- common port is 1337. - "api_token": "", --- api token of the strapi server. - "plural_api_ids" : [""] --- plural api ids of the collections. -}; -``` - -Example: - -```sql -CREATE DATABASE myshop -WITH ENGINE = 'strapi', -PARAMETERS = { - "host" : "localhost", - "port" : "1337", - "api_token": "c56c000d867e95848c", - "plural_api_ids" : ["products", "sellers"] -}; -``` - ---- - -### SELECT - -```sql -SELECT * -FROM myshop.; -``` - -Example: - -```sql -SELECT * -FROM myshop.products; -``` - ---- - -### WHERE - -```sql -SELECT * -FROM myshop. -WHERE = ; -``` - -Example: - -```sql -SELECT description, price -FROM myshop.products -WHERE id = 1; -``` - ---- - -### LIMIT - -```sql -SELECT * -FROM myshop. -LIMIT ; -``` - -Example: - -```sql -SELECT * -FROM myshop.products -LIMIT 10; -``` - ---- - -### INSERT - -```sql -INSERT INTO myshop. (, , ...) -VALUES (, , ...); -``` - -Example: - -```sql -INSERT INTO myshop.sellers (name, email, sellerid) -VALUES ('Ram', 'ram@gmail.com', 'ram'); -``` - -Note: You only able to insert data into the collection which has `create` permission. - ---- - -### UPDATE - -```sql -UPDATE myshop. -SET = , = , ... -WHERE = ; -``` - -Example - -```sql - -UPDATE myshop.products -SET price = 299, -avaiablity = false -WHERE id = 1; -``` - -Note: You only able to update data into the collection which has `update` permission. diff --git a/mindsdb/integrations/handlers/strapi_handler/__about__.py b/mindsdb/integrations/handlers/strapi_handler/__about__.py deleted file mode 100644 index 199f17ec162..00000000000 --- a/mindsdb/integrations/handlers/strapi_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Strapi handler" -__package_name__ = "mindsdb_strapi_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Strapi" -__author__ = "Ritwick Raj Makhal" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/strapi_handler/__init__.py b/mindsdb/integrations/handlers/strapi_handler/__init__.py deleted file mode 100644 index 6ce97fc3e59..00000000000 --- a/mindsdb/integrations/handlers/strapi_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .strapi_handler import StrapiHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Strapi" -name = "strapi" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/strapi_handler/icon.svg b/mindsdb/integrations/handlers/strapi_handler/icon.svg deleted file mode 100644 index 1b42789ef0b..00000000000 --- a/mindsdb/integrations/handlers/strapi_handler/icon.svg +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/strapi_handler/strapi_handler.py b/mindsdb/integrations/handlers/strapi_handler/strapi_handler.py deleted file mode 100644 index 2338edc0f0d..00000000000 --- a/mindsdb/integrations/handlers/strapi_handler/strapi_handler.py +++ /dev/null @@ -1,153 +0,0 @@ -from mindsdb.integrations.handlers.strapi_handler.strapi_tables import StrapiTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities import log -import requests -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE -from collections import OrderedDict -import pandas as pd - -logger = log.getLogger(__name__) - - -class StrapiHandler(APIHandler): - def __init__(self, name: str, **kwargs) -> None: - """initializer method - - Args: - name (str): handler name - """ - super().__init__(name) - - self.connection = None - self.is_connected = False - args = kwargs.get('connection_data', {}) - if 'host' in args and 'port' in args: - self._base_url = f"http://{args['host']}:{args['port']}" - if 'api_token' in args: - self._api_token = args['api_token'] - if 'plural_api_ids' in args: - self._plural_api_ids = args['plural_api_ids'] - # Registers tables for each collections in strapi - for pluralApiId in self._plural_api_ids: - self._register_table(table_name=pluralApiId, table_class=StrapiTable(handler=self, name=pluralApiId)) - - def check_connection(self) -> StatusResponse: - """checking the connection - - Returns: - StatusResponse: whether the connection is still up - """ - response = StatusResponse(False) - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f'Error connecting to Strapi API: {e}!') - response.error_message = e - - self.is_connected = response.success - return response - - def connect(self) -> StatusResponse: - """making the connectino object - """ - if self.is_connected and self.connection: - return self.connection - - try: - headers = {"Authorization": f"Bearer {self._api_token}"} - response = requests.get(f"{self._base_url}", headers=headers) - if response.status_code == 200: - self.connection = response - self.is_connected = True - return StatusResponse(True) - else: - raise Exception(f"Error connecting to Strapi API: {response.status_code} - {response.text}") - except Exception as e: - logger.error(f'Error connecting to Strapi API: {e}!') - return StatusResponse(False, error_message=e) - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) - - def call_strapi_api(self, method: str, endpoint: str, params: dict = {}, json_data: dict = {}) -> pd.DataFrame: - headers = {"Authorization": f"Bearer {self._api_token}"} - url = f"{self._base_url}{endpoint}" - - if method.upper() in ('GET', 'POST', 'PUT', 'DELETE'): - headers['Content-Type'] = 'application/json' - - if method.upper() in ('POST', 'PUT', 'DELETE'): - response = requests.request(method, url, headers=headers, params=params, data=json_data) - else: - response = requests.get(url, headers=headers, params=params) - - if response.status_code == 200: - data = response.json() - # Create an empty DataFrame - df = pd.DataFrame() - if isinstance(data.get('data', None), list): - for item in data['data']: - # Add 'id' and 'attributes' to the DataFrame - row_data = {'id': item['id'], **item['attributes']} - df = df._append(row_data, ignore_index=True) - return df - elif isinstance(data.get('data', None), dict): - # Add 'id' and 'attributes' to the DataFrame - row_data = {'id': data['data']['id'], **data['data']['attributes']} - df = df._append(row_data, ignore_index=True) - return df - else: - raise Exception(f"Error connecting to Strapi API: {response.status_code} - {response.text}") - - return pd.DataFrame() - - -connection_args = OrderedDict( - api_token={ - "type": ARG_TYPE.PWD, - "description": "Strapi API key to use for authentication.", - "required": True, - "label": "Api token", - }, - host={ - "type": ARG_TYPE.URL, - "description": "Strapi API host to connect to.", - "required": True, - "label": "Host", - }, - port={ - "type": ARG_TYPE.INT, - "description": "Strapi API port to connect to.", - "required": True, - "label": "Port", - }, - plural_api_ids={ - "type": list, - "description": "Plural API id to use for querying.", - "required": True, - "label": "Plural API id", - }, -) - -connection_args_example = OrderedDict( - host="localhost", - port=1337, - api_token="c56c000d867e95848c", - plural_api_ids=["posts", "portfolios"], -) diff --git a/mindsdb/integrations/handlers/strapi_handler/strapi_tables.py b/mindsdb/integrations/handlers/strapi_handler/strapi_tables.py deleted file mode 100644 index 48b125ad976..00000000000 --- a/mindsdb/integrations/handlers/strapi_handler/strapi_tables.py +++ /dev/null @@ -1,131 +0,0 @@ -from typing import List -import pandas as pd -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb_sql_parser.ast.select.constant import Constant -import json - - -class StrapiTable(APITable): - - def __init__(self, handler: APIHandler, name: str): - super().__init__(handler) - self.name = name - # get all the fields of a collection as columns - self.columns = self.handler.call_strapi_api(method='GET', endpoint=f'/api/{name}').columns - - def select(self, query: ast.Select) -> pd.DataFrame: - """Triggered at the SELECT query - - Args: - query (ast.Select): User's entered query - - Returns: - pd.DataFrame: The queried information - """ - # Initialize _id and selected_columns - _id = None - selected_columns = [] - - # Get id from where clause, if available - conditions = extract_comparison_conditions(query.where) - for op, arg1, arg2 in conditions: - if arg1 == 'id' and op == '=': - _id = arg2 - else: - raise ValueError("Unsupported condition in WHERE clause") - - # Get selected columns from query - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - # Initialize the result DataFrame - result_df = None - - if _id is not None: - # Fetch data using the provided endpoint for the specific id - df = self.handler.call_strapi_api(method='GET', endpoint=f'/api/{self.name}/{_id}') - - if len(df) > 0: - result_df = df[selected_columns] - else: - # Fetch data without specifying an id - page_size = 100 # The page size you want to use for API requests - limit = query.limit.value if query.limit else None - result_df = pd.DataFrame(columns=selected_columns) - - if limit: - # Calculate the number of pages required - page_count = (limit + page_size - 1) // page_size - else: - page_count = 1 - - for page in range(1, page_count + 1): - if limit: - # Calculate the page size for this request - current_page_size = min(page_size, limit) - else: - current_page_size = page_size - - df = self.handler.call_strapi_api(method='GET', endpoint=f'/api/{self.name}', params={'pagination[page]': page, 'pagination[pageSize]': current_page_size}) - - if len(df) == 0: - break - - result_df = pd.concat([result_df, df[selected_columns]], ignore_index=True) - - if limit: - limit -= current_page_size - - return result_df - - def insert(self, query: ast.Insert) -> None: - """triggered at the INSERT query - Args: - query (ast.Insert): user's entered query - """ - data = {'data': {}} - for column, value in zip(query.columns, query.values[0]): - if isinstance(value, Constant): - data['data'][column.name] = value.value - else: - data['data'][column.name] = value - self.handler.call_strapi_api(method='POST', endpoint=f'/api/{self.name}', json_data=json.dumps(data)) - - def update(self, query: ast.Update) -> None: - """triggered at the UPDATE query - - Args: - query (ast.Update): user's entered query - """ - conditions = extract_comparison_conditions(query.where) - # Get id from query - for op, arg1, arg2 in conditions: - if arg1 == 'id' and op == '=': - _id = arg2 - else: - raise NotImplementedError - data = {'data': {}} - for key, value in query.update_columns.items(): - if isinstance(value, Constant): - data['data'][key] = value.value - self.handler.call_strapi_api(method='PUT', endpoint=f'/api/{self.name}/{_id}', json_data=json.dumps(data)) - - def get_columns(self, ignore: List[str] = []) -> List[str]: - """columns - - Args: - ignore (List[str], optional): exclusion items. Defaults to []. - - Returns: - List[str]: available columns with `ignore` items removed from the list. - """ - - return [item for item in self.columns if item not in ignore] diff --git a/mindsdb/integrations/handlers/strapi_handler/tests/__init__.py b/mindsdb/integrations/handlers/strapi_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/strapi_handler/tests/test_strapi_handler.py b/mindsdb/integrations/handlers/strapi_handler/tests/test_strapi_handler.py deleted file mode 100644 index a7dd95481dc..00000000000 --- a/mindsdb/integrations/handlers/strapi_handler/tests/test_strapi_handler.py +++ /dev/null @@ -1,51 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.strapi_handler.strapi_handler import StrapiHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class StrapiHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - connection_data = { - 'host': 'localhost', - 'port': '1337', - 'api_token': 'c56c000d867e95848c', - 'plural_api_ids': ['products', 'sellers']} - cls.handler = StrapiHandler(name='myshop', connection_data=connection_data) - - def test_0_check_connection(self): - # Ensure the connection is successful - self.assertTrue(self.handler.check_connection()) - - def test_1_get_table(self): - assert self.handler.get_tables() is not RESPONSE_TYPE.ERROR - - def test_2_get_columns(self): - assert self.handler.get_columns('products') is not RESPONSE_TYPE.ERROR - - def test_3_get_data(self): - # Ensure that you can retrieve data from a table - data = self.handler.native_query('SELECT * FROM products') - assert data.type is not RESPONSE_TYPE.ERROR - - def test_4_get_data_with_condition(self): - # Ensure that you can retrieve data with a condition - data = self.handler.native_query('SELECT * FROM products WHERE id = 1') - assert data.type is not RESPONSE_TYPE.ERROR - - def test_5_insert_data(self): - # Ensure that data insertion is successful - query = "INSERT INTO myshop.sellers (name, email, sellerid) VALUES ('Ram', 'ram@gmail.com', 'ramu4')" - result = self.handler.native_query(query) - self.assertTrue(result) - - def test_6_update_data(self): - # Ensure that data updating is successful - query = "UPDATE products SET name = 'test2' WHERE id = 1" - result = self.handler.native_query(query) - self.assertTrue(result) - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/strava_handler/Readme.md b/mindsdb/integrations/handlers/strava_handler/Readme.md deleted file mode 100644 index e91d23343ed..00000000000 --- a/mindsdb/integrations/handlers/strava_handler/Readme.md +++ /dev/null @@ -1,73 +0,0 @@ -# Strava Handler - -Strava handler for MindsDB provides interfaces to connect with strava via APIs and pull the workout data of your fitness club into MindsDB. - -## Strava -Strava is app used for tracking physical exercise and share the data with your social network - -## Strava Handler Initialization - -The Strava handler is initialized with the following parameters: - -- `strava_api_token`: Strava API key to use for authentication - -Please follow [this link](https://developers.strava.com/docs/getting-started/) to generate the token for accessing strava API - -## Implemented Features - -- [x] Strava all_clubs table - - [x] Support LIMIT - - [x] Support ORDER BY - - [x] Support column selection - -- [x] Strava club_activities table - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - - -## Example Usage - -The first step is to create a database with the new `Strava` engine. - -~~~~sql -CREATE DATABASE mindsdb_strava -WITH ENGINE = 'strava', -PARAMETERS = { - "strava_client_id": "your-strava-client-id", - "strava_access_token": "your-strava-api-key-token" -}; -~~~~ - -Use the established connection to query the Strava all_clubs table - -~~~~sql -SELECT * FROM mindsdb_strava.all_clubs; -~~~~ - -Use the established connection to query the Strava club_activities table - -~~~~sql -SELECT * FROM mindsdb_strava.club_activities -WHERE strava_club_id = 195748; -~~~~ - - -Advanced queries for the strava handler - -~~~~sql -SELECT id,localized_sport_type,country,member_count FROM -mindsdb_strava.all_clubs -ORDER by id ASC -LIMIT 10; -~~~~~~~ - -~~~~sql -SELECT name, distance, sport_type -FROM -mindsdb_strava.club_activities -WHERE strava_club_id = 195748 -ORDER BY distance ASC -LIMIT 10; -~~~~ diff --git a/mindsdb/integrations/handlers/strava_handler/__about__.py b/mindsdb/integrations/handlers/strava_handler/__about__.py deleted file mode 100644 index f1142b5d0ba..00000000000 --- a/mindsdb/integrations/handlers/strava_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Strava handler" -__package_name__ = "mindsdb_strava_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Strava" -__author__ = "Balaji Seetharaman" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/strava_handler/__init__.py b/mindsdb/integrations/handlers/strava_handler/__init__.py deleted file mode 100644 index 5cb5523853b..00000000000 --- a/mindsdb/integrations/handlers/strava_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - - -from .__about__ import __version__ as version, __description__ as description - -try: - from .strava_handler import StravaHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Strava" -name = "strava" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/strava_handler/icon.svg b/mindsdb/integrations/handlers/strava_handler/icon.svg deleted file mode 100644 index 8ad79231341..00000000000 --- a/mindsdb/integrations/handlers/strava_handler/icon.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/strava_handler/requirements.txt b/mindsdb/integrations/handlers/strava_handler/requirements.txt deleted file mode 100644 index 006c115a469..00000000000 --- a/mindsdb/integrations/handlers/strava_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -stravalib -urllib3>=2.2.2 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/strava_handler/strava_handler.py b/mindsdb/integrations/handlers/strava_handler/strava_handler.py deleted file mode 100644 index f7cff0f8ff3..00000000000 --- a/mindsdb/integrations/handlers/strava_handler/strava_handler.py +++ /dev/null @@ -1,109 +0,0 @@ -from mindsdb.integrations.handlers.strava_handler.strava_tables import StravaAllClubsTable -from mindsdb.integrations.handlers.strava_handler.strava_tables import StravaClubActivitesTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -from collections import OrderedDict -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - -from stravalib.client import Client - -logger = log.getLogger(__name__) - - -class StravaHandler(APIHandler): - """Strava handler implementation""" - - def __init__(self, name=None, **kwargs): - """Initialize the Strava handler. - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - - self.parser = parse_sql - self.connection_data = connection_data - self.kwargs = kwargs - self.connection = None - self.is_connected = False - - strava_all_clubs_data = StravaAllClubsTable(self) - self._register_table("all_clubs", strava_all_clubs_data) - - strava_club_activites_data = StravaClubActivitesTable(self) - self._register_table("club_activities", strava_club_activites_data) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - if self.is_connected is True: - return self.connection - - client = Client() - client.access_token = self.connection_data['strava_access_token'] - self.connection = client - - return self.connection - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - Returns - ------- - StatusResponse - Status confirmation - """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to Strava API: {e}!") - response.error_message = e - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) - - -connection_args = OrderedDict( - strava_client_id={ - 'type': ARG_TYPE.STR, - 'description': 'Client id for accessing Strava Application API' - }, - strava_access_token={ - 'type': ARG_TYPE.STR, - 'description': 'Access Token for accessing Strava Application API' - } -) - -connection_args_example = OrderedDict( - strava_client_id='', - strava_access_token='' -) diff --git a/mindsdb/integrations/handlers/strava_handler/strava_tables.py b/mindsdb/integrations/handlers/strava_handler/strava_tables.py deleted file mode 100644 index 9784d4ec06d..00000000000 --- a/mindsdb/integrations/handlers/strava_handler/strava_tables.py +++ /dev/null @@ -1,239 +0,0 @@ -import pandas as pd - -from typing import List - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.utilities import log - -from mindsdb_sql_parser import ast - - -logger = log.getLogger(__name__) - - -class StravaAllClubsTable(APITable): - """Strava List all Clubs Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the Strava "getLoggedInAthleteClubs" API endpoint - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - Returns - ------- - pd.DataFrame - strava "List Athlete Clubs " matching the query - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - order_by_conditions = {} - - if query.order_by and len(query.order_by) > 0: - order_by_conditions["columns"] = [] - order_by_conditions["ascending"] = [] - - for an_order in query.order_by: - if an_order.field.parts[0] != "id": - next - if an_order.field.parts[1] in self.get_columns(): - order_by_conditions["columns"].append(an_order.field.parts[1]) - - if an_order.direction == "ASC": - order_by_conditions["ascending"].append(True) - else: - order_by_conditions["ascending"].append(False) - else: - raise ValueError( - f"Order by unknown column {an_order.field.parts[1]}" - ) - strava_clubs_df = self.call_strava_allclubs_api() - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(strava_clubs_df) == 0: - strava_clubs_df = pd.DataFrame([], columns=selected_columns) - else: - strava_clubs_df.columns = self.get_columns() - for col in set(strava_clubs_df.columns).difference(set(selected_columns)): - strava_clubs_df = strava_clubs_df.drop(col, axis=1) - - if len(order_by_conditions.get("columns", [])) > 0: - strava_clubs_df = strava_clubs_df.sort_values( - by=order_by_conditions["columns"], - ascending=order_by_conditions["ascending"], - ) - - if query.limit: - strava_clubs_df = strava_clubs_df.head(query.limit.value) - - return strava_clubs_df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - return [ - 'id', - 'name', - 'sport_type', - 'city', - 'state', - 'country', - 'member_count', - ] - - def call_strava_allclubs_api(self): - """Pulls all the records from the given and returns it select() - - Returns - ------- - pd.DataFrame of all the records of the "List Athlete Clubs" API end point - """ - - clubs = self.handler.connect().get_athlete_clubs() - - club_cols = self.get_columns() - data = [] - - for club in clubs: - club_dict = club.dict() - data.append([club_dict.get(x) for x in club_cols]) - - all_strava_clubs_df = pd.DataFrame(data, columns=club_cols) - - return all_strava_clubs_df - - -class StravaClubActivitesTable(APITable): - """Strava List Club Activities by id Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the Strava "List Club Activities " API endpoint - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - Returns - ------- - pd.DataFrame - strava "List Club Activities" matching the query - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - conditions = extract_comparison_conditions(query.where) - - order_by_conditions = {} - clubs_kwargs = {} - - if query.order_by and len(query.order_by) > 0: - order_by_conditions["columns"] = [] - order_by_conditions["ascending"] = [] - - for an_order in query.order_by: - if an_order.field.parts[0] != "id": - next - if an_order.field.parts[1] in self.get_columns(): - order_by_conditions["columns"].append(an_order.field.parts[1]) - - if an_order.direction == "ASC": - order_by_conditions["ascending"].append(True) - else: - order_by_conditions["ascending"].append(False) - else: - raise ValueError( - f"Order by unknown column {an_order.field.parts[1]}" - ) - - for a_where in conditions: - if a_where[1] == "strava_club_id": - if a_where[0] != "=": - raise ValueError("Unsupported where operation for strava_club_id ") - clubs_kwargs["type"] = a_where[2] - else: - raise ValueError(f"Unsupported where argument {a_where[1]}") - - strava_club_activities_df = self.call_strava_clubactivities_api(a_where[2]) - - selected_columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - selected_columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - else: - raise ValueError(f"Unknown query target {type(target)}") - - if len(strava_club_activities_df) == 0: - strava_club_activities_df = pd.DataFrame([], columns=selected_columns) - else: - strava_club_activities_df.columns = self.get_columns() - for col in set(strava_club_activities_df.columns).difference(set(selected_columns)): - strava_club_activities_df = strava_club_activities_df.drop(col, axis=1) - - if len(order_by_conditions.get("columns", [])) > 0: - strava_club_activities_df = strava_club_activities_df.sort_values( - by=order_by_conditions["columns"], - ascending=order_by_conditions["ascending"], - ) - - if query.limit: - strava_club_activities_df = strava_club_activities_df.head(query.limit.value) - - return strava_club_activities_df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - return [ - 'name', - 'distance', - 'moving_time', - 'elapsed_time', - 'total_elevation_gain', - 'sport_type', - 'athlete.firstname', - ] - - def call_strava_clubactivities_api(self, club_id): - """Pulls all the records from the given and returns it select() - - Returns - ------- - pd.DataFrame of all the records of the "getClubActivitiesById" API end point - """ - - club_activities = self.handler.connect().get_club_activities(club_id) - - club_cols = self.get_columns() - data = [] - - for club in club_activities: - club_dict = club.dict() - data.append([club_dict.get(x) for x in club_cols]) - - all_strava_club_activities_df = pd.DataFrame(data, columns=club_cols) - - return all_strava_club_activities_df diff --git a/mindsdb/integrations/handlers/stripe_handler/README.md b/mindsdb/integrations/handlers/stripe_handler/README.md deleted file mode 100644 index a71252d5a39..00000000000 --- a/mindsdb/integrations/handlers/stripe_handler/README.md +++ /dev/null @@ -1,156 +0,0 @@ -# Stripe Handler - -Stripe handler for MindsDB provides interfaces to connect to Stripe via APIs and pull store data into MindsDB. - ---- - -## Table of Contents - -- [Stripe Handler](#stripe-handler) - - [Table of Contents](#table-of-contents) - - [About Stripe](#about-stripe) - - [Stripe Handler Implementation](#stripe-handler-implementation) - - [Stripe Handler Initialization](#stripe-handler-initialization) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About Stripe - -Stripe is a payment services provider that lets merchants accept credit and debit cards or other payments. -
-https://www.nerdwallet.com/article/small-business/what-is-stripe - -## Stripe Handler Implementation - -This handler was implemented using [stripe-python](https://github.com/stripe/stripe-python), the Python library for the Stripe API. - -## Stripe Handler Initialization - -The Stripe handler is initialized with the following parameters: - -- `api_key`: a Stripe API key. You can find your API keys in the Stripe Dashboard. [Read more](https://stripe.com/docs/keys). - -## Implemented Features - -- [x] Stripe Products Table for a given account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - - [x] Support INSERT - - [x] Support UPDATE - - [x] Support DELETE -- [x] Stripe Customers Table for a given account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection -- [x] Stripe Payment Intents Table for a given account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection -- [x] Stripe Refunds Table for a given account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection -- [x] Stripe Payouts Table for a given account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -## TODO - -- [ ] Support INSERT, UPDATE and DELETE for Customers and Payment Intents tables -- [ ] Stripe Charges table -- [ ] Stripe Balance table -- [ ] Many more - -## Example Usage - -The first step is to create a database with the new `stripe` engine by passing in the required `api_key` parameter: - -~~~~sql -CREATE DATABASE stripe_datasource -WITH ENGINE = 'stripe', -PARAMETERS = { - "api_key": "sk_..." -}; -~~~~ - -Use the established connection to query your database: - -### Querying the Customer Data -~~~~sql -SELECT * FROM stripe_datasource.customers -~~~~ - -or, for the `payouts` table -~~~~sql -SELECT * FROM stripe_datasource.payouts -~~~~ - -or, for the `products` table -~~~~sql -SELECT * FROM stripe_datasource.products -~~~~ - -Run more advanced queries: - -~~~~sql -SELECT name, email -FROM stripe_datasource.customers -WHERE currency = 'inr' -ORDER BY name -LIMIT 5 -~~~~ - -### Querying the Refund data -~~~~sql -SELECT * FROM stripe_datasource.refunds -~~~~ - -Run more advanced queries: - -~~~~sql -SELECT name, email -FROM stripe_datasource.refunds -WHERE currency = 'inr' -ORDER BY name -LIMIT 5 -~~~~ - - -~~~~sql -SELECT id, name, active -FROM stripe_datasource.products -WHERE active = true -ORDER BY name -LIMIT 5 -~~~~ - -~~~~sql -INSERT INTO stripe_datasource.products(name) -VALUES('product_name') -~~~~ - -~~~~sql -UPDATE stripe_datasource.products -SET name = 'product_name_updated' -WHERE name = 'product_name' -~~~~ - -~~~~sql -DELETE FROM stripe_datasource.products -WHERE name = 'product_name_updated' -~~~~ diff --git a/mindsdb/integrations/handlers/stripe_handler/__about__.py b/mindsdb/integrations/handlers/stripe_handler/__about__.py deleted file mode 100644 index cf073898a61..00000000000 --- a/mindsdb/integrations/handlers/stripe_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Stripe handler" -__package_name__ = "mindsdb_stripe_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Stripe" -__author__ = "Minura Punchihewa" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/stripe_handler/__init__.py b/mindsdb/integrations/handlers/stripe_handler/__init__.py deleted file mode 100644 index 3f39afb0ee8..00000000000 --- a/mindsdb/integrations/handlers/stripe_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .stripe_handler import StripeHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Stripe" -name = "stripe" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/stripe_handler/icon.svg b/mindsdb/integrations/handlers/stripe_handler/icon.svg deleted file mode 100644 index 49fcdfe093f..00000000000 --- a/mindsdb/integrations/handlers/stripe_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/stripe_handler/requirements.txt b/mindsdb/integrations/handlers/stripe_handler/requirements.txt deleted file mode 100644 index 233b8cec1c0..00000000000 --- a/mindsdb/integrations/handlers/stripe_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -stripe \ No newline at end of file diff --git a/mindsdb/integrations/handlers/stripe_handler/stripe_handler.py b/mindsdb/integrations/handlers/stripe_handler/stripe_handler.py deleted file mode 100644 index fbbe4b97d89..00000000000 --- a/mindsdb/integrations/handlers/stripe_handler/stripe_handler.py +++ /dev/null @@ -1,103 +0,0 @@ -import stripe -from mindsdb.integrations.handlers.stripe_handler.stripe_tables import CustomersTable, ProductsTable, PaymentIntentsTable, RefundsTable, PayoutsTable -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -logger = log.getLogger(__name__) - - -class StripeHandler(APIHandler): - """ - The Stripe handler implementation. - """ - - name = 'stripe' - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - customers_data = CustomersTable(self) - self._register_table("customers", customers_data) - - products_data = ProductsTable(self) - self._register_table("products", products_data) - - payment_intents_data = PaymentIntentsTable(self) - self._register_table("payment_intents", payment_intents_data) - - payouts_data = PayoutsTable(self) - self._register_table("payouts", payouts_data) - - refunds_data = RefundsTable(self) - self._register_table("refunds", refunds_data) - - def connect(self): - """ - Set up the connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - if self.is_connected is True: - return self.connection - - stripe.api_key = self.connection_data['api_key'] - - self.connection = stripe - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - - try: - stripe = self.connect() - stripe.Account.retrieve() - response.success = True - except Exception as e: - logger.error('Error connecting to Stripe!') - response.error_message = str(e) - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/stripe_handler/stripe_tables.py b/mindsdb/integrations/handlers/stripe_handler/stripe_tables.py deleted file mode 100644 index 094de9f5dbd..00000000000 --- a/mindsdb/integrations/handlers/stripe_handler/stripe_tables.py +++ /dev/null @@ -1,492 +0,0 @@ -import pandas as pd -import stripe -from typing import Text, List, Dict, Any - -from mindsdb_sql_parser import ast -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.handlers.query_utilities import INSERTQueryParser, DELETEQueryParser, UPDATEQueryParser, DELETEQueryExecutor, UPDATEQueryExecutor -from mindsdb.integrations.utilities.handlers.query_utilities.select_query_utilities import SELECTQueryParser, SELECTQueryExecutor -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class CustomersTable(APITable): - """The Stripe Customers Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Stripe Customer data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Stripe Customers matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'customers', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - customers_df = pd.json_normalize(self.get_customers(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - customers_df, - selected_columns, - where_conditions, - order_by_conditions - ) - customers_df = select_statement_executor.execute_query() - - return customers_df - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_customers(limit=1)).columns.tolist() - - def get_customers(self, **kwargs) -> List[Dict]: - stripe = self.handler.connect() - customers = stripe.Customer.list(**kwargs) - return [customer.to_dict() for customer in customers] - - -class ProductsTable(APITable): - """The Stripe Products Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Stripe Product data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Stripe Products matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'products', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - products_df = pd.json_normalize(self.get_products(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - products_df, - selected_columns, - where_conditions, - order_by_conditions - ) - products_df = select_statement_executor.execute_query() - - return products_df - - def insert(self, query: ast.Insert) -> None: - """ - Inserts data into Stripe "POST v1/products" API endpoint. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - insert_statement_parser = INSERTQueryParser( - query, - supported_columns=['id', 'name', 'active', 'description', 'metadata'], - mandatory_columns=['name'], - all_mandatory=False, - ) - product_data = insert_statement_parser.parse_query() - self.create_products(product_data) - - def update(self, query: ast.Update) -> None: - """ - Updates data from Stripe "POST v1/products/:id" API endpoint. - - Parameters - ---------- - query : ast.Update - Given SQL UPDATE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - update_statement_parser = UPDATEQueryParser(query) - values_to_update, where_conditions = update_statement_parser.parse_query() - - products_df = pd.json_normalize(self.get_products()) - update_query_executor = UPDATEQueryExecutor( - products_df, - where_conditions - ) - - products_df = update_query_executor.execute_query() - product_ids = products_df['id'].tolist() - self.update_products(product_ids, values_to_update) - - def delete(self, query: ast.Delete) -> None: - """ - Deletes data from Stripe "DELETE v1/products/:id" API endpoint. - - Parameters - ---------- - query : ast.Delete - Given SQL DELETE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - delete_statement_parser = DELETEQueryParser(query) - where_conditions = delete_statement_parser.parse_query() - - products_df = pd.json_normalize(self.get_products()) - delete_query_executor = DELETEQueryExecutor( - products_df, - where_conditions - ) - - products_df = delete_query_executor.execute_query() - product_ids = products_df['id'].tolist() - self.delete_products(product_ids) - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_products(limit=1)).columns.tolist() - - def get_products(self, **kwargs) -> List[Dict]: - stripe = self.handler.connect() - products = stripe.Product.list(**kwargs) - return [product.to_dict() for product in products] - - def create_products(self, product_data: List[Dict[Text, Any]]) -> None: - stripe = self.handler.connect() - for product in product_data: - created_product = stripe.Product.create(**product) - if 'id' not in created_product.to_dict(): - raise Exception('Product creation failed') - else: - logger.info(f'Product {created_product.to_dict()["id"]} created') - - def update_products(self, product_ids: List[Text], values_to_update: Dict[Text, Any]) -> None: - stripe = self.handler.connect() - for product_id in product_ids: - updated_product = stripe.Product.modify(product_id, **values_to_update) - if 'id' not in updated_product.to_dict(): - raise Exception('Product update failed') - else: - logger.info(f'Product {updated_product.to_dict()["id"]} updated') - - def delete_products(self, product_ids: List[Text]) -> None: - stripe = self.handler.connect() - for product_id in product_ids: - deleted_product = stripe.Product.delete(product_id) - if 'id' not in deleted_product.to_dict(): - raise Exception('Product deletion failed') - else: - logger.info(f'Product {deleted_product.to_dict()["id"]} deleted') - - -class PaymentIntentsTable(APITable): - """The Stripe Payment Intents Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Stripe Payment Intents data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Stripe Payment Intents matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'payment_intents', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - payment_intents_df = pd.json_normalize(self.get_payment_intents(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - payment_intents_df, - selected_columns, - where_conditions, - order_by_conditions - ) - payment_intents_df = select_statement_executor.execute_query() - - return payment_intents_df - - def delete(self, query: ast.Delete) -> None: - """ - Cancels Stripe Payment Intents and updates the local data. - - Parameters - ---------- - query : ast.Delete - Given SQL DELETE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - delete_statement_parser = DELETEQueryParser(query) - where_conditions = delete_statement_parser.parse_query() - - if 'payment_intents_df' not in self.__dict__: - self.payment_intents_df = pd.json_normalize(self.get_payment_intents()) - - delete_query_executor = DELETEQueryExecutor( - self.payment_intents_df, - where_conditions - ) - - canceled_payment_intents_df = delete_query_executor.execute_query() - - payment_intent_ids = canceled_payment_intents_df['id'].tolist() - self.cancel_payment_intents(payment_intent_ids) - - self.payment_intents_df = self.payment_intents_df[~self.payment_intents_df['id'].isin(payment_intent_ids)] - - def cancel_payment_intents(self, payment_intent_ids: List[str]) -> None: - stripe = self.handler.connect() - for payment_intent_id in payment_intent_ids: - try: - - payment_intent = stripe.PaymentIntent.retrieve(payment_intent_id) - if payment_intent.status in ['requires_payment_method', 'requires_capture', 'requires_confirmation', 'requires_action', 'processing']: - stripe.PaymentIntent.cancel(payment_intent_id) - else: - logger.warning(f"Payment intent {payment_intent_id} is in status {payment_intent.status} and cannot be canceled.") - except stripe.error.StripeError as e: - logger.error(f"Error cancelling payment intent {payment_intent_id}: {str(e)}") - - def update(self, query: 'ast.Update') -> None: - """ - Updates data in Stripe "POST /v1/payment_intents/:id" API endpoint. - - Parameters - ---------- - query : ast.Update - Given SQL UPDATE query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - update_statement_parser = UPDATEQueryParser(query) - values_to_update, where_conditions = update_statement_parser.parse_query() - - payment_intents_df = pd.json_normalize(self.get_payment_intents()) - update_query_executor = UPDATEQueryExecutor( - payment_intents_df, - where_conditions - ) - - payment_intents_df = update_query_executor.execute_query() - payment_intent_ids = payment_intents_df['id'].tolist() - self.update_payment_intents(payment_intent_ids, values_to_update) - - def update_payment_intents(self, payment_intent_ids: list, values_to_update: dict) -> None: - for payment_intent_id in payment_intent_ids: - stripe.PaymentIntent.modify(payment_intent_id, **values_to_update) - - def insert(self, query: 'ast.Insert') -> None: - """ - Inserts data into Stripe "POST /v1/payment_intents" API endpoint. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - insert_statement_parser = INSERTQueryParser( - query, - supported_columns=['amount', 'currency', 'description', 'payment_method_types'], - mandatory_columns=['amount', 'currency'], - all_mandatory=True - ) - payment_intent_data = insert_statement_parser.parse_query() - self.create_payment_intent(payment_intent_data) - - def create_payment_intent(self, payment_intent_data: list) -> None: - for data in payment_intent_data: - stripe.PaymentIntent.create(**data) - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_payment_intents(limit=1)).columns.tolist() - - def get_payment_intents(self, **kwargs) -> List[Dict]: - stripe = self.handler.connect() - payment_intents = stripe.PaymentIntent.list(**kwargs) - return [payment_intent.to_dict() for payment_intent in payment_intents] - - -class RefundsTable(APITable): - """The Stripe Refund Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Stripe Refund data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Stripe Refunds matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'refunds', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - refunds_df = pd.json_normalize(self.get_refunds(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - refunds_df, - selected_columns, - where_conditions, - order_by_conditions - ) - refunds_df = select_statement_executor.execute_query() - - return refunds_df - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_refunds(limit=1)).columns.tolist() - - def get_refunds(self, **kwargs) -> List[Dict]: - stripe = self.handler.connect() - refunds = stripe.Refund.list(**kwargs) - return [refund.to_dict() for refund in refunds] - - -class PayoutsTable(APITable): - """The Stripe Payouts Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """ - Pulls Stripe Payout data. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Stripe Payouts matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'payouts', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - payouts_df = pd.json_normalize(self.get_payouts(limit=result_limit)) - select_statement_executor = SELECTQueryExecutor( - payouts_df, - selected_columns, - where_conditions, - order_by_conditions - ) - payouts_df = select_statement_executor.execute_query() - - return payouts_df - - def get_columns(self) -> List[Text]: - return pd.json_normalize(self.get_payouts(limit=1)).columns.tolist() - - def get_payouts(self, **kwargs) -> List[Dict]: - stripe = self.handler.connect() - payouts = stripe.Payout.list(**kwargs) - return [payout.to_dict() for payout in payouts] diff --git a/mindsdb/integrations/handlers/supabase_handler/README.md b/mindsdb/integrations/handlers/supabase_handler/README.md deleted file mode 100644 index 23cac452dd0..00000000000 --- a/mindsdb/integrations/handlers/supabase_handler/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# Supabase Handler - -This is the implementation of the Supabase handler for MindsDB. - -## Supabase - -Supabase is an open source Firebase alternative. Start your project with a Postgres Database, Authentication, instant APIs, Realtime subscriptions and Storage. - -## Implementation - -This handler was implemented by extending postres connector. - -The required arguments to establish a connection are: - -* `host`: the host name of the Supabase connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name - -## Usage - -In order to make use of this handler and connect to a Supabase server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE example_supabase_data -WITH ENGINE = "supabase", -PARAMETERS = { - "user": "root", - "password": "root", - "host": "hostname", - "port": "5432", - "database": "postgres" -} -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM example_supabase_data.public.rentals LIMIT 10; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/supabase_handler/__about__.py b/mindsdb/integrations/handlers/supabase_handler/__about__.py deleted file mode 100644 index 3c7e24e244e..00000000000 --- a/mindsdb/integrations/handlers/supabase_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Supabase handler' -__package_name__ = 'mindsdb_supabase_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Supabase" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/supabase_handler/__init__.py b/mindsdb/integrations/handlers/supabase_handler/__init__.py deleted file mode 100644 index 13a43ce1105..00000000000 --- a/mindsdb/integrations/handlers/supabase_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -try: - from .supabase_handler import SupabaseHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "Supabase" -name = "supabase" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "support_level", -] diff --git a/mindsdb/integrations/handlers/supabase_handler/icon.svg b/mindsdb/integrations/handlers/supabase_handler/icon.svg deleted file mode 100644 index 245ae5106fc..00000000000 --- a/mindsdb/integrations/handlers/supabase_handler/icon.svg +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/supabase_handler/supabase_handler.py b/mindsdb/integrations/handlers/supabase_handler/supabase_handler.py deleted file mode 100644 index 8236acade97..00000000000 --- a/mindsdb/integrations/handlers/supabase_handler/supabase_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler - - -class SupabaseHandler(PostgresHandler): - """ - This handler handles connection and execution of the Supabase statements. - """ - name = 'supabase' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/supabase_handler/tests/__init__.py b/mindsdb/integrations/handlers/supabase_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py b/mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py deleted file mode 100644 index 311bf5190d7..00000000000 --- a/mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.supabase_handler.supabase_handler import SupabaseHandler - - -class SupabaseHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": "3306", - "user": "root", - "password": "root", - "database": "test", - "ssl": False - } - } - cls.handler = SupabaseHandler('test_supabase_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - assert self.handler.check_connection() - - def test_2_native_query_show_dbs(self): - dbs = self.handler.native_query("SHOW DATABASES;") - assert isinstance(dbs, list) - - def test_3_get_tables(self): - tbls = self.handler.get_tables() - assert isinstance(tbls, list) - - def test_5_create_table(self): - try: - self.handler.native_query("CREATE TABLE test_mdb (test_col INT)") - except Exception: - pass - - def test_6_describe_table(self): - described = self.handler.get_columns("dt_test") - assert isinstance(described, list) - - def test_7_select_query(self): - query = "SELECT * FROM dt_test WHERE 'id'='a'" - self.handler.native_query(query) diff --git a/mindsdb/integrations/handlers/surrealdb_handler/README.md b/mindsdb/integrations/handlers/surrealdb_handler/README.md deleted file mode 100644 index 8423cf04e96..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# Surrealdb Handler - -This is the implementation of the Surrealdb handler for MindsDB. - -## Surrealdb - -SurrealDB is an innovative NewSQL cloud database, suitable for serverless applications, -jamstack applications, single-page applications, and traditional applications. -It is unmatched in its versatility and financial value, with the ability for deployment on cloud, -on-premise, embedded, and edge computing environments. - -## Implementation - -This handler was implemented by using the python library `pysurrealdb`. - -The required arguments to establish a connection are: - -* `host`: the host name of the Surrealdb connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name to be connected -* `namespace`: namespace name to be connected - -## Usage - -In order to make use of this handler and connect to a SurrealDB server. First you need to have [SurrealDB](https://surrealdb.com/install) installed and once you have it installed. - -To use SurrealDB we have to start the SurrealDB server in our local environment. For that you need to give following command in the terminal: -``` -surreal start --user root --pass root -``` - -This will start the server and start accepting requests from port `8000`. Now, in another terminal session, give the following command: -``` -surreal sql --conn http://localhost:8000 \ ---user root --pass root --ns testns --db testdb -``` - -This will create a namespace `testns` for your project and a database `testdb` in order to proceed further. - -Here, let's create a table in our newly created database with the following: -``` -CREATE dev SET name='again', status='founder'; -``` - -This will create a table named `dev` with column `name` and `status`. - -(If you want to use SurrealDB in public cloud editor, feel free to skip the following steps.) - -## Testing SurrealDB in the local environment - -Use the following query to create a SurrealDB database in the MindsDB environment. - -```sql -CREATE DATABASE exampledb -WITH ENGINE = 'surrealdb', -PARAMETERS = { - "host": "localhost", - "port": "8000", - "user": "root", - "password": "root", - "database": "testdb", - "namespace": "testns" -}; -``` - -Now, you can use this established connection to query your database tables as follows: - -```sql -SELECT * FROM exampledb.dev; -``` - -## Testing SurrealDB in the public cloud environment - -To establish a connection with our SurrealDB server which is running locally to the public cloud instance is not that simple. We are going to use `ngrok tunneling` to connect cloud instance to the local SurrealDB server. You can follow this [guide](https://docs.mindsdb.com/sql/create/database#making-your-local-database-available-to-mindsdb) for that. - -In our case with `ngrok` we will use: -``` -ngrok tcp 8000 -``` - -From there, it generated a forwarding dns for me which is: -``` -tcp://6.tcp.ngrok.io:17141 -> localhost:8000 -``` - -It will be different in your case. With this let's connect to the public cloud using - -```sql -CREATE DATABASE exampledb -WITH ENGINE = 'surrealdb', -PARAMETERS = { - "host": "6.tcp.ngrok.io", - "port": "17141", - "user": "root", - "password": "root", - "database": "testdb", - "namespace": "testns" -}; -``` - -Please change the `host` and `port` properties in the `PARAMETERS` clause based on the values which you got. - -We can also query the `dev` table which we created with -```sql -SELECT * FROM exampledb.dev; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/surrealdb_handler/__about__.py b/mindsdb/integrations/handlers/surrealdb_handler/__about__.py deleted file mode 100644 index 782f87d5a99..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB SurrealDB handler' -__package_name__ = 'mindsdb_surrealdb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for SurrealDB" -__author__ = 'Emmanouil Dellatolas & Georgios Artopoulos' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/surrealdb_handler/__init__.py b/mindsdb/integrations/handlers/surrealdb_handler/__init__.py deleted file mode 100644 index 05b05c8d02e..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .surrealdb_handler import SurrealDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'SurrealDB' -name = 'surrealdb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/surrealdb_handler/connection_args.py b/mindsdb/integrations/handlers/surrealdb_handler/connection_args.py deleted file mode 100644 index 5901f103833..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/connection_args.py +++ /dev/null @@ -1,52 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the SurrealDB server.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the SurrealDB server.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the SurrealDB server.', - 'required': True, - 'label': 'Database name' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the SurrealDB server. ', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the SurrealDB server. Must be an integer.', - 'required': True, - 'label': 'Port' - }, - namespace={ - 'type': ARG_TYPE.STR, - 'description': 'The namespace name to be connected', - 'required': True, - 'label': 'Namespace' - } -) -connection_args_example = OrderedDict( - host='localhost', - port=17141, - user='admin', - password='password', - database='test', - namespace='test' -) diff --git a/mindsdb/integrations/handlers/surrealdb_handler/icon.svg b/mindsdb/integrations/handlers/surrealdb_handler/icon.svg deleted file mode 100644 index 026ef43b8af..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/surrealdb_handler/requirements.txt b/mindsdb/integrations/handlers/surrealdb_handler/requirements.txt deleted file mode 100644 index ac8efd5a7cb..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pysurrealdb diff --git a/mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py b/mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py deleted file mode 100644 index 2b8e3b3ee89..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py +++ /dev/null @@ -1,196 +0,0 @@ -from typing import Optional -import pysurrealdb as surreal -import pandas as pd - -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from .utils.surreal_get_info import table_names, column_info - - -logger = log.getLogger(__name__) - - -class SurrealDBHandler(DatabaseHandler): - """ - This handler handles connection and execution of the SurrealDB statements. - """ - name = 'surrealdb' - - def __init__(self, name: str, connection_data: Optional[dict], **kwargs): - """ Initialize the handler - Args: - name (str): name of particular handler instance - connection_data (dict): parameters for connecting to the database - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name) - self.database = connection_data['database'] - self.parser = parse_sql - self.dialect = "surrealdb" - self.kwargs = kwargs - self.namespace = connection_data['namespace'] - self.user = connection_data['user'] - self.password = connection_data['password'] - self.host = connection_data['host'] - self.port = connection_data['port'] - - self.connection = None - self.is_connected = False - - def connect(self): - """ - Establishes a connection to the MindsDB database. - Returns: - HandlerStatusResponse - """ - if self.is_connected is True: - return self.connection - try: - self.connection = surreal.connect( - database=self.database, - host=self.host, - port=self.port, - user=self.user, - password=self.password, - namespace=self.namespace, - ) - self.is_connected = True - except Exception as e: - logger.error(f"Error while connecting to SurrealDB, {e}") - - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - response_code = StatusResponse(False) - need_to_close = self.is_connected is False - try: - self.connect() - response_code.success = True - except Exception as e: - logger.error(f'Error connecting to SurrealDB, {e}!') - response_code.error_message = str(e) - finally: - if response_code.success is True and need_to_close: - self.disconnect() - if response_code.success is False and self.is_connected is True: - self.is_connected = False - - return response_code - - def disconnect(self): - """ - Close the existing connection to the SurrealDB database - """ - if self.is_connected is False: - return - try: - self.connection.close() - self.is_connected = False - except Exception as e: - logger.error(f"Error while disconnecting to SurrealDB, {e}") - - return - - def native_query(self, query: str) -> Response: - """ - Receive raw query and act upon it somehow. - Args: - query (Any): query in SurrealQL to execute - Returns: - HandlerResponse - """ - need_to_close = self.is_connected is False - conn = self.connect() - cur = conn.cursor() - try: - cur.execute(query) - result = cur.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cur.description], - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f'Error running query: {query} on SurrealDB!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - - cur.close() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. It may be any kind - of query: SELECT, INSERT, DELETE, etc - Returns: - HandlerResponse - """ - query_string = query.to_string() - - # ensure the correct query is passed - last_word = query_string.split()[-1] - query_string = query_string.replace(last_word + '.', "") - return self.native_query(query_string) - - def get_tables(self) -> Response: - """ - Get list of tables from the database that will be accessible. - Returns: - HandlerResponse - """ - conn = self.connect() - # get table names - tables = table_names(conn) - - # construct pandas dataframe - df = pd.DataFrame(tables, columns=['table_name']) - - response = Response( - RESPONSE_TYPE.TABLE, df - ) - return response - - def get_columns(self, table: str) -> Response: - """ Return list of columns in table - Args: - table (str): name of the table to get column names and types from. - Returns: - HandlerResponse - """ - conn = self.connect() - # get name and type of each column in the table - columns, types = column_info(conn, table) - - # construct pandas dataframe - df = pd.DataFrame(columns, columns=['table_name']) - df['data_type'] = types - - response = Response( - RESPONSE_TYPE.TABLE, df - ) - return response diff --git a/mindsdb/integrations/handlers/surrealdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/surrealdb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/surrealdb_handler/tests/test_surrealdb_handler.py b/mindsdb/integrations/handlers/surrealdb_handler/tests/test_surrealdb_handler.py deleted file mode 100644 index 9f7bf72585e..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/tests/test_surrealdb_handler.py +++ /dev/null @@ -1,57 +0,0 @@ -import unittest - -from mindsdb.integrations.handlers.surrealdb_handler.surrealdb_handler import SurrealDBHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class SurrealdbHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": "8000", - "user": "admin", - "password": "password", - "namespace": "test", - "database": "test" - } - } - cls.handler = SurrealDBHandler('test_surrealdb_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_create_table(self): - res = self.handler.native_query("CREATE person SET name = 'Tobie', company = 'SurrealDB', " - "skills = ['Rust', 'Go', 'JavaScript'];") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_insert(self): - res = self.handler.native_query("UPDATE person SET name = 'Jamie'") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_select_query(self): - query = "SELECT * FROM person" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_get_columns(self): - columns = self.handler.get_columns('person') - assert columns.type is not RESPONSE_TYPE.ERROR - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_6_drop_table(self): - res = self.handler.native_query("REMOVE table person") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_7_disconnect(self): - assert self.handler.disconnect() is None - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/surrealdb_handler/utils/__init__.py b/mindsdb/integrations/handlers/surrealdb_handler/utils/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/surrealdb_handler/utils/surreal_get_info.py b/mindsdb/integrations/handlers/surrealdb_handler/utils/surreal_get_info.py deleted file mode 100644 index 8dd98f22ff0..00000000000 --- a/mindsdb/integrations/handlers/surrealdb_handler/utils/surreal_get_info.py +++ /dev/null @@ -1,20 +0,0 @@ -def table_names(connection): - query = "INFO for DB" - dict_1 = connection.query(query) - dict_2 = dict_1['tb'] - tables = list(dict_2.keys()) - return tables - - -def column_info(connection, table): - query = "INFO FOR TABLE " + table - dict_1 = connection.query(query) - dict_2 = dict_1['fd'] - columns = list(dict_2.keys()) - types = [] - - for value in dict_2.values(): - a = value.split('TYPE ', 1)[1] - type = a.split()[0] - types.append(type) - return columns, types diff --git a/mindsdb/integrations/handlers/symbl_handler/README.md b/mindsdb/integrations/handlers/symbl_handler/README.md deleted file mode 100644 index 9a9633156df..00000000000 --- a/mindsdb/integrations/handlers/symbl_handler/README.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: Symbl -sidebarTitle: Symbl ---- - -This documentation describes the integration of MindsDB with [Symbl](https://symbl.ai/), a platform with state-of-the-art and task-specific LLMs that enables businesses to analyze multi-party conversations at scale. -This integration allows MindsDB to process conversation data and extract insights from it. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Symbl to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - - -Please note that in order to successfully install the dependencies for Symbl, it is necessary to install `portaudio` and few other Linux packages in the Docker container first. To do this, run the following commands: - -1. Start an interactive shell in the container: -```bash -docker exec -it mindsdb_container sh -``` -If you haven't specified a name when spinning up the MindsDB container with `docker run`, you can find it by running `docker ps`. - - -If you are using Docker Desktop, you can navigate to 'Containers', locate the multi-container application running the extension, click on the `mindsdb_service` container and then click on the 'Exec' tab to start an interactive shell. - - -2. Install the required packages: -```bash -apt-get update && apt-get install -y \ - libportaudio2 libportaudiocpp0 portaudio19-dev \ - python3-dev \ - build-essential \ - && rm -rf /var/lib/apt/lists/* -``` - - -## Connection - -Establish a connection to your Symbl from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE mindsdb_symbl -WITH ENGINE = 'symbl', -PARAMETERS = { - "app_id": "app_id", - "app_secret":"app_secret" -}; -``` - -Required connection parameters include the following: - -* `app_id`: The Symbl app identifier. -* `app_secret`: The Symbl app secret. - -## Usage - -First, process the conversation data and get the conversation ID via the `get_conversation_id` table: - -```sql -SELECT * -FROM mindsdb_symbl.get_conversation_id -WHERE audio_url="https://symbltestdata.s3.us-east-2.amazonaws.com/newPhonecall.mp3"; -``` - -Next, use the conversation ID to get the results of the above from the other supported tables: - -```sql -SELECT * -FROM mindsdb_symbl.get_messages -WHERE conversation_id="5682305049034752"; -``` - -Other supported tables include: - -* `get_topics` -* `get_questions` -* `get_analytics` -* `get_action_items` - - -The above examples utilize `mindsdb_symbl` as the datasource name, which is defined in the `CREATE DATABASE` command. - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/symbl_handler/__about__.py b/mindsdb/integrations/handlers/symbl_handler/__about__.py deleted file mode 100644 index d0a7a091e2b..00000000000 --- a/mindsdb/integrations/handlers/symbl_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Symbl handler" -__package_name__ = "mindsdb_symbl_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Symbl" -__author__ = "Abhilash K R" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/symbl_handler/__init__.py b/mindsdb/integrations/handlers/symbl_handler/__init__.py deleted file mode 100644 index e383d9d668f..00000000000 --- a/mindsdb/integrations/handlers/symbl_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .symbl_handler import SymblHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Symbl" -name = "symbl" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/symbl_handler/connection_args.py b/mindsdb/integrations/handlers/symbl_handler/connection_args.py deleted file mode 100644 index fd12e54fe9c..00000000000 --- a/mindsdb/integrations/handlers/symbl_handler/connection_args.py +++ /dev/null @@ -1,25 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - app_id={ - "type": ARG_TYPE.PWD, - "description": "App ID of symbl", - "required": True, - "label": "app_id", - }, - app_secret={ - "type": ARG_TYPE.PWD, - "description": "App Secret of symbl", - "required": True, - "label": "app_secret", - "secret": True - } -) - -connection_args_example = OrderedDict( - app_id="app_id", - app_secret="app_secret" -) diff --git a/mindsdb/integrations/handlers/symbl_handler/icon.svg b/mindsdb/integrations/handlers/symbl_handler/icon.svg deleted file mode 100644 index 48b5eaf4500..00000000000 --- a/mindsdb/integrations/handlers/symbl_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/symbl_handler/requirements.txt b/mindsdb/integrations/handlers/symbl_handler/requirements.txt deleted file mode 100644 index 143677d423b..00000000000 --- a/mindsdb/integrations/handlers/symbl_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -symbl \ No newline at end of file diff --git a/mindsdb/integrations/handlers/symbl_handler/symbl_handler.py b/mindsdb/integrations/handlers/symbl_handler/symbl_handler.py deleted file mode 100644 index 3f45f83b12f..00000000000 --- a/mindsdb/integrations/handlers/symbl_handler/symbl_handler.py +++ /dev/null @@ -1,106 +0,0 @@ -import symbl -from mindsdb.integrations.handlers.symbl_handler.symbl_tables import ( - GetConversationTable, - GetMessagesTable, - GetTopicsTable, - GetQuestionsTable, - GetAnalyticsTable, - GetActionItemsTable, - GetFollowUpsTable -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - - -logger = log.getLogger(__name__) - - -class SymblHandler(APIHandler): - """The Symbl handler implementation""" - - def __init__(self, name: str, **kwargs): - """Initialize the Symbl handler. - - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.credentials = {"app_id": self.connection_data.get("app_id"), "app_secret": self.connection_data.get("app_secret")} - self.kwargs = kwargs - self.is_connected = False - - conversation_id_data = GetConversationTable(self) - self._register_table("get_conversation_id", conversation_id_data) - - messages_data = GetMessagesTable(self) - self._register_table("get_messages", messages_data) - - topics_data = GetTopicsTable(self) - self._register_table("get_topics", topics_data) - - question_data = GetQuestionsTable(self) - self._register_table("get_questions", question_data) - - analytics_data = GetAnalyticsTable(self) - self._register_table("get_analytics", analytics_data) - - ai_data = GetActionItemsTable(self) - self._register_table("get_action_items", ai_data) - - follow_up_data = GetFollowUpsTable(self) - self._register_table("get_follow_ups", follow_up_data) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - - Returns - ------- - StatusResponse - connection object - """ - resp = StatusResponse(False) - try: - symbl.AuthenticationToken.get_access_token(self.credentials) - resp.success = True - self.is_connected = True - except Exception as ex: - resp.success = False - resp.error_message = ex - self.is_connected = False - return resp - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - - Returns - ------- - StatusResponse - Status confirmation - """ - return self.connect() - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/symbl_handler/symbl_tables.py b/mindsdb/integrations/handlers/symbl_handler/symbl_tables.py deleted file mode 100644 index d2a3ff601eb..00000000000 --- a/mindsdb/integrations/handlers/symbl_handler/symbl_tables.py +++ /dev/null @@ -1,648 +0,0 @@ -import pandas as pd -import json -import symbl -from typing import List -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor -from mindsdb.utilities import log -from mindsdb_sql_parser import ast - -logger = log.getLogger(__name__) - - -class GetConversationTable(APITable): - """The Get Conversation Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Get the conversation Id for the given audio file" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - conversation id - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'get_conversation_id', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'audio_url': - if op == '=': - search_params["audio_url"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for audio_url column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("audio_url" in search_params) - - if not filter_flag: - raise NotImplementedError("audio_url column has to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - payload = {"url": search_params.get("audio_url")} - conversation_object = symbl.Audio.process_url(payload=payload, credentials=self.handler.credentials) - - df = pd.json_normalize({"conversation_id": conversation_object.get_conversation_id()}) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "conversation_id" - ] - - -class GetMessagesTable(APITable): - """The Get Messages Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Get the messages for the given conversation id" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Messages - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'get_messages', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'conversation_id': - if op == '=': - search_params["conversation_id"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for conversation_id column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("conversation_id" in search_params) - - if not filter_flag: - raise NotImplementedError("conversation_id column has to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - resp = symbl.Conversations.get_messages(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials) - - resp = self.parse_response(resp) - - df = pd.json_normalize(resp["messages"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def parse_response(self, res): - return json.loads(json.dumps(res.to_dict(), default=str)) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "conversation_id", - "end_time", - "id", - "phrases", - "sentiment", - "start_time", - "text", - "words", - "_from.email", - "_from.id", - "_from.name" - ] - - -class GetTopicsTable(APITable): - """The Get Topics Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Get the topics for the given conversation id" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Topics - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'get_topics', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'conversation_id': - if op == '=': - search_params["conversation_id"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for conversation_id column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("conversation_id" in search_params) - - if not filter_flag: - raise NotImplementedError("conversation_id column has to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - resp = symbl.Conversations.get_topics(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials) - - resp = self.parse_response(resp) - - df = pd.json_normalize(resp["topics"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def parse_response(self, res): - return json.loads(json.dumps(res.to_dict(), default=str)) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "id", - "text", - "type", - "score", - "message_ids", - "entities", - "sentiment", - "parent_refs" - ] - - -class GetQuestionsTable(APITable): - """The Get Questions Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Get the questions for the given conversation id" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Questions - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'get_questions', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'conversation_id': - if op == '=': - search_params["conversation_id"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for conversation_id column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("conversation_id" in search_params) - - if not filter_flag: - raise NotImplementedError("conversation_id column has to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - resp = symbl.Conversations.get_questions(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials) - - resp = self.parse_response(resp) - - df = pd.json_normalize(resp["questions"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def parse_response(self, res): - return json.loads(json.dumps(res.to_dict(), default=str)) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "id", - "text", - "type", - "score", - "message_ids", - "_from.id", - "_from.name", - "_from.user_id" - ] - - -class GetFollowUpsTable(APITable): - """The Get FollowUps Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Get the follow ups for the given conversation id" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - follow up Questions - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'get_follow_ups', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'conversation_id': - if op == '=': - search_params["conversation_id"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for conversation_id column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("conversation_id" in search_params) - - if not filter_flag: - raise NotImplementedError("conversation_id column has to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - resp = symbl.Conversations.get_follow_ups(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials) - - resp = self.parse_response(resp) - - df = pd.json_normalize(resp["follow_ups"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def parse_response(self, res): - return json.loads(json.dumps(res.to_dict(), default=str)) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "id", - "text", - "type", - "score", - "message_ids", - "entities", - "phrases", - "definitive", - "due_by", - "_from.id", - "_from.name", - "_from.user_id", - "assignee.id", - "assignee.name", - "assignee.email" - ] - - -class GetActionItemsTable(APITable): - """The Get Action items Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Get the action items for the given conversation id" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - action items - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'get_action_items', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'conversation_id': - if op == '=': - search_params["conversation_id"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for conversation_id column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("conversation_id" in search_params) - - if not filter_flag: - raise NotImplementedError("conversation_id column has to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - resp = symbl.Conversations.get_action_items(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials) - - resp = self.parse_response(resp) - - df = pd.json_normalize(resp["action_items"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def parse_response(self, res): - return json.loads(json.dumps(res.to_dict(), default=str)) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "id", - "text", - "type", - "score", - "message_ids", - "entities", - "phrases", - "definitive", - "due_by", - "_from.id", - "_from.name", - "_from.user_id", - "assignee.id", - "assignee.name", - "assignee.email" - ] - - -class GetAnalyticsTable(APITable): - """The Get Analytics Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Get the analytics for the given conversation id" API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - metrics - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'get_analytics', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - for op, arg1, arg2 in where_conditions: - if arg1 == 'conversation_id': - if op == '=': - search_params["conversation_id"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for conversation_id column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("conversation_id" in search_params) - - if not filter_flag: - raise NotImplementedError("conversation_id column has to be present in where clause.") - - df = pd.DataFrame(columns=self.get_columns()) - - resp = symbl.Conversations.get_analytics(conversation_id=search_params.get("conversation_id"), credentials=self.handler.credentials) - - resp = self.parse_response(resp) - - df = pd.json_normalize(resp["metrics"]) - - select_statement_executor = SELECTQueryExecutor( - df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def parse_response(self, res): - return json.loads(json.dumps(res.to_dict(), default=str)) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "type", - "percent", - "seconds" - ] diff --git a/mindsdb/integrations/handlers/tdengine_handler/README.md b/mindsdb/integrations/handlers/tdengine_handler/README.md deleted file mode 100644 index 5c88cc770d5..00000000000 --- a/mindsdb/integrations/handlers/tdengine_handler/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# TDEngine Handler - -This is the implementation of the TDEngine handler for MindsDB. - -## TDEngine -TDengine is an open source, high-performance, cloud native time-series database optimized for Internet of Things (IoT), Connected Cars, and Industrial IoT. It enables efficient, real-time data ingestion, processing, and monitoring of TB and even PB scale data per day, generated by billions of sensors and data collectors. TDengine differentiates itself from other time-series databases with the following advantages: - -* High Performance -* Simplified Solution -* Cloud Native -* Ease of Use -* Easy Data Analytics -* Open Source - - - -## Implementation -This handler was implemented using the `taos/taosrest`, a Python library that allows you to use Python code to run SQL commands on TDEngine Server. - -The required arguments to establish a connection are, -* `user`: username asscociated with server -* `password`: password to authenticate your access -* `url`: Url to TDEngine server. For local server url is localhost:6041 (Default) -* `token`: Unique token provide while using TDEngine Cloud. -* `database`: Database name to be connected - - -## Usage -In order to make use of this handler and connect to TDEngine in MindsDB, the following syntax can be used, - -~~~~sql -CREATE DATABASE TDEngine_datasource -WITH -engine='tdengine', -parameters={ - "user":"root", - "password":"taosdata", - "url":"127.0.0.1:6041", - "database":"test" -}; -~~~~ - - -**Note :-> You can sepecify token instead of user and password while using TDEngine.** - - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM TDEngine_datasource.d0; -~~~~ diff --git a/mindsdb/integrations/handlers/tdengine_handler/__about__.py b/mindsdb/integrations/handlers/tdengine_handler/__about__.py deleted file mode 100644 index 04d11dbd1c6..00000000000 --- a/mindsdb/integrations/handlers/tdengine_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB TDEngine handler' -__package_name__ = 'mindsdb_tdengine_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for TDEngine" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/tdengine_handler/__init__.py b/mindsdb/integrations/handlers/tdengine_handler/__init__.py deleted file mode 100644 index 48ad9239d64..00000000000 --- a/mindsdb/integrations/handlers/tdengine_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .tdengine_handler import TDEngineHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'TDEngine' -name = 'tdengine' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/tdengine_handler/connection_args.py b/mindsdb/integrations/handlers/tdengine_handler/connection_args.py deleted file mode 100644 index 6c7cc1b4c00..00000000000 --- a/mindsdb/integrations/handlers/tdengine_handler/connection_args.py +++ /dev/null @@ -1,36 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the TDEngine server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the TDEngine server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the TDEngine server.' - }, - url={ - 'type': ARG_TYPE.STR, - 'description': 'The url of the TDEngine server Instance. ' - }, - token={ - 'type': ARG_TYPE.INT, - 'description': 'Unique Token to COnnect TDEngine' - }, -) - -connection_args_example = OrderedDict( - url='127.0.0.1:6041', - token='', - user='root', - password='taosdata', - database='test' -) diff --git a/mindsdb/integrations/handlers/tdengine_handler/icon.svg b/mindsdb/integrations/handlers/tdengine_handler/icon.svg deleted file mode 100644 index c1ddccb67d1..00000000000 --- a/mindsdb/integrations/handlers/tdengine_handler/icon.svg +++ /dev/null @@ -1,59 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/tdengine_handler/requirements.txt b/mindsdb/integrations/handlers/tdengine_handler/requirements.txt deleted file mode 100644 index 7c95bc73ea5..00000000000 --- a/mindsdb/integrations/handlers/tdengine_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -taospy -requests>=2.32.4 # not directly required, pinned by Snyk to avoid a vulnerability -urllib3>=2.5.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py b/mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py deleted file mode 100644 index 99477597493..00000000000 --- a/mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py +++ /dev/null @@ -1,144 +0,0 @@ -from typing import Optional -import pandas as pd -import taosrest as td -from taosrest import sqlalchemy as SA - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - - -logger = log.getLogger(__name__) - - -class TDEngineHandler(DatabaseHandler): - """ - This handler handles connection and execution of the TDEngine statements. - """ - - name = 'tdengine' - - def __init__(self, name, connection_data: Optional[dict], **kwargs): - super().__init__(name) - - self.parser = parse_sql - self.dialect = 'tdengine' - self.kwargs = kwargs - self.connection_data = connection_data - - self.connection = None - self.is_connected = False - - def connect(self): - if self.is_connected is True: - return self.connection - - config = { - 'url': self.connection_data.get('url', "http://localhost:6041"), - 'token': self.connection_data.get('token'), - 'user': self.connection_data.get('user', 'root'), - 'password': self.connection_data.get('password', 'taosdata'), - 'database': self.connection_data.get('database') - } - - connection = td.connect(**config) - self.is_connected = True - self.connection = connection - return self.connection - - def disconnect(self): - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - - result = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - result.success = connection is not None - except Exception as e: - logger.error(f'Error connecting to TDEngine {self.connection_data["database"]}, {e}!') - result.error_message = str(e) - - if result.success is True and need_to_close: - self.disconnect() - if result.success is False and self.is_connected is True: - self.is_connected = False - - return result - - def native_query(self, query: str) -> Response: - """ - Receive SQL query and runs it - :param query: The SQL query to run in TDEngine - :return: returns the records from the current recordset - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - cur = connection.cursor() - try: - cur.execute(query) - - if cur.rowcount != 0: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - pd.DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - # connection.rollback() - cur.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement. - """ - renderer = SqlalchemyRender(SA.TaosRestDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Get a list with all of the tabels in TDEngine - """ - q = 'SHOW TABLES;' - - return self.native_query(q) - - def get_columns(self, table_name) -> Response: - """ - Show details about the table - """ - q = f'DESCRIBE {table_name};' - - return self.native_query(q) diff --git a/mindsdb/integrations/handlers/tdengine_handler/tests/__init__.py b/mindsdb/integrations/handlers/tdengine_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py b/mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py deleted file mode 100644 index abdfbc01b90..00000000000 --- a/mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.tdengine_handler.tdengine_handler import TDEngineHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class TDEngineHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "token": '', - "url": "********.cloud.tdengine.com", - "database": "temp" - } - cls.handler = TDEngineHandler('test_tdengine_handler', connection_data=cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_create_table(self): - query = "CREATE Table `hari` USING `temp` (`id`) TAGS (0);" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - pass - - def test_3_insert(self): - query = "INSERT INTO hari VALUES (NOW, 12);" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_4_native_query_select(self): - query = "SELECT * FROM hari;" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('hari') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/teradata_handler/README.md b/mindsdb/integrations/handlers/teradata_handler/README.md deleted file mode 100644 index aaa707b8ffb..00000000000 --- a/mindsdb/integrations/handlers/teradata_handler/README.md +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Teradata -sidebarTitle: Teradata ---- - -This documentation describes the integration of MindsDB with [Teradata](https://www.teradata.com/why-teradata), the complete cloud analytics and data platform for Trusted AI. -The integration allows MindsDB to access data from Teradata and enhance Teradata with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Teradata to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Teradata from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/teradata_handler) as an engine. - -```sql -CREATE DATABASE teradata_datasource -WITH - ENGINE = 'teradata', - PARAMETERS = { - "host": "192.168.0.41", - "user": "demo_user", - "password": "demo_password", - "database": "example_db" - }; -``` - -Required connection parameters include the following: - -* `host`: The hostname, IP address, or URL of the Teradata server. -* `user`: The username for the Teradata database. -* `password`: The password for the Teradata database. - -Optional connection parameters include the following: - -* `database`: The name of the Teradata database to connect to. Defaults is the user's default database. - -## Usage - -Retrieve data from a specified table by providing the integration, database and table names: - -```sql -SELECT * -FROM teradata_datasource.database_name.table_name -LIMIT 10; -``` - -Run Teradata SQL queries directly on the connected Teradata database: - -```sql -SELECT * FROM teradata_datasource ( - - --Native Query Goes Here - SELECT emp_id, emp_name, job_duration AS tsp - FROM employee - EXPAND ON job_duration AS tsp BY INTERVAL '1' YEAR - FOR PERIOD(DATE '2006-01-01', DATE '2008-01-01'); - -); -``` - - -The above examples utilize `teradata_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Teradata database. -* **Checklist**: - 1. Make sure the Teradata database is active. - 2. Confirm that host, user and password are correct. Try a direct connection using a client like DBeaver. - 3. Ensure a stable network between MindsDB and Teradata. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - - - -`Connection Timeout Error` - -* **Symptoms**: Connection to the Teradata database times out or queries take too long to execute. -* **Checklist**: - 1. Ensure the Teradata server is running and accessible (if the server has been idle for a long time, it may have shut down automatically). - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/teradata_handler/__about__.py b/mindsdb/integrations/handlers/teradata_handler/__about__.py deleted file mode 100644 index 01ddccf8f86..00000000000 --- a/mindsdb/integrations/handlers/teradata_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Teradata handler' -__package_name__ = 'mindsdb_teradata_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Teradata" -__author__ = 'Sudipto Ghosh' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022 - mindsdb' diff --git a/mindsdb/integrations/handlers/teradata_handler/__init__.py b/mindsdb/integrations/handlers/teradata_handler/__init__.py deleted file mode 100644 index 25110d36467..00000000000 --- a/mindsdb/integrations/handlers/teradata_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .teradata_handler import TeradataHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Teradata' -name = 'teradata' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/teradata_handler/connection_args.py b/mindsdb/integrations/handlers/teradata_handler/connection_args.py deleted file mode 100644 index 5555fcd9ff7..00000000000 --- a/mindsdb/integrations/handlers/teradata_handler/connection_args.py +++ /dev/null @@ -1,39 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The hostname, IP address, or URL of the Teradata server.', - 'required': True, - 'label': 'Host' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The username for the Teradata database.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password for the Teradata database.', - 'secret': True, - 'required': True, - 'label': 'Password' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': "The name of the Teradata database to connect to. Defaults is the user's default database.", - 'required': False, - 'label': 'Database' - } -) - -connection_args_example = OrderedDict( - host='192.168.0.41', - user='dbc', - password='dbc', - database='HR' -) diff --git a/mindsdb/integrations/handlers/teradata_handler/icon.svg b/mindsdb/integrations/handlers/teradata_handler/icon.svg deleted file mode 100644 index 1f0a9890769..00000000000 --- a/mindsdb/integrations/handlers/teradata_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/teradata_handler/requirements.txt b/mindsdb/integrations/handlers/teradata_handler/requirements.txt deleted file mode 100644 index 4fbb8efdbbf..00000000000 --- a/mindsdb/integrations/handlers/teradata_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -teradatasql -teradatasqlalchemy diff --git a/mindsdb/integrations/handlers/teradata_handler/teradata_handler.py b/mindsdb/integrations/handlers/teradata_handler/teradata_handler.py deleted file mode 100644 index 1233dba64d6..00000000000 --- a/mindsdb/integrations/handlers/teradata_handler/teradata_handler.py +++ /dev/null @@ -1,245 +0,0 @@ -from typing import Any, Dict, Text - -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from pandas import DataFrame -import teradatasql -from teradatasql import OperationalError -import teradatasqlalchemy.dialect as teradata_dialect - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class TeradataHandler(DatabaseHandler): - """ - This handler handles the connection and execution of SQL statements on Teradata. - """ - - name = 'teradata' - - def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the Teradata database. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def __del__(self) -> None: - """ - Closes the connection when the handler instance is deleted. - """ - if self.is_connected is True: - self.disconnect() - - def connect(self) -> teradatasql.TeradataConnection: - """ - Establishes a connection to the Teradata database. - - Raises: - ValueError: If the expected connection parameters are not provided. - teradatasql.OperationalError: If an error occurs while connecting to the Teradata database. - - Returns: - teradatasql.TeradataConnection: A connection object to the Teradata database. - """ - if self.is_connected is True: - return self.connection - - # Mandatory connection parameters. - if not all(key in self.connection_data for key in ['host', 'user', 'password']): - raise ValueError('Required parameters (host, user, password) must be provided.') - - config = { - 'host': self.connection_data.get('host'), - 'user': self.connection_data.get('user'), - 'password': self.connection_data.get('password') - } - - # Optional connection parameters. - if 'database' in self.connection_data: - config['database'] = self.connection_data.get('database') - - try: - self.connection = teradatasql.connect( - **config, - ) - self.is_connected = True - return self.connection - except OperationalError as operational_error: - logger.error(f'Error connecting to Teradata, {operational_error}!') - raise - except Exception as unknown_error: - logger.error(f'Unknown error connecting to Teradata, {unknown_error}!') - raise - - def disconnect(self) -> None: - """ - Closes the connection to the Teradata database if it's currently open. - """ - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Teradata database. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - with connection.cursor() as cur: - cur.execute('SELECT 1 FROM (SELECT 1 AS "dual") AS "dual"') - response.success = True - except (OperationalError, ValueError) as known_error: - logger.error(f'Connection check to Teradata failed, {known_error}!') - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f'Connection check to Teradata failed due to an unknown error, {unknown_error}!') - response.error_message = str(unknown_error) - - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: Text) -> Response: - """ - Executes a native SQL query on the Teradata database and returns the result. - - Args: - query (Text): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cur: - try: - cur.execute(query) - if not cur.description: - response = Response(RESPONSE_TYPE.OK) - else: - result = cur.fetchall() - response = Response( - RESPONSE_TYPE.TABLE, - DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - connection.commit() - except OperationalError as operational_error: - logger.error(f'Error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(operational_error) - ) - connection.rollback() - except Exception as unknown_error: - logger.error(f'Unknown error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(unknown_error) - ) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode on the Teradata database and retrieves the data (if any). - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - renderer = SqlalchemyRender(teradata_dialect.TeradataDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables in the Teradata database. - - Returns: - Response: A response object containing a list of tables in the Teradata database. - """ - query = f""" - SELECT - TableName AS table_name, - TableKind AS table_type - FROM DBC.TablesV - WHERE DatabaseName = '{self.connection_data.get('database') if self.connection_data.get('database') else self.connection_data.get('user')}' - AND (TableKind = 'T' - OR TableKind = 'O' - OR TableKind = 'Q' - OR TableKind = 'V') - """ - result = self.native_query(query) - - df = result.data_frame - df['table_type'] = df['table_type'].apply(lambda x: 'VIEW' if x == 'V' else 'BASE TABLE') - - result.data_frame = df - return result - - def get_columns(self, table_name: Text) -> Response: - """ - Retrieves column details for a specified table in the Teradata database. - - Args: - table_name (Text): The name of the table for which to retrieve column information. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details. - """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - query = f""" - SELECT ColumnName AS "Field", - ColumnType AS "Type" - FROM DBC.ColumnsV - WHERE DatabaseName = '{self.connection_data.get('database') if self.connection_data.get('database') else self.connection_data.get('user')}' - AND TableName = '{table_name}' - """ - - return self.native_query(query) diff --git a/mindsdb/integrations/handlers/teradata_handler/tests/test_teradata_handler.py b/mindsdb/integrations/handlers/teradata_handler/tests/test_teradata_handler.py deleted file mode 100644 index a1e7a085032..00000000000 --- a/mindsdb/integrations/handlers/teradata_handler/tests/test_teradata_handler.py +++ /dev/null @@ -1,69 +0,0 @@ -import os -import unittest - -from mindsdb.integrations.handlers.teradata_handler.teradata_handler import TeradataHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -""" -CREATE -DATABASE HR -AS PERMANENT = 60e6, -- 60MB - SPOOL = 120e6; -- 120MB - -CREATE -SET TABLE HR.Employees ( - GlobalID INTEGER, - FirstName VARCHAR(30), - LastName VARCHAR(30), - DateOfBirth DATE FORMAT 'YYYY-MM-DD', - JoinedDate DATE FORMAT 'YYYY-MM-DD', - DepartmentCode BYTEINT -) -UNIQUE PRIMARY INDEX ( GlobalID ); - -INSERT INTO HR.Employees (GlobalID, - FirstName, - LastName, - DateOfBirth, - JoinedDate, - DepartmentCode) -VALUES (101, - 'Adam', - 'Tworkowski', - '1980-01-05', - '2004-08-01', - 01); -""" - - -class TeradataHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": os.environ.get('TERADATA_HOST', 'localhost'), - "user": "dbc", - "password": "dbc", - "database": "HR" - } - cls.handler = TeradataHandler('test_teradata_handler', cls.kwargs) - - def test_0_connect(self): - assert self.handler.connect() - - def test_1_check_connection(self): - assert self.handler.check_connection().success is True - - def test_2_get_columns(self): - assert self.handler.get_columns('Employees').resp_type is not RESPONSE_TYPE.ERROR - - def test_3_get_tables(self): - assert self.handler.get_tables().resp_type is not RESPONSE_TYPE.ERROR - - def test_4_select_query(self): - query = 'SELECT * FROM HR.Employees WHERE GlobalID=101' - assert self.handler.query(query).resp_type is RESPONSE_TYPE.TABLE - - -if __name__ == "__main__": - unittest.main(failfast=True) diff --git a/mindsdb/integrations/handlers/tidb_handler/__about__.py b/mindsdb/integrations/handlers/tidb_handler/__about__.py deleted file mode 100644 index 1c45aea2316..00000000000 --- a/mindsdb/integrations/handlers/tidb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB TiDB handler' -__package_name__ = 'mindsdb_tidb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for TiDB" -__author__ = 'Ryan Leung' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/tidb_handler/__init__.py b/mindsdb/integrations/handlers/tidb_handler/__init__.py deleted file mode 100644 index 33613098b13..00000000000 --- a/mindsdb/integrations/handlers/tidb_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .tidb_handler import TiDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'TiDB' -name = 'tidb' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/tidb_handler/connection_args.py b/mindsdb/integrations/handlers/tidb_handler/connection_args.py deleted file mode 100644 index 9f3137cf664..00000000000 --- a/mindsdb/integrations/handlers/tidb_handler/connection_args.py +++ /dev/null @@ -1,36 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the TiDB server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the TiDB server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the TiDB server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the TiDB server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the TiDB server. Must be an integer.' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=4000, - user='root', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/tidb_handler/icon.svg b/mindsdb/integrations/handlers/tidb_handler/icon.svg deleted file mode 100644 index 8d8b4c2f087..00000000000 --- a/mindsdb/integrations/handlers/tidb_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/tidb_handler/requirements.txt b/mindsdb/integrations/handlers/tidb_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/tidb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/tidb_handler/tests/__init__.py b/mindsdb/integrations/handlers/tidb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py b/mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py deleted file mode 100644 index 056d9368c21..00000000000 --- a/mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py +++ /dev/null @@ -1,44 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.tidb_handler.tidb_handler import TiDBHandler - - -class TiDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "host": "localhost", - "port": "4000", - "user": "root", - "password": "root", - "database": "test", - "ssl": False - } - cls.handler = TiDBHandler('test_tidb_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - assert self.handler.check_connection() - - def test_2_native_query_show_dbs(self): - dbs = self.handler.native_query("SHOW DATABASES;") - assert isinstance(dbs, list) - - def test_3_get_tables(self): - tbls = self.handler.get_tables() - assert isinstance(tbls, list) - - def test_5_create_table(self): - try: - self.handler.native_query("CREATE TABLE test_tidb (test_col INT)") - except Exception: - pass - - def test_6_describe_table(self): - described = self.handler.get_columns("dt_test") - assert isinstance(described, list) - - def test_7_select_query(self): - query = "SELECT * FROM dt_test WHERE 'id'='a'" - self.handler.native_query(query) diff --git a/mindsdb/integrations/handlers/tidb_handler/tidb_handler.py b/mindsdb/integrations/handlers/tidb_handler/tidb_handler.py deleted file mode 100644 index 0fe85491b47..00000000000 --- a/mindsdb/integrations/handlers/tidb_handler/tidb_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler - - -class TiDBHandler(MySQLHandler): - """ - This handler handles connection and execution of the TiDB statements. - """ - name = 'tidb' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/timescaledb_handler/__init__.py b/mindsdb/integrations/handlers/timescaledb_handler/__init__.py index 1d89157bd60..189cb89b856 100644 --- a/mindsdb/integrations/handlers/timescaledb_handler/__init__.py +++ b/mindsdb/integrations/handlers/timescaledb_handler/__init__.py @@ -14,7 +14,7 @@ name = "timescaledb" type = HANDLER_TYPE.DATA icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY +support_level = HANDLER_SUPPORT_LEVEL.MINDSDB __all__ = [ "Handler", diff --git a/mindsdb/integrations/handlers/trino_handler/__about__.py b/mindsdb/integrations/handlers/trino_handler/__about__.py deleted file mode 100644 index 005535dc9dc..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Trino handler' -__package_name__ = 'mindsdb_trino_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Trino" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/trino_handler/__init__.py b/mindsdb/integrations/handlers/trino_handler/__init__.py deleted file mode 100644 index e8fb9797ded..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .trino_handler import TrinoHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'Trino' -name = 'trino' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/trino_handler/icon.svg b/mindsdb/integrations/handlers/trino_handler/icon.svg deleted file mode 100644 index 9a8e83369bc..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/icon.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/trino_handler/requirements.txt b/mindsdb/integrations/handlers/trino_handler/requirements.txt deleted file mode 100644 index 08448033d7d..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -trino~=0.313.0 -pyhive diff --git a/mindsdb/integrations/handlers/trino_handler/tests/__init__.py b/mindsdb/integrations/handlers/trino_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/trino_handler/tests/test_trino_config.ini b/mindsdb/integrations/handlers/trino_handler/tests/test_trino_config.ini deleted file mode 100644 index 04cd5653138..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/tests/test_trino_config.ini +++ /dev/null @@ -1,7 +0,0 @@ -[KERBEROS_CONFIG] -http_scheme = https -dialect = trino -ca_bundle = /etc/ssl/certs/ca-bundle.crt -hostname_override = GS.COM -port = 8080 -config = /etc/krb5.conf diff --git a/mindsdb/integrations/handlers/trino_handler/tests/test_trino_handler.py b/mindsdb/integrations/handlers/trino_handler/tests/test_trino_handler.py deleted file mode 100644 index 8d06bb1658e..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/tests/test_trino_handler.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest - -from mindsdb.api.mysql.mysql_proxy.mysql_proxy import RESPONSE_TYPE -from mindsdb.integrations.handlers.trino_handler.trino_handler import TrinoHandler - - -class TrinoHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "qa.analytics.quantum.site.gs.com", - "port": "8090", - "user": "dqsvcuat", - "password": "", - "catalog": "gsam_dev2imddata_elastic", - "schema": "default", - "service_name": "HTTP/qa.analytics.quantum.site.gs.com", - "config_file_name": "test_trino_config.ini" - } - } - cls.handler = TrinoHandler('test_trino_handler', **cls.kwargs) - - def test_0_canary(self): - print('Running canary test') - assert True - print('Canary test ran successfully') - - def test_1_check_connection(self): - conn_status = self.handler.check_connection() - print('Trino connection status: ', conn_status) - assert conn_status.get('success') - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables - - def test_3_describe_table(self): - described = self.handler.get_columns("axioma_att_2021-12") - assert described['type'] is not RESPONSE_TYPE.ERROR - - # TODO: complete tests implementation - # def test_4_select_query(self): - # query = "SELECT * FROM data.test_mdb WHERE 'id'='1'" - # result = self.handler.query(query) - # assert result['type'] is RESPONSE_TYPE.TABLE - # diff --git a/mindsdb/integrations/handlers/trino_handler/trino_config.ini b/mindsdb/integrations/handlers/trino_handler/trino_config.ini deleted file mode 100644 index 7f16108bf45..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/trino_config.ini +++ /dev/null @@ -1,7 +0,0 @@ -[KERBEROS_CONFIG] -http_scheme = https -dialect = trino -ca_bundle = /etc/ssl/certs/ca-bundle.crt -hostname_override = GS.COM -port = 8080 -config = /etc/krb5.conf \ No newline at end of file diff --git a/mindsdb/integrations/handlers/trino_handler/trino_config_provider.py b/mindsdb/integrations/handlers/trino_handler/trino_config_provider.py deleted file mode 100644 index 4231a4755fe..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/trino_config_provider.py +++ /dev/null @@ -1,12 +0,0 @@ -from configparser import ConfigParser - - -class TrinoConfigProvider: - - def __init__(self, **kwargs): - self.config_file_name = kwargs.get('config_file_name') - self.config_parser = ConfigParser() - self.config_parser.read(self.config_file_name) - - def get_trino_kerberos_config(self): - return self.config_parser['KERBEROS_CONFIG'] diff --git a/mindsdb/integrations/handlers/trino_handler/trino_handler.py b/mindsdb/integrations/handlers/trino_handler/trino_handler.py deleted file mode 100644 index 8c5d88c90b2..00000000000 --- a/mindsdb/integrations/handlers/trino_handler/trino_handler.py +++ /dev/null @@ -1,182 +0,0 @@ -import re -from typing import Dict -import pandas as pd -from pyhive import sqlalchemy_trino -from mindsdb_sql_parser import parse_sql, ASTNode -from trino.auth import BasicAuthentication -from trino.dbapi import connect -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class TrinoHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Trino statements - - kerberos is not implemented yet - """ - - name = 'trino' - - def __init__(self, name, connection_data, **kwargs): - super().__init__(name) - self.parser = parse_sql - self.connection_data = connection_data - ''' - service_name = kwargs.get('service_name') - self.config_file_name = kwargs.get('config_file_name') - self.trino_config_provider = TrinoConfigProvider(config_file_name=self.config_file_name) - self.kerberos_config = self.trino_config_provider.get_trino_kerberos_config() - self.http_scheme = self.kerberos_config['http_scheme'] - self.dialect = self.kerberos_config['dialect'] - config = self.kerberos_config['config'] - hostname_override = self.kerberos_config['hostname_override'] - principal = f"{kwargs.get('user')}@{hostname_override}" - ca_bundle = self.kerberos_config['ca_bundle'] - self.auth_config = KerberosAuthentication(config=config, - service_name=service_name, - principal=principal, - ca_bundle=ca_bundle, - hostname_override=hostname_override) - ''' - self.connection = None - self.is_connected = False - self.with_clause = "" - - def connect(self): - """" - Handles the connection to a Trino instance. - """ - if self.is_connected is True: - return self.connection - - # option configuration - http_scheme = 'http' - auth = None - auth_config = None - password = None - - if 'auth' in self.connection_data: - auth = self.connection_data['auth'] - if 'password' in self.connection_data: - password = self.connection_data['password'] - if 'http_scheme' in self.connection_data: - http_scheme = self.connection_data['http_scheme'] - if 'with' in self.connection_data: - self.with_clause = self.connection_data['with'] - - if password and auth == 'kerberos': - raise Exception("Kerberos authorization doesn't support password.") - elif password: - auth_config = BasicAuthentication(self.connection_data['user'], password) - - if auth: - conn = connect( - host=self.connection_data['host'], - port=self.connection_data['port'], - user=self.connection_data['user'], - catalog=self.connection_data['catalog'], - schema=self.connection_data['schema'], - http_scheme=http_scheme, - auth=auth_config) - else: - conn = connect( - host=self.connection_data['host'], - port=self.connection_data['port'], - user=self.connection_data['user'], - catalog=self.connection_data['catalog'], - schema=self.connection_data['schema']) - - self.is_connected = True - self.connection = conn - return conn - - def check_connection(self) -> StatusResponse: - """ - Check the connection of the Trino instance - :return: success status and error message if error occurs - """ - response = StatusResponse(False) - - try: - connection = self.connect() - cur = connection.cursor() - cur.execute("SELECT 1") - response.success = True - except Exception as e: - logger.error(f'Error connecting to Trino {self.connection_data["schema"]}, {e}!') - response.error_message = str(e) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> Response: - """ - Receive SQL query and runs it - :param query: The SQL query to run in Trino - :return: returns the records from the current recordset - """ - try: - connection = self.connect() - cur = connection.cursor() - result = cur.execute(query) - if result and cur.description: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, - columns=[x[0] for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error connecting to Trino {self.connection_data["schema"]}, {e}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - return response - - def query(self, query: ASTNode) -> Response: - # Utilize trino dialect from sqlalchemy - # implement WITH clause as default for all table - # in future, this behavior should be changed to support more detail - # level - # also, for simple the current implement is using rendered query string - # another method that directly manipulate ASTNOde is prefered - renderer = SqlalchemyRender(sqlalchemy_trino.TrinoDialect) - query_str = renderer.get_string(query, with_failback=True) - modified_query_str = re.sub( - r"(?is)(CREATE.+TABLE.+\(.*\))", - f"\\1 {self.with_clause}", - query_str - ) - return self.native_query(modified_query_str) - - def get_tables(self) -> Response: - """ - List all tables in Trino - :return: list of all tables - """ - query = "SHOW TABLES" - response = self.native_query(query) - df = response.data_frame - response.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) - return response - - def get_columns(self, table_name: str) -> Dict: - query = f'DESCRIBE "{table_name}"' - response = self.native_query(query) - return response diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/README.md b/mindsdb/integrations/handlers/tripadvisor_handler/README.md deleted file mode 100644 index fc95f8114f8..00000000000 --- a/mindsdb/integrations/handlers/tripadvisor_handler/README.md +++ /dev/null @@ -1,153 +0,0 @@ -# TripAdvisor handler #5369 - -Custom Python Wrapper: tripadvisor_api.py - -This handler integrates with the [TripAdvisor API](https://tripadvisor-content-api.readme.io/reference/overview). This integration will enable TripAdvisor users to use ML to research travel destinations and more. The custom Python wrapper had to be made which can be found in the **tripadvisor_api.py**. - -Approved users of the Tripadvisor Content API can access the following business details for accommodations, restaurants, and attractions: - -- Location ID, name, address, latitude & longitude -- Read reviews link, write-a-review link -- Overall rating, ranking, subratings, awards, the number of reviews the rating is based on, rating bubbles image -- Price level symbol, accommodation category/subcategory, attraction type, restaurant cuisine(s) - -# Connecting to TripAdvisor and Calling APIs - -CREATE DATABASE my_tripadvisor -WITH -ENGINE = 'tripadvisor' -PARAMETERS = {'api_key': 'INSERT YOUR API KEY'}; - -This is how you start the connection to TripAdvisor API via MindsDB. - -## Find Search API Reference - -The Location Search request returns up to 10 locations found by the given search query. -You can use category ("hotels", "attractions", "restaurants", "geos"), phone number, address, and latitude/longtitude to search with more accuracy. - -``` -SELECT * -FROM my_tripadvisor.searchLocationTable -WHERE searchQuery = 'New York'; -``` - -The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/searchforlocations - -## Location Details API reference - -A Location Details request returns comprehensive information about a location (hotel, restaurant, or an attraction) such as name, address, rating, and URLs for the listing on Tripadvisor. - -``` -SELECT * -FROM my_tripadvisor.locationDetailsTable -WHERE locationId = '23322232'; -``` - -The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/getlocationdetails - -## Location Reviews API reference - -The Location Reviews request returns up to 5 of the most recent reviews for a specific location. - -``` -SELECT * -FROM my_tripadvisor.reviewsTable -WHERE locationId = '99288'; -``` - -The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/getlocationreviews - -## Location Photos API reference - -The Location Reviews request returns up to 5 of the most recent reviews for a specific location. - -``` -SELECT * -FROM my_tripadvisor.photosTable -WHERE location_id = '99288'; -``` - -The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/getlocationphotos - -## Location Nearby API reference - -The Location Reviews request returns up to 5 of the most recent reviews for a specific location. - -``` -SELECT * -FROM my_tripadvisor.nearbyLocationTable -WHERE latLong = '40.780825, -73.972781'; -``` - -The details of this API reference can be found here: https://tripadvisor-content-api.readme.io/reference/searchfornearbylocations - -# Use Case: Sentiment Analysis of Reviews from TripAdvisor with OpenAI - -## Search Hotels in New York and Choose one - -We will search for the hotels in New York in the area around the given longitude and latitude. If we execute the -SQL script then we will get the following result which is displayed below the SQL script. We will choose -"Arthouse Hotel New York City" to see the reviews. - -``` -SELECT * -FROM my_tripadvisor.searchLocationTable -WHERE searchQuery='New York' and latLong='40.780825, -73.972781' and category='hotels'; -``` - -### Result: - -| location_id | name | distance | rating | bearing | street1 | street2 | city | state | country | postalcode | address_string | phone | latitude | longitude | -| ----------- | ---------------------------- | ------------------ | ------ | ------- | --------------------------------- | ------- | ------------- | -------- | ------------- | ---------- | --------------------------------------------------------------- | ------ | -------- | --------- | -| **99288** | Arthouse Hotel New York City | 0.4528974982904253 | [NULL] | west | 2178 Broadway at West 77th Street | [NULL] | New York City | New York | United States | 10024-6647 | 2178 Broadway at West 77th Street, New York City, NY 10024-6647 | [NULL] | [NULL] | [NULL] | -| 112064 | Warwick New York | 1.300511583690564 | [NULL] | south | 65 W 54th St | | New York City | New York | United States | 10019 | 65 W 54th St, New York City, NY 10019 | [NULL] | [NULL] | [NULL] | -| 611947 | New York Hilton Midtown | 1.3284012949631072 | [NULL] | south | 1335 Avenue Of The Americas | [NULL] | New York City | New York | United States | 10019-6078 | 1335 Avenue Of The Americas, New York City, NY 10019-6078 | [NULL] | [NULL] | [NULL] | - -. -. -. - -## Creating the OpenAI model - -Creating the model and the prompt. - -``` -CREATE MODEL sentiment_classifier_gpt_tripadvisor -PREDICT sentiment -USING -engine = 'openai', -prompt_template = 'describe the sentiment of the reviews -strictly as "positive", "neutral", or "negative". -"I love the product":positive -"It is a scam":negative -"{{review}}.":', -api_key='Your API KEY'; -``` - -## Perform sentiment analysis - -``` -SELECT input.text_review, output.sentiment -FROM my_tripadvisor.reviewsTable AS input -JOIN sentiment_classifier_gpt_tripadvisor AS output -WHERE input.locationId = '99288'; -``` - -### Result: - -| text_review | sentiment | -| ----------- | --------- | - -| Stayed here numerous times over the years and loved the hotel. This visit was disappointing from start to finish. No hand soap in room and told to go to CVS to purchase some. Rude staff at check-in. Don't know what happened to the once best hotel on the Upper West! | negative | - -| Physically the room was appalling. The undersized bathroom you could not actually close the door without squeezing in between the metal (?) vanity and toilet. No chair or table nor room to do any exercising or stretching. You will need to stretch to be able to close the bath door and open the shower door. Most hotels will have auto close room doors balance so they auto lock. Not the Arthouse... here you better make sure you hear the click on the lock which takes considerable force. The poor balancing will leave your room wide open. The staff is courteous but wonder about their veracity as they told us our rooms were "upgraded" to a king room. If this was an upgrade ....sheesh hate to have seen the original room. No mention was made of the "urban fee" which provided dicey wifi with a deceptive title. The SeraFina restaurant was disappointing ans it appears wo be more rundown than the hotel with cracked vinyl benches for seating with average Italian food at best. NYC has many fine boutique Hotels..... this isn't one of them. | negative | - -| Hotel is just ok at best and is incredibly strict about cancellation policies. I would avoid booking here. It is not nice enough to warrant choosing this over another hotel in the area with better customer service. | negative | - -| Although we have stayed here before, this was a horrible experience! I booked the Broadway Deluxe King Room in the picture which looks very comfortable but they gave us a room 1/3 smaller and claimed it was the "same category" as the one I booked. We've stayed in the room before and it was quite a good size, had an extra chair, good sized closet and bathroom. This room was tiny, had no extra chair and the bathroom was so small one could barely turn around in it. When I complained to the manager he gave me a credit of the "resort fee" and said the exact line of rooms in the picture wasn't available. They shouldn't show a picture of a room on the website that is substantially different than the one a guest stays in. - -In addition, the shade in the room was broken, it took 3 requests over 2 days to get a few extra towels and there wasn't hot (only lukewarm) water for the first two days of our stay despite multiple complaints. - -I would avoid this hotel in the future. | negative | - -| We were in the neighborhood and dropped into Arthouse's lobby bar with live music. We ended up staying for a few hours and had an absolutely lovely time. Shout out to our server Elviz for the quick service and to the piano player Greg for a memorable night! The best music we've heard in a long time. Lovely hotel and would highly recommend! We will be back | positive | diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/__about__.py b/mindsdb/integrations/handlers/tripadvisor_handler/__about__.py deleted file mode 100644 index 1f17bf29a54..00000000000 --- a/mindsdb/integrations/handlers/tripadvisor_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB TripAdvisor handler" -__package_name__ = "mindsdb_tripadvisor_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Tripadvisor" -__author__ = "Ton Hoang Nguyen (Bill)" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/__init__.py b/mindsdb/integrations/handlers/tripadvisor_handler/__init__.py deleted file mode 100644 index 1a5bd7d000e..00000000000 --- a/mindsdb/integrations/handlers/tripadvisor_handler/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .tripadvisor_handler import TripAdvisorHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "TripAdvisor" -name = "tripadvisor" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/icon.svg b/mindsdb/integrations/handlers/tripadvisor_handler/icon.svg deleted file mode 100644 index f5107c072f8..00000000000 --- a/mindsdb/integrations/handlers/tripadvisor_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_api.py b/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_api.py deleted file mode 100644 index c2a47ecb461..00000000000 --- a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_api.py +++ /dev/null @@ -1,215 +0,0 @@ -import requests -from requests import Response -from enum import Enum - - -class TripAdvisorAPICall(Enum): - """TripAdvisor API references""" - - SEARCH_LOCATION = 1 - LOCATION_DETAILS = 2 - PHOTOS = 3 - REVIEWS = 4 - NEARBY_SEARCH = 5 - - -class TripAdvisorAPI: - """A class for checking the connection to the TripAdvisor Content API and making requests. - - Attributes: - api_key (str): The unique API key to access Tripadvisor content. - """ - - def __init__(self, api_key): - self.api_key = api_key - - def checkTripAdvisorConnection(self): - """ - Check the connection with TripAdvisor - """ - url = "https://api.content.tripadvisor.com/api/v1/location/search?language=en&key={api_key}&searchQuery={searchQuery}".format( - api_key=self.api_key, searchQuery="London" - ) - - headers = {"accept": "application/json"} - response = requests.get(url, headers=headers) - status_code = response.status_code - - if status_code >= 400 and status_code <= 499: - raise Exception("Client error: " + response.text) - - if status_code >= 500 and status_code <= 599: - raise Exception("Server error: " + response.text) - - def getResponse(self, url: str) -> Response: - """ - Getting a response from the API call - """ - headers = {"accept": "application/json"} - response = requests.get(url, headers=headers) - return response - - def getURLQuery(self, url: str, params_dict: dict) -> str: - """ - Processing the query and adding parameters to the URL - """ - for idx, (queryParam, value) in enumerate(params_dict.items()): - if value is not None or value != "": - if value != "" and any( - next_value != "" or value is not None - for next_value in list(params_dict.values())[idx + 1:] - ): - url += "{queryParam}={value}&".format( - queryParam=queryParam, value=value - ) - else: - url += "{queryParam}={value}".format( - queryParam=queryParam, value=value - ) - return url - - def location_search( - self, - url: str, - params_dict: dict, - language: str = "en", - ) -> Response: - """ - The Location Search request returns up to 10 locations found by the given search query. You can use category ("hotels", "attractions", "restaurants", "geos"), - phone number, address, and latitude/longtitude to search with more accuracy. - - Args: - searchQuery (str): Text to use for searching based on the name of the location. - category (str): Filters result set based on property type. Valid options are "hotels", "attractions", "restaurants", and "geos". - phone (str): Phone number to filter the search results by (this can be in any format with spaces and dashes but without the "+" sign at the beginning). - address (str): Address to filter the search results by. - latLong (str): Latitude/Longitude pair to scope down the search around a specifc point - eg. "42.3455,-71.10767". - radius (int): Length of the radius from the provided latitude/longitude pair to filter results. - radiusUnit (str): Unit for length of the radius. Valid options are "km", "mi", "m" (km=kilometers, mi=miles, m=meters. - language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages. - - Returns: - response: Response object with response data as application/json - """ - - url = url + "search?language={language}&key={api_key}&".format( - api_key=self.api_key, language=language - ) - - url = self.getURLQuery(url, params_dict) - response = self.getResponse(url) - - return response - - def location_details( - self, url: str, params_dict: dict, locationId: str, language: str = "en" - ) -> Response: - """ - A Location Details request returns comprehensive information about a location (hotel, restaurant, or an attraction) such as name, address, rating, and URLs for the listing - on Tripadvisor. - - Args: - locationId (str): A unique identifier for a location on Tripadvisor. The location ID can be obtained using the Location Search. - language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages. - currency (str): The currency code to use for request and response (should follow ISO 4217). - - Returns: - response (Response): Response object with response data as application/json - """ - url = url + "{locationId}/details?language={language}&key={api_key}&".format( - locationId=locationId, api_key=self.api_key, language=language - ) - url = self.getURLQuery(url, params_dict) - response = self.getResponse(url) - return response - - def location_reviews( - self, url: str, locationId: str, language: str = "en" - ) -> Response: - """ - The Location Reviews request returns up to 5 of the most recent reviews for a specific location. Please note that the limits are different for the beta subscribers. - - Args: - locationId (str): A unique identifier for a location on Tripadvisor. The location ID can be obtained using the Location Search. - language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages. - - Returns: - response: Response object with response data as application/json - """ - url = url + "{locationId}/reviews?language={language}&key={api_key}&".format( - locationId=locationId, api_key=self.api_key, language=language - ) - - response = self.getResponse(url) - return response - - def location_photos(self, url: str, locationId: str, language: str = "en") -> Response: - """ - The Location Photos request returns up to 5 high-quality photos for a specific location. Please note that the limits are different for the beta subscribers. - You need to upgrade to get the higher limits mentioned here. The photos are ordered by recency. - - Args: - locationId (str): A unique identifier for a location on Tripadvisor. The location ID can be obtained using the Location Search. - language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages. - - Returns: - response: Response object with response data as application/json - """ - url = url + "{locationId}/photos?language={language}&key={api_key}".format(locationId=locationId, language=language, api_key=self.api_key) - response = self.getResponse(url) - return response - - def location_nearby_search(self, url: str, params_dict: dict, language: str = "en") -> Response: - """ - The Nearby Location Search request returns up to 10 locations found near the given latitude/longtitude. - You can use category ("hotels", "attractions", "restaurants", "geos"), phone number, address to search with more accuracy. - - Args: - latLong (str): Latitude/Longitude pair to scope down the search around a specifc point - eg. "42.3455,-71.10767". - category (str): Filters result set based on property type. Valid options are "hotels", "attractions", "restaurants", and "geos". - phone (str): Phone number to filter the search results by (this can be in any format with spaces and dashes but without the "+" sign at the beginning). - address (str): Address to filter the search results by. - radius (int): Length of the radius from the provided latitude/longitude pair to filter results. - radiusUnit (str): Unit for length of the radius. Valid options are "km", "mi", "m" (km=kilometers, mi=miles, m=meters. - language (str): The language in which to return results (e.g. "en" for English or "es" for Spanish) from the list of our Supported Languages. - - Returns: - response: Response object with response data as application/json - """ - - url = url + "nearby_search?language={language}&key={api_key}&".format( - api_key=self.api_key, language=language - ) - - url = self.getURLQuery(url, params_dict) - response = self.getResponse(url) - return response - - def getTripAdvisorData(self, apiCall, **params): - """ - Making a request based on the query and receive data from TripAdvisor. - """ - url = "https://api.content.tripadvisor.com/api/v1/location/" - params_dict = params - - if apiCall == TripAdvisorAPICall.SEARCH_LOCATION: - response = self.location_search(url, params_dict) - return response.json()["data"] - - elif apiCall == TripAdvisorAPICall.LOCATION_DETAILS: - response = self.location_details( - url, params_dict, params_dict["locationId"] - ) - return response.json() - - elif apiCall == TripAdvisorAPICall.REVIEWS: - response = self.location_reviews(url, params_dict["locationId"]) - return response.json()["data"] - - elif apiCall == TripAdvisorAPICall.PHOTOS: - response = self.location_photos(url, params_dict["locationId"]) - return response.json()["data"] - - elif apiCall == TripAdvisorAPICall.NEARBY_SEARCH: - response = self.location_nearby_search(url, params_dict) - return response.json()["data"] diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_handler.py b/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_handler.py deleted file mode 100644 index 28980d7acff..00000000000 --- a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_handler.py +++ /dev/null @@ -1,275 +0,0 @@ -import os - -import pandas as pd - -from mindsdb.utilities import log -from mindsdb.utilities.config import Config - -from mindsdb.integrations.libs.api_handler import APIHandler - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from .tripadvisor_table import SearchLocationTable -from .tripadvisor_table import LocationDetailsTable -from .tripadvisor_table import ReviewsTable -from .tripadvisor_table import PhotosTable -from .tripadvisor_table import NearbyLocationTable -from .tripadvisor_api import TripAdvisorAPI -from .tripadvisor_api import TripAdvisorAPICall - -logger = log.getLogger(__name__) - - -class TripAdvisorHandler(APIHandler): - """A class for handling connections and interactions with the TripAdvisor Content API. - - Attributes: - api_key (str): The unique API key to access Tripadvisor content. - api (TripAdvisorAPI): The `TripAdvisorAPI` object for checking the connection to the TripAdvisor API. - """ - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - args = kwargs.get("connection_data", {}) - self._tables = {} - - self.connection_args = {} - handler_config = Config().get("tripadvisor_handler", {}) - for k in ["api_key"]: - if k in args: - self.connection_args[k] = args[k] - elif f"TRIPADVISOR_{k.upper()}" in os.environ: - self.connection_args[k] = os.environ[f"TRIPADVISOR_{k.upper()}"] - elif k in handler_config: - self.connection_args[k] = handler_config[k] - - self.api = None - self.is_connected = False - - tripAdvisor = SearchLocationTable(self) - self._register_table("searchLocationTable", tripAdvisor) - - tripAdvisorLocationDetails = LocationDetailsTable(self) - self._register_table("locationDetailsTable", tripAdvisorLocationDetails) - - tripAdvisorReviews = ReviewsTable(self) - self._register_table("reviewsTable", tripAdvisorReviews) - - tripAdvisorPhotos = PhotosTable(self) - self._register_table("photosTable", tripAdvisorPhotos) - - tripAdvisorNearbyLocation = NearbyLocationTable(self) - self._register_table("nearbyLocationTable", tripAdvisorNearbyLocation) - - def connect(self, api_version=2): - """Check the connection with TripAdvisor API""" - - if self.is_connected is True: - return self.api - - self.api = TripAdvisorAPI(api_key=self.connection_args["api_key"]) - - self.is_connected = True - return self.api - - def check_connection(self) -> StatusResponse: - """This function evaluates if the connection is alive and healthy""" - response = StatusResponse(False) - - try: - api = self.connect() - - # make a random http call with searching a location. - # it raises an error in case if auth is not success and returns not-found otherwise - api.connectTripAdvisor() - response.success = True - - except Exception as e: - response.error_message = f"Error connecting to TripAdvisor api: {e}" - logger.error(response.error_message) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def call_tripadvisor_searchlocation_api( - self, method_name: str = None, params: dict = None - ) -> pd.DataFrame: - """It processes the JSON data from the call and transforms it into pandas.Dataframe""" - if self.is_connected is False: - self.connect() - - locations = self.api.getTripAdvisorData( - TripAdvisorAPICall.SEARCH_LOCATION, **params - ) - result = [] - - for loc in locations: - data = { - "location_id": loc.get("location_id"), - "name": loc.get("name"), - "distance": loc.get("distance"), - "rating": loc.get("rating"), - "bearing": loc.get("bearing"), - "street1": loc.get("address_obj").get("street1"), - "street2": loc.get("address_obj").get("street2"), - "city": loc.get("address_obj").get("city"), - "state": loc.get("address_obj").get("state"), - "country": loc.get("address_obj").get("country"), - "postalcode": loc.get("address_obj").get("postalcode"), - "address_string": loc.get("address_obj").get("address_string"), - "phone": loc.get("address_obj").get("phone"), - "latitude": loc.get("address_obj").get("latitude"), - "longitude": loc.get("address_obj").get("longitude"), - } - result.append(data) - result = pd.DataFrame(result) - return result - - def call_tripadvisor_location_details_api( - self, method_name: str = None, params: dict = None - ) -> pd.DataFrame: - """It processes the JSON data from the call and transforms it into pandas.Dataframe""" - if self.is_connected is False: - self.connect() - - loc = self.api.getTripAdvisorData(TripAdvisorAPICall.LOCATION_DETAILS, **params) - result = [] - - data = { - "location_id": loc.get("location_id"), - "name": loc.get("name"), - "distance": loc.get("distance"), - "rating": loc.get("rating"), - "bearing": loc.get("bearing"), - "street1": loc.get("address_obj").get("street1"), - "street2": loc.get("address_obj").get("street2"), - "city": loc.get("address_obj").get("city"), - "state": loc.get("address_obj").get("state"), - "country": loc.get("address_obj").get("country"), - "postalcode": loc.get("address_obj").get("postalcode"), - "address_string": loc.get("address_obj").get("address_string"), - "phone": loc.get("address_obj").get("phone"), - "latitude": loc.get("address_obj").get("latitude"), - "longitude": loc.get("address_obj").get("longitude"), - "web_url": loc.get("web_url"), - "timezone": loc.get("timezone"), - "email": loc.get("email"), - "website": loc.get("website"), - "write_review": loc.get("write_review"), - "ranking_data": str(loc.get("ranking_data")), - "rating_image_url": loc.get("rating_image_url"), - "num_reviews": loc.get("num_reviews"), - "review_rating_count": loc.get("review_rating_count"), - "subratings": loc.get("subratings"), - "photo_count": loc.get("photo_count"), - "see_all_photos": loc.get("see_all_photos"), - "price_level": loc.get("price_level"), - "parent_brand": loc.get("parent_brand"), - "brand": loc.get("brand"), - "ancestors": str(loc.get("ancestors")), - "periods": str(loc.get("hours").get("periods")) - if loc.get("hours") is not None - else None, - "weekday": str(loc.get("hours").get("weekday_text")) - if loc.get("weekday") is not None - else None, - "amenities": str(loc.get("amenities")), - "features": str(loc.get("features")), - "cuisines": str(loc.get("cuisine")), - "styles": str(loc.get("styles")), - "neighborhood_info": str(loc.get("neighborhood_info")), - "awards": str(loc.get("awards")), - "trip_types": str(loc.get("trip_types")), - "groups": str(loc.get("groups")), - } - - result.append(data) - - result = pd.DataFrame(result) - return result - - def call_tripadvisor_reviews_api( - self, method_name: str = None, params: dict = None - ) -> pd.DataFrame: - """It processes the JSON data from the call and transforms it into pandas.Dataframe""" - if self.is_connected is False: - self.connect() - - locations = self.api.getTripAdvisorData(TripAdvisorAPICall.REVIEWS, **params) - result = [] - - for loc in locations: - data = { - "id": loc.get("id"), - "lang": loc.get("lang"), - "location_id": loc.get("location_id"), - "published_date": loc.get("published_date"), - "rating": loc.get("rating"), - "helpful_votes": loc.get("helpful_votes"), - "rating_image_url": loc.get("rating_image_url"), - "url": loc.get("url"), - "trip_type": loc.get("trip_type"), - "travel_date": loc.get("travel_date"), - "text_review": loc.get("text"), - "title": loc.get("title"), - "owner_response": loc.get("owner_response"), - "is_machine_translated": loc.get("is_machine_translated"), - "user": str(loc.get("user")), - "subratings": str(loc.get("subratings")), - } - result.append(data) - result = pd.DataFrame(result) - return result - - def call_tripadvisor_photos_api( - self, method_name: str = None, params: dict = None - ) -> pd.DataFrame: - """It processes the JSON data from the call and transforms it into pandas.Dataframe""" - if self.is_connected is False: - self.connect() - - locations = self.api.getTripAdvisorData(TripAdvisorAPICall.PHOTOS, **params) - result = [] - - for loc in locations: - data = { - "id": loc.get("id"), - "is_blessed": loc.get("is_blessed"), - "album": loc.get("album"), - "caption": loc.get("caption"), - "published_date": loc.get("published_date"), - "images": str(loc.get("images")), - "source": str(loc.get("source")), - "user": str(loc.get("user")), - } - result.append(data) - result = pd.DataFrame(result) - return result - - def call_tripadvisor_nearby_location_api( - self, method_name: str = None, params: dict = None - ) -> pd.DataFrame: - """It processes the JSON data from the call and transforms it into pandas.Dataframe""" - if self.is_connected is False: - self.connect() - - locations = self.api.getTripAdvisorData(TripAdvisorAPICall.NEARBY_SEARCH, **params) - result = [] - - for loc in locations: - data = { - "location_id": loc.get("location_id"), - "name": loc.get("name"), - "distance": loc.get("distance"), - "rating": loc.get("rating"), - "bearing": loc.get("bearing"), - "address_obj": str(loc.get("address_obj")), - } - result.append(data) - result = pd.DataFrame(result) - return result diff --git a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_table.py b/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_table.py deleted file mode 100644 index 4462d287a0a..00000000000 --- a/mindsdb/integrations/handlers/tripadvisor_handler/tripadvisor_table.py +++ /dev/null @@ -1,477 +0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - - -class SearchLocationTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - """Select data from the search_location table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - - self.handler.connect() - - conditions = extract_comparison_conditions(query.where) - - allowed_keys = set( - [ - "searchQuery", - "category", - "phone", - "address", - "latLong", - "radius", - "radiusUnit", - "language", - ] - ) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - elif op == "=" and arg1 in allowed_keys: - params[arg1] = arg2 - elif op != "=": - raise NotImplementedError(f"Unknown op: {op}") - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params["max_results"] = query.limit.value - - if "searchQuery" not in params and "latLong" not in params: - # search not works without searchQuery, use 'London' - params["searchQuery"] = "London" - - result = self.handler.call_tripadvisor_searchlocation_api(params=params) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - """Get the list of column names for the search_location table. - - Returns: - list: A list of column names for the search_location table. - """ - return [ - "location_id", - "name", - "distance", - "rating", - "bearing", - "street1", - "street2", - "city", - "state", - "country", - "postalcode", - "address_string", - "phone", - "latitude", - "longitude", - ] - - -class LocationDetailsTable(APITable): - result_json = [] - - def select(self, query: ast.Select) -> pd.DataFrame: - """Select data from the location_details table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - - self.handler.connect() - - conditions = extract_comparison_conditions(query.where) - - allowed_keys = set(["locationId", "currency", "language"]) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - elif op == "=" and arg1 in allowed_keys: - params[arg1] = arg2 - elif op != "=": - raise NotImplementedError(f"Unknown op: {op}") - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params["max_results"] = query.limit.value - - if "locationId" not in params: - # search not works without searchQuery, use 'London' - params["locationId"] = "23322232" - - result = self.handler.call_tripadvisor_location_details_api(params=params) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - """Get the list of column names for the location_details table. - - Returns: - list: A list of column names for the location_details table. - """ - return [ - "location_id", - "distance", - "name", - "description", - "web_url", - "street1", - "street2", - "city", - "state", - "country", - "postalcode", - "address_string", - "latitude", - "longitude", - "timezone", - "email", - "phone", - "website", - "write_review", - "ranking_data", - "rating", - "rating_image_url", - "num_reviews", - "photo_count", - "see_all_photos", - "price_level", - "brand", - "parent_brand", - "ancestors", - "periods", - "weekday", - "features", - "cuisines", - "amenities", - "trip_types", - "styles", - "awards", - "neighborhood_info", - "parent_brand", - "brand", - "groups", - ] - - -class ReviewsTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - """Select data from the reviews table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - - self.handler.connect() - - conditions = extract_comparison_conditions(query.where) - - allowed_keys = set(["locationId", "language"]) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - elif op == "=" and arg1 in allowed_keys: - params[arg1] = arg2 - elif op != "=": - raise NotImplementedError(f"Unknown op: {op}") - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params["max_results"] = query.limit.value - - if "locationId" not in params: - # search not works without searchQuery, use 'London' - params["locationId"] = "23322232" - - result = self.handler.call_tripadvisor_reviews_api(params=params) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - """Get the list of column names for the reviews table. - - Returns: - list: A list of column names for the reviews table. - """ - return [ - "id", - "lang", - "location_id", - "published_date", - "rating", - "helpful_votes", - "rating_image_url", - "url", - "trip_type", - "travel_date", - "text_review", - "title", - "owner_response", - "is_machine_translated", - "user", - "subratings", - ] - - -class PhotosTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - """Select data from the photos table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - - conditions = extract_comparison_conditions(query.where) - - allowed_keys = set(["locationId", "language"]) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - elif op == "=" and arg1 in allowed_keys: - params[arg1] = arg2 - elif op != "=": - raise NotImplementedError(f"Unknown op: {op}") - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params["max_results"] = query.limit.value - - if "locationId" not in params: - params["locationId"] = "23322232" - - result = self.handler.call_tripadvisor_photos_api(params=params) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - """Get the list of column names for the photos table. - - Returns: - list: A list of column names for the photos table. - """ - return [ - "id", - "is_blessed", - "album", - "caption", - "published_date", - "images", - "source", - "user", - ] - - -class NearbyLocationTable(APITable): - def select(self, query: ast.Select) -> pd.DataFrame: - """Select data from the nearby_location table and return it as a pandas DataFrame. - - Args: - query (ast.Select): The SQL query to be executed. - - Returns: - pandas.DataFrame: A pandas DataFrame containing the selected data. - """ - - conditions = extract_comparison_conditions(query.where) - - allowed_keys = set(["latLong", "language", "category", "phone", "address", "radius", "radiusUnit"]) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - if op == "or": - raise NotImplementedError("OR is not supported") - elif op == "=" and arg1 in allowed_keys: - params[arg1] = arg2 - elif op != "=": - raise NotImplementedError(f"Unknown op: {op}") - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params["max_results"] = query.limit.value - - if "latLong" not in params: - params["latLong"] = "40.780825, -73.972781" - - result = self.handler.call_tripadvisor_nearby_location_api(params=params) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - """Get the list of column names for the nearby_location table. - - Returns: - list: A list of column names for the nearby_location table. - """ - return [ - "location_id", - "name", - "distance", - "rating", - "bearing", - "address_obj", - ] diff --git a/mindsdb/integrations/handlers/twilio_handler/README.md b/mindsdb/integrations/handlers/twilio_handler/README.md deleted file mode 100644 index 70671efd31a..00000000000 --- a/mindsdb/integrations/handlers/twilio_handler/README.md +++ /dev/null @@ -1,96 +0,0 @@ - - -# Twilio Handler - -Twilio handler for MindsDB provides interfaces to connect to Twilio via APIs and send or retrieve SMS data into MindsDB. - ---- - -## Table of Contents - -- [Twilio Handler](#twilio-handler) - - [Table of Contents](#table-of-contents) - - [About Twilio](#about-twilio) - - [Twilio Handler Implementation](#twilio-handler-implementation) - - [Twilio Handler Initialization](#twilio-handler-initialization) - - [How to get your Twilio credentials](#how-to-get-your-twilio-credentials) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## About Twilio - -Twilio provides a cloud communication platform which allows software developers to programmatically make and receive phone calls, send and receive text messages, and perform other communication functions using its web service APIs. - -## Twilio Handler Implementation - -This handler was implemented using the [Twilio Python SDK](https://www.twilio.com/docs/libraries/python). The SDK provides a simple and efficient way to interact with the Twilio API. - -## Twilio Handler Initialization - -The Twilio handler is initialized with the following parameters: - -- `account_sid`: a required Twilio Account SID -- `auth_token`: a required Twilio Authentication Token -- `phone_number`: a required Twilio phone number - -## How to get your Twilio credentials - -1. Sign up for a Twilio account or log into your existing account. -2. Navigate to the [Twilio Console Dashboard](https://www.twilio.com/console). -3. Here you will find your `ACCOUNT SID` and `AUTH TOKEN`. -4. To get a Twilio phone number, navigate to the "Phone Numbers" section and either use an existing number or buy a new one. -5. Store these as environment variables: `TWILIO_ACCOUNT_SID`, `TWILIO_AUTH_TOKEN`, and `TWILIO_PHONE_NUMBER` respectively. - -## Implemented Features - -- Send an SMS to a given number with a specified body. -- Fetch the last `n` messages sent or received by the Twilio phone number. - -## TODO - -- Implement support for making and receiving calls. -- Add more detailed logging and error handling. - -## Example Usage - -```sql --- To send an SMS -CREATE DATABASE my_twilio -With - ENGINE = 'twilio', - PARAMETERS = { - "account_sid":"YOUR_ACCOUNT_SID", - "auth_token":"YOUR_AUTH_TOKEN" - }; -``` - -You can now run queries as follows: - -```sql --- Get all messages -SELECT * FROM my_twilio.messages LIMIT 100; - --- get all messages sent for all numbers in this account -SELECT * FROM - my_twilio.phone_numbers - LEFT JOIN my_twilio.messages - ON my_twilio.phone_numbers.phone_number = my_twilio.messages.from_number; - --- Get message with sid -SELECT sid, to_number, from_number FROM my_twilio.messages where sid="SMbefbd64e3caa7d4c147a0aab82d47"; - --- filter to and from -SELECT * FROM my_twilio.messages where from_number="+15129222338"; -SELECT * FROM my_twilio.messages where to_number="+15129222338"; - - --- send messages: -INSERT INTO my_twilio.messages (to_number, from_number, body) -values("+15129222338", "+16122530327", "wow! testing this"); - -select * from my_twilio( -fetch_messages(date_sent_after='2022-10-29 09:46:29.000000') -``` diff --git a/mindsdb/integrations/handlers/twilio_handler/__about__.py b/mindsdb/integrations/handlers/twilio_handler/__about__.py deleted file mode 100644 index 0b7b8c6b3bf..00000000000 --- a/mindsdb/integrations/handlers/twilio_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Twilio handler' -__package_name__ = 'mindsdb_twilio_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Twilio" -__author__ = 'Lizzie Siegle' -__github__ = 'https://github.com/elizabethsiegle' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/twilio_handler/__init__.py b/mindsdb/integrations/handlers/twilio_handler/__init__.py deleted file mode 100644 index ac08eb0ff34..00000000000 --- a/mindsdb/integrations/handlers/twilio_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .twilio_handler import ( - TwilioHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Twilio' -name = 'twilio' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/twilio_handler/icon.svg b/mindsdb/integrations/handlers/twilio_handler/icon.svg deleted file mode 100644 index 99adef76aef..00000000000 --- a/mindsdb/integrations/handlers/twilio_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/twilio_handler/requirements.txt b/mindsdb/integrations/handlers/twilio_handler/requirements.txt deleted file mode 100644 index 1e2071a390f..00000000000 --- a/mindsdb/integrations/handlers/twilio_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -twilio \ No newline at end of file diff --git a/mindsdb/integrations/handlers/twilio_handler/twilio_handler.py b/mindsdb/integrations/handlers/twilio_handler/twilio_handler.py deleted file mode 100644 index 0a6708d5034..00000000000 --- a/mindsdb/integrations/handlers/twilio_handler/twilio_handler.py +++ /dev/null @@ -1,385 +0,0 @@ -import os - -import re -from twilio.rest import Client -import pandas as pd -from mindsdb.integrations.libs.api_handler import APIHandler, APITable -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.utilities.config import Config -from mindsdb.utilities import log -from mindsdb.integrations.utilities.date_utils import parse_local_date -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, project_dataframe, filter_dataframe - -from mindsdb_sql_parser import ast - -logger = log.getLogger(__name__) - - -class PhoneNumbersTable(APITable): - - def select(self, query: ast.Select) -> Response: - - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - - if op == 'or': - raise NotImplementedError('OR is not supported') - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params['limit'] = query.limit.value - - result = self.handler.list_phone_numbers(params, df=True) - - # filter targets - result = filter_dataframe(result, filters) - - # project targets - result = project_dataframe(result, query.targets, self.get_columns()) - - return result - - def get_columns(self): - return [ - 'sid', - 'date_created', - 'date_updated', - 'phone_number', - 'friendly_name', - 'account_sid', - 'capabilities', - 'number_status', - 'api_version', - 'voice_url', - 'sms_url', - 'uri' - ] - - -class MessagesTable(APITable): - - def select(self, query: ast.Select) -> Response: - - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - - if op == 'or': - raise NotImplementedError('OR is not supported') - if arg1 == 'sent_at' and arg2 is not None: - - date = parse_local_date(arg2) - - if op == '>': - params['date_sent_after'] = date - elif op == '<': - params['date_sent_before'] = date - else: - raise NotImplementedError - - # also add to post query filter because date_sent_after=date1 will include date1 - filters.append([op, arg1, arg2]) - - elif arg1 == 'sid': - if op == '=': - params['sid'] = arg2 - # TODO: implement IN - else: - NotImplementedError('Only "from_number=" is implemented') - elif arg1 == 'from_number': - if op == '=': - params['from_number'] = arg2 - # TODO: implement IN - else: - NotImplementedError('Only "from_number=" is implemented') - - elif arg1 == 'to_number': - if op == '=': - params['to_number'] = arg2 - # TODO: implement IN - else: - NotImplementedError('Only "to_number=" is implemented') - - else: - filters.append([op, arg1, arg2]) - - result = self.handler.fetch_messages(params, df=True) - - # filter targets - result = filter_dataframe(result, filters) - - if query.limit is not None: - result = result[:int(query.limit.value)] - - # project targets - result = project_dataframe(result, query.targets, self.get_columns()) - - return result - - def get_columns(self): - return [ - 'sid', - 'from_number', - 'to_number', - 'body', - 'direction', - 'msg_status', - 'sent_at', # datetime.strptime(str(msg.date_sent), '%Y-%m-%d %H:%M:%S%z'), - 'account_sid', - 'price', - 'price_unit', - 'api_version', - 'uri' - ] - - def insert(self, query: ast.Insert): - # https://docs.tweepy.org/en/stable/client.html#tweepy.Client.create_tweet - columns = [col.name for col in query.columns] - - ret = [] - - insert_params = ["to_number", "from_number", "body", 'media_url'] - for row in query.values: - params = dict(zip(columns, row)) - - # split long text over 1500 symbols - max_text_len = 1500 - text = params['body'] - words = re.split('( )', text) - messages = [] - - text2 = '' - pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' - for word in words: - # replace the links in word to string with the length as twitter short url (23) - word2 = re.sub(pattern, '-' * 23, word) - if len(text2) + len(word2) > max_text_len - 3 - 7: # 3 is for ..., 7 is for (10/11) - messages.append(text2.strip()) - - text2 = '' - text2 += word - - # the last message - if text2.strip() != '': - messages.append(text2.strip()) - - len_messages = len(messages) - - for i, text in enumerate(messages): - if i < len_messages - 1: - text += '...' - else: - text += ' ' - - if i >= 1: - text += f'({i + 1}/{len_messages})' - # only send image on first url - if 'media_url' in params: - del params['media_url'] - - params['body'] = text - params_to_send = {key: params[key] for key in insert_params if (key in params)} - ret_row = self.handler.send_sms(params_to_send, ret_as_dict=True) - ret_row['body'] = text - ret.append(ret_row) - - return pd.DataFrame(ret) - - -class TwilioHandler(APIHandler): - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - args = kwargs.get('connection_data', {}) - - self.connection_args = {} - handler_config = Config().get('twilio_handler', {}) - for k in ['account_sid', 'auth_token']: - if k in args: - self.connection_args[k] = args[k] - elif f'TWILIO_{k.upper()}' in os.environ: - self.connection_args[k] = os.environ[f'TWILIO_{k.upper()}'] - elif k in handler_config: - self.connection_args[k] = handler_config[k] - - self.client = None - self.is_connected = False - - messages = MessagesTable(self) - phone_numbers = PhoneNumbersTable(self) - self._register_table('messages', messages) - self._register_table('phone_numbers', phone_numbers) - - def connect(self): - """Authenticate with the Twilio API using the account_sid and auth_token provided in the constructor.""" - if self.is_connected is True: - return self.client - - self.client = Client( - self.connection_args['account_sid'], - self.connection_args['auth_token'] - ) - - self.is_connected = True - return self.client - - def check_connection(self) -> StatusResponse: - '''It evaluates if the connection with Twilio API is alive and healthy.''' - response = StatusResponse(False) - - try: - self.connect() - # Maybe make a harmless API request to verify connection, but be mindful of rate limits and costs - response.success = True - - except Exception as e: - response.error_message = f'Error connecting to Twilio api: {e}. ' - logger.error(response.error_message) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def parse_native_query(self, query_string: str): - """Parses the native query string of format method(arg1=val1, arg2=val2, ...) and returns the method name and arguments.""" - - # Adjust regex to account for the possibility of no arguments inside the parenthesis - match = re.match(r'(\w+)\(([^)]*)\)', query_string) - if not match: - raise ValueError(f"Invalid query format: {query_string}") - - method_name = match.group(1) - arg_string = match.group(2) - - # Extract individual arguments - args = {} - if arg_string: # Check if there are any arguments - for arg in arg_string.split(','): - arg = arg.strip() - key, value = arg.split('=') - args[key.strip()] = value.strip() - - return method_name, args - - def native_query(self, query_string: str = None): - '''It parses any native statement string and acts upon it (for example, raw syntax commands).''' - - method_name, params = self.parse_native_query(query_string) - if method_name == 'send_sms': - response = self.send_sms(params) - elif method_name == 'fetch_messages': - response = self.fetch_messages(params) - elif method_name == 'list_phone_numbers': - response = self.list_phone_numbers(params) - else: - raise ValueError(f"Method '{method_name}' not supported by TwilioHandler") - - return response - - def send_sms(self, params, ret_as_dict=False): - message = self.client.messages.create( - to=params.get("to_number"), - from_=params.get('from_number'), - body=params.get("body"), - media_url=params.get("media_url") - ) - - if ret_as_dict is True: - return {'sid': message.sid, 'status': message.status} - return Response( - RESPONSE_TYPE.MESSAGE, - sid=message.sid, - status=message.status - ) - - def fetch_messages(self, params, df=False): - limit = int(params.get('limit', 1000)) - sid = params.get('sid', None) - # Convert date strings to datetime objects if provided - date_sent_after = params.get('date_sent_after', None) - date_sent_before = params.get('date_sent_before', None) - # Extract 'from_' and 'body' search criteria from params - from_number = params.get('from_number', None) - to_number = params.get('to_number', None) - args = { - 'limit': limit, - 'date_sent_after': date_sent_after, - 'date_sent_before': date_sent_before, - 'from_': from_number, - 'to': to_number - } - - args = {arg: val for arg, val in args.items() if val is not None} - if sid: - messages = [self.client.messages(sid).fetch()] - else: - messages = self.client.messages.list(**args) - - # Extract all possible properties for each message - data = [] - for msg in messages: - msg_data = { - 'sid': msg.sid, - 'to_number': msg.to, - 'from_number': msg.from_, - 'body': msg.body, - 'direction': msg.direction, - 'msg_status': msg.status, - 'sent_at': msg.date_created.replace(tzinfo=None), - 'account_sid': msg.account_sid, - 'price': msg.price, - 'price_unit': msg.price_unit, - 'api_version': msg.api_version, - 'uri': msg.uri, - # 'media_url': [media.uri for media in msg.media.list()] - # ... Add other properties as needed - } - data.append(msg_data) - - if df is True: - return pd.DataFrame(data) - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(data)) - - def list_phone_numbers(self, params, df=False): - limit = int(params.get('limit', 100)) - args = { - 'limit': limit - } - args = {arg: val for arg, val in args.items() if val is not None} - phone_numbers = self.client.incoming_phone_numbers.list(**args) - - # Extract properties for each phone number - data = [] - for number in phone_numbers: - num_data = { - 'sid': number.sid, - 'date_created': number.date_created, - 'date_updated': number.date_updated, - 'phone_number': number.phone_number, - 'friendly_name': number.friendly_name, - 'account_sid': number.account_sid, - 'capabilities': number.capabilities, - 'number_status': number.status, - 'api_version': number.api_version, - 'voice_url': number.voice_url, - 'sms_url': number.sms_url, - 'uri': number.uri, - # ... Add other properties as needed - } - data.append(num_data) - - if df is True: - return pd.DataFrame(data) - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(data)) diff --git a/mindsdb/integrations/handlers/twitter_handler/README.md b/mindsdb/integrations/handlers/twitter_handler/README.md deleted file mode 100644 index 9e3b41c8301..00000000000 --- a/mindsdb/integrations/handlers/twitter_handler/README.md +++ /dev/null @@ -1,130 +0,0 @@ -# Build your own Twitter AI agent - -Imagine you want to engage with your followers on Twitter and answer all their questions promptly. -Thanks to MindsDB, you can train an AI system that helps you manage automation on Twitter responses. - - -For this particular example, we would like to automatically respond to the people that tweet about us as follows: -- If the message is positive, write a short thank you note. -- If the message is negative or the message has a question, invite them to our slack channel. - - -We will build our Twitter AI tool in a few SQL commands in MindsDB. - - -Let's start by connecting our Twitter account. To do that, you can follow these [steps](https://developer.twitter.com/en/docs/authentication/oauth-2-0/bearer-tokens) to obtain a BEARER TOKEN from twitter. - - -``` -# Should be able to create a twitter database -CREATE DATABASE my_twitter -With - ENGINE = 'twitter', - PARAMETERS = { - "bearer_token": "twitter bearer TOKEN" - }; -``` - -This creates a database called my_twitter. This database ships with a table called tweets that we can use to search for tweets as well as to write tweets. - - -## Searching for tweets in SQL - -Let's get a list of tweets that contain or hashtag the word mindsdb - -``` -SELECT - id, created_at, author_username, text -FROM my_twitter.tweets -WHERE - query = '(mindsdb OR #mindsdb) -is:retweet -is:reply' - AND created_at > '2023-02-16' -LIMIT 20; -``` - -MindsDB Twitter integration also supports native queries, which in this case, will be calling any function in [tweepy] -(https://docs.tweepy.org/en/stable/client.html) -``` -# this should handle authentication and pagination for us -SELECT * FROM my_twitter ( - search_recent_tweets( - query = '(mindsdb OR #mindsdb) -is:retweet -is:reply'', - start_time = '2023-02-16T00:00:00.000Z', - max_results = 2 - ) -); -``` - -## Writing tweets using SQL - -Let's test by tweeting a few things. - -``` -INSERT INTO my_twitter.tweets (reply_to_tweet_id, text) -VALUES - (1626198053446369280, 'MindsDB is great! now its super simple to build ML powered apps'), - (1626198053446369280, 'Holy!! MindsDB is the best thing they have invented for developers doing ML'), -``` - -Those tweets should be live now on Twitter, like magic, right? - -## Let's use AI to write responses for us - -To do this, we would like to create a machine-learning model that can write responses. -We will be using OpenAI GPT-3 for this. The way it works is that we will create a model that can take a prompt and give a message based on that prompt. -The query looks like: - -``` -CREATE MODEL mindsdb.twitter_response_model -PREDICT response -USING - engine = 'openai', - max_tokens = 200, - prompt_template = 'from tweet "{{text}}" by "{{author_username}}", if their comment is a question, invite them to join the MindsDB slack using this link http://bitly.com/abc. Otherwise, simply write a thank you message'; -``` - -This created a virtual AI table called twitter_response_model. We can query this model as if it was a table, but it will generate responses for us, as follows: - -``` -SELECT response FROM mindsdb.twitter_response_model -WHERE author_username = '@pedro' and text = 'I love this, can I learn more?'; -``` - -Now, let's test it with some of the tweets on Twitter. So, we are going to join that model with the query that we had worked on before that gets positive and neutral comments: - -``` -SELECT t.id, t.author_username, t.text, r.response -FROM my_twitter.tweets t -JOIN mindsdb.twitter_response_model r -WHERE - t.query = '(mindsdb OR #mindsdb) -is:retweet -is:reply'' -LIMIT 2 -``` -# Schedule a job - -Finally, we can now automate the responses by writing a job that: -- Checks for new tweets -- Generates a response using the OpenAI model -- Tweets the responses back - -All this in one SQL command: - -``` -CREATE JOB auto_respond AS ( - - INSERT INTO my_twitter.tweets (in_reply_to_tweet_id, text) - SELECT - t.id AS in_reply_to_tweet_id, - r.response AS text - FROM my_twitter.tweets t - JOIN mindsdb.twitter_response_model r - WHERE - t.query = '(mindsdb OR #mindsdb) -is:retweet -is:reply'' - AND t.created_at > "{{PREVIOUS_START_DATETIME}}" - limit 2 -) -EVERY HOUR -``` - -And there it is every hour, we will be checking for the new tweets created_at = {{PREVIOUS_START_DATETIME}}, and insert into tweets, the responses generated by OpenAI GPT-3. - diff --git a/mindsdb/integrations/handlers/twitter_handler/__about__.py b/mindsdb/integrations/handlers/twitter_handler/__about__.py deleted file mode 100644 index 4a9f0c28453..00000000000 --- a/mindsdb/integrations/handlers/twitter_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Twitter handler' -__package_name__ = 'mindsdb_twitter_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Twitter" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/twitter_handler/__init__.py b/mindsdb/integrations/handlers/twitter_handler/__init__.py deleted file mode 100644 index f7225d1cbc3..00000000000 --- a/mindsdb/integrations/handlers/twitter_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .twitter_handler import ( - TwitterHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Twitter' -name = 'twitter' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/twitter_handler/icon.svg b/mindsdb/integrations/handlers/twitter_handler/icon.svg deleted file mode 100644 index 4797d6b315d..00000000000 --- a/mindsdb/integrations/handlers/twitter_handler/icon.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/twitter_handler/requirements.txt b/mindsdb/integrations/handlers/twitter_handler/requirements.txt deleted file mode 100644 index 6a62bcc576e..00000000000 --- a/mindsdb/integrations/handlers/twitter_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -tweepy diff --git a/mindsdb/integrations/handlers/twitter_handler/twitter_handler.py b/mindsdb/integrations/handlers/twitter_handler/twitter_handler.py deleted file mode 100644 index da83eaa7dc2..00000000000 --- a/mindsdb/integrations/handlers/twitter_handler/twitter_handler.py +++ /dev/null @@ -1,469 +0,0 @@ -import re -import os -import datetime as dt -import time -from collections import defaultdict -import io -import requests - -import pandas as pd -import tweepy - -from mindsdb.utilities import log -from mindsdb.utilities.config import Config - -from mindsdb_sql_parser import ast - -from mindsdb.integrations.libs.api_handler import APIHandler, APITable, FuncParser -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.integrations.utilities.date_utils import parse_utc_date - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class TweetsTable(APITable): - - def select(self, query: ast.Select) -> Response: - - conditions = extract_comparison_conditions(query.where) - - params = {} - filters = [] - for op, arg1, arg2 in conditions: - - if op == 'or': - raise NotImplementedError('OR is not supported') - if arg1 == 'created_at': - date = parse_utc_date(arg2) - if op == '>': - # "tweets/search/recent" doesn't accept dates earlier than 7 days - if (dt.datetime.now(dt.timezone.utc) - date).days > 7: - # skip this condition - continue - params['start_time'] = date - elif op == '<': - params['end_time'] = date - else: - raise NotImplementedError - - elif arg1 == 'query': - if op == '=': - params[arg1] = arg2 - else: - NotImplementedError(f'Unknown op: {op}') - - elif arg1 == 'id': - if op == '>': - params['since_id'] = arg2 - elif op == '>=': - raise NotImplementedError("Please use 'id > value'") - elif op == '<': - params['until_id'] = arg2 - elif op == '<=': - raise NotImplementedError("Please use 'id < value'") - else: - NotImplementedError('Search with "id=" is not implemented') - - else: - filters.append([op, arg1, arg2]) - - if query.limit is not None: - params['max_results'] = query.limit.value - - params['expansions'] = ['author_id', 'in_reply_to_user_id'] - params['tweet_fields'] = ['created_at', 'conversation_id', 'referenced_tweets'] - params['user_fields'] = ['name', 'username'] - - if 'query' not in params: - # search not works without query, use 'mindsdb' - params['query'] = 'mindsdb' - - result = self.handler.call_twitter_api( - method_name='search_recent_tweets', - params=params, - filters=filters - ) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = [] - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError - - if len(columns) == 0: - columns = self.get_columns() - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - result = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - return result - - def get_columns(self): - return [ - 'id', - 'created_at', - 'text', - 'edit_history_tweet_ids', - 'author_id', - 'author_name', - 'author_username', - 'conversation_id', - 'in_reply_to_tweet_id', - 'in_retweeted_to_tweet_id', - 'in_quote_to_tweet_id', - 'in_reply_to_user_id', - 'in_reply_to_user_name', - 'in_reply_to_user_username', - ] - - def insert(self, query: ast.Insert): - # https://docs.tweepy.org/en/stable/client.html#tweepy.Client.create_tweet - columns = [col.name for col in query.columns] - - insert_params = ('consumer_key', 'consumer_secret', 'access_token', 'access_token_secret') - for p in insert_params: - if p not in self.handler.connection_args: - raise Exception(f'To insert data into Twitter, you need to provide the following parameters when connecting it to MindsDB: {insert_params}') # noqa - - for row in query.values: - params = dict(zip(columns, row)) - - # split long text over 280 symbols - max_text_len = 280 - text = params['text'] - - # Post image if column media_url is provided, only do this on last tweet - media_ids = None - if 'media_url' in params: - media_url = params.pop('media_url') - - # create an in memory file - resp = requests.get(media_url) - img = io.BytesIO(resp.content) - - # upload media to twitter - api_v1 = self.handler.create_connection(api_version=1) - content_type = resp.headers['Content-Type'] - file_type = content_type.split('/')[-1] - media = api_v1.media_upload(filename="img.{file_type}".format(file_type=file_type), file=img) - - media_ids = [media.media_id] - - words = re.split('( )', text) - - messages = [] - - text2 = '' - pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' - for word in words: - # replace the links in word to string with the length as twitter short url (23) - word2 = re.sub(pattern, '-' * 23, word) - if len(text2) + len(word2) > max_text_len - 3 - 7: # 3 is for ..., 7 is for (10/11) - messages.append(text2.strip()) - - text2 = '' - text2 += word - - # the last message - if text2.strip() != '': - messages.append(text2.strip()) - - len_messages = len(messages) - for i, text in enumerate(messages): - if i < len_messages - 1: - text += '...' - else: - text += ' ' - # publish media with the last message - if media_ids is not None: - params['media_ids'] = media_ids - - text += f'({i + 1}/{len_messages})' - - params['text'] = text - ret = self.handler.call_twitter_api('create_tweet', params) - inserted_id = ret.id[0] - params['in_reply_to_tweet_id'] = inserted_id - - -class TwitterHandler(APIHandler): - """A class for handling connections and interactions with the Twitter API. - - Attributes: - bearer_token (str): The consumer key for the Twitter app. - api (tweepy.API): The `tweepy.API` object for interacting with the Twitter API. - - """ - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - args = kwargs.get('connection_data', {}) - - self.connection_args = {} - handler_config = Config().get('twitter_handler', {}) - for k in ['bearer_token', 'consumer_key', 'consumer_secret', - 'access_token', 'access_token_secret', 'wait_on_rate_limit']: - if k in args: - self.connection_args[k] = args[k] - elif f'TWITTER_{k.upper()}' in os.environ: - self.connection_args[k] = os.environ[f'TWITTER_{k.upper()}'] - elif k in handler_config: - self.connection_args[k] = handler_config[k] - - self.api = None - self.is_connected = False - - tweets = TweetsTable(self) - self._register_table('tweets', tweets) - - def create_connection(self, api_version=2): - if api_version == 1: - auth = tweepy.OAuthHandler( - self.connection_args['consumer_key'], - self.connection_args['consumer_secret'] - ) - auth.set_access_token( - self.connection_args['access_token'], - self.connection_args['access_token_secret'] - ) - return tweepy.API(auth) - - return tweepy.Client(**self.connection_args) - - def connect(self, api_version=2): - """Authenticate with the Twitter API using the API keys and secrets stored in the `consumer_key`, `consumer_secret`, `access_token`, and `access_token_secret` attributes.""" # noqa - - if self.is_connected is True: - return self.api - - self.api = self.create_connection() - - self.is_connected = True - return self.api - - def check_connection(self) -> StatusResponse: - - response = StatusResponse(False) - - try: - api = self.connect() - - # call get_user with unknown id. - # it raises an error in case if auth is not success and returns not-found otherwise - # api.get_me() is not exposed for OAuth 2.0 App-only authorisation - api.get_user(id=1) - response.success = True - - except tweepy.Unauthorized as e: - response.error_message = f'Error connecting to Twitter api: {e}. Check bearer_token' - logger.error(response.error_message) - - if response.success is True and len(self.connection_args) > 1: - # not only bearer_token, check read-write mode (OAuth 2.0 Authorization Code with PKCE) - try: - api = self.connect() - - api.get_me() - - except tweepy.Unauthorized as e: - keys = 'consumer_key', 'consumer_secret', 'access_token', 'access_token_secret' - response.error_message = f'Error connecting to Twitter api: {e}. Check' + ', '.join(keys) - logger.error(response.error_message) - - response.success = False - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query_string: str = None): - method_name, params = FuncParser().from_string(query_string) - - df = self.call_twitter_api(method_name, params) - - return Response( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - - def _apply_filters(self, data, filters): - if not filters: - return data - - data2 = [] - for row in data: - add = False - for op, key, value in filters: - value2 = row.get(key) - if isinstance(value, int): - # twitter returns ids as string - value = str(value) - - if op in ('!=', '<>'): - if value == value2: - break - elif op in ('==', '='): - if value != value2: - break - elif op == 'in': - if not isinstance(value, list): - value = [value] - if value2 not in value: - break - elif op == 'not in': - if not isinstance(value, list): - value = [value] - if value2 in value: - break - else: - raise NotImplementedError(f'Unknown filter: {op}') - # only if there wasn't breaks - add = True - if add: - data2.append(row) - return data2 - - def call_twitter_api(self, method_name: str = None, params: dict = None, filters: list = None): - - # method > table > columns - expansions_map = { - 'search_recent_tweets': { - 'users': ['author_id', 'in_reply_to_user_id'], - }, - 'search_all_tweets': { - 'users': ['author_id'], - }, - } - - api = self.connect() - method = getattr(api, method_name) - - # pagination handle - - count_results = None - if 'max_results' in params: - count_results = params['max_results'] - - data = [] - includes = defaultdict(list) - - max_page_size = 100 - min_page_size = 10 - left = None - - limit_exec_time = time.time() + 60 - - if filters: - # if we have filters: do big page requests - params['max_results'] = max_page_size - - while True: - if time.time() > limit_exec_time: - raise RuntimeError('Handler request timeout error') - - if count_results is not None: - left = count_results - len(data) - if left == 0: - break - elif left < 0: - # got more results that we need - data = data[:left] - break - - if left > max_page_size: - params['max_results'] = max_page_size - elif left < min_page_size: - params['max_results'] = min_page_size - else: - params['max_results'] = left - - logger.debug(f'>>>twitter in: {method_name}({params})') - resp = method(**params) - - if hasattr(resp, 'includes'): - for table, records in resp.includes.items(): - includes[table].extend([r.data for r in records]) - - if isinstance(resp.data, list): - chunk = [r.data for r in resp.data] - else: - if isinstance(resp.data, dict): - data.append(resp.data) - if hasattr(resp.data, 'data') and isinstance(resp.data.data, dict): - data.append(resp.data.data) - break - - # unwind columns - for row in chunk: - if 'referenced_tweets' in row: - refs = row['referenced_tweets'] - if isinstance(refs, list) and len(refs) > 0: - if refs[0]['type'] == 'replied_to': - row['in_reply_to_tweet_id'] = refs[0]['id'] - if refs[0]['type'] == 'retweeted': - row['in_retweeted_to_tweet_id'] = refs[0]['id'] - if refs[0]['type'] == 'quoted': - row['in_quote_to_tweet_id'] = refs[0]['id'] - - if filters: - chunk = self._apply_filters(chunk, filters) - - # limit output - if left is not None: - chunk = chunk[:left] - - data.extend(chunk) - # next page ? - if count_results is not None and hasattr(resp, 'meta') and 'next_token' in resp.meta: - params['next_token'] = resp.meta['next_token'] - else: - break - - df = pd.DataFrame(data) - - # enrich - expansions = expansions_map.get(method_name) - if expansions is not None: - for table, records in includes.items(): - df_ref = pd.DataFrame(records) - - if table not in expansions: - continue - - for col_id in expansions[table]: - col = col_id[:-3] # cut _id - if col_id not in df.columns: - continue - - col_map = { - col_ref: f'{col}_{col_ref}' - for col_ref in df_ref.columns - } - df_ref2 = df_ref.rename(columns=col_map) - df_ref2 = df_ref2.drop_duplicates(col_id) - - df = df.merge(df_ref2, on=col_id, how='left') - - return df diff --git a/mindsdb/integrations/handlers/vertex_handler/README.md b/mindsdb/integrations/handlers/vertex_handler/README.md deleted file mode 100644 index c20ce65b813..00000000000 --- a/mindsdb/integrations/handlers/vertex_handler/README.md +++ /dev/null @@ -1,95 +0,0 @@ -[Vertex AI](https://cloud.google.com/vertex-ai) offers everything you need to build and use generative AIβ€”from AI solutions, to Search and Conversation, to 130+ foundation models, to a unified AI platform. - -## Setup - -MindsDB provides the Vertex handler that enables you to connect Vertex AI models within MindsDB. - - -### AI Engine - -Before creating a model, it is required to create an AI engine based on the provided handler. - -> If you installed MindsDB locally, make sure to install all Vertex dependencies by running `pip install mindsdb[vertex]` or `pip install .[vertex]`. - -You can create an Vertex engine using this command: - -```sql -CREATE ML_ENGINE vertex FROM vertex -USING - project_id="vertex-1111", - location="us-central1", - staging_bucket="gs://my_staging_bucket", - experiment="my-experiment", - experiment_description="my experiment description", - service_account_key_json = { - "type": "service_account", - "project_id": "vertex-1111", - "private_key_id": "aaaaaaaaaa", - "private_key": "---------BIG STRING WITH KEY-------\n", - "client_email": "testvertexvaitest-11111.iam.gserviceaccount.com", - "client_id": "1111111111111", - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://oauth2.googleapis.com/token", - "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", - "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/testbigquery%40bgtest-11111.iam.gserviceaccount.com", - "universe_domain": "googleapis.com" - }; -``` - -> Please note that you need to provide your service_account key here. It is also possible to pass your service account key as either a file path or a URL using the 'service_account_key_file` and `service_account_key_url` parameters respectively. - -```sql -CREATE ML_ENGINE vertex FROM vertex -USING - project_id="vertex-1111", - location="us-central1", - staging_bucket="gs://my_staging_bucket", - experiment="my-experiment", - experiment_description="my experiment description", - service_account_key_file="/home/user/MyProjects/vertex-1111.json"; -``` - -```sql -CREATE ML_ENGINE vertex FROM vertex -USING - project_id="vertex-1111", - location="us-central1", - staging_bucket="gs://my_staging_bucket", - experiment="my-experiment", - experiment_description="my experiment description", - service_account_key_url="https://storage.googleapis.com/vertex-1111.json?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=example%40example.iam.gserviceaccount.com%2F20220215%2Fus-central1%2Fstorage%2Fgoog4_request&X-Goog-Date=20220215T000000Z&X-Goog-Expires=3600&X-Goog-SignedHeaders=host&X-Goog-Signature=abcd1234"; -``` - - -The name of the engine (here, `vertex`) should be used as a value for the `engine` parameter in the `USING` clause of the `CREATE MODEL` statement. - -### AI Model - -The [`CREATE MODEL`](/sql/create/model) statement is used to create, train, and deploy models within MindsDB. - -```sql -CREATE MODEL mindsdb.vertex_anomaly_detection_model -PREDICT cut -USING - engine = 'vertex', - model_name = 'diamonds_anomaly_detection', - custom_model = True; -``` - -Where: - -| Name | Description | -|-------------------|---------------------------------------------------------------------------| -| `engine` | It defines the Vertex engine. | -| `model_name` | It is used to provide the name of the model. | -| `custom_model` | Is it custom model or not | - -## Usage - -Once you have created an Vertex model, you can use it to make predictions. - -```sql -SELECT t.cut, m.cut as anomaly -FROM files.vertex_anomaly_detection as t -JOIN mindsdb.vertex_anomaly_detection_model as m; -``` diff --git a/mindsdb/integrations/handlers/vertex_handler/__about__.py b/mindsdb/integrations/handlers/vertex_handler/__about__.py deleted file mode 100644 index 16ce5f91660..00000000000 --- a/mindsdb/integrations/handlers/vertex_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Vertex handler" -__package_name__ = "mindsdb_vertex_handler" -__version__ = "0.0.0" -__description__ = "MindsDB handler for Google Vertex AI API" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/vertex_handler/__init__.py b/mindsdb/integrations/handlers/vertex_handler/__init__.py deleted file mode 100644 index 0ff792afcf3..00000000000 --- a/mindsdb/integrations/handlers/vertex_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .vertex_handler import VertexHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Google Vertex AI" -name = "vertex" -type = HANDLER_TYPE.ML -icon_path = "icon.png" -permanent = False - -__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"] diff --git a/mindsdb/integrations/handlers/vertex_handler/icon.png b/mindsdb/integrations/handlers/vertex_handler/icon.png deleted file mode 100644 index f20026f37b3..00000000000 Binary files a/mindsdb/integrations/handlers/vertex_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/vertex_handler/requirements.txt b/mindsdb/integrations/handlers/vertex_handler/requirements.txt deleted file mode 100644 index afe78fb59a1..00000000000 --- a/mindsdb/integrations/handlers/vertex_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -google-cloud-aiplatform>=1.35.0 --r mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt \ No newline at end of file diff --git a/mindsdb/integrations/handlers/vertex_handler/vertex_client.py b/mindsdb/integrations/handlers/vertex_handler/vertex_client.py deleted file mode 100755 index 9f5d98529c5..00000000000 --- a/mindsdb/integrations/handlers/vertex_handler/vertex_client.py +++ /dev/null @@ -1,95 +0,0 @@ -from mindsdb.utilities import log -from google.cloud.aiplatform import init, TabularDataset, Model, Endpoint -import pandas as pd - -from mindsdb.integrations.utilities.handlers.auth_utilities.google import GoogleServiceAccountOAuth2Manager - -logger = log.getLogger(__name__) - - -class VertexClient: - """A class to interact with Vertex AI""" - - def __init__(self, args_json, credentials_url=None, credentials_file=None, credentials_json=None): - google_sa_oauth2_manager = GoogleServiceAccountOAuth2Manager( - credentials_url=credentials_url, - credentials_file=credentials_file, - credentials_json=credentials_json, - ) - credentials = google_sa_oauth2_manager.get_oauth2_credentials() - - init( - credentials=credentials, - project=args_json["project_id"], - location=args_json["location"], - staging_bucket=args_json["staging_bucket"], - # the name of the experiment to use to track - # logged metrics and parameters - experiment=args_json["experiment"], - # description of the experiment above - experiment_description=args_json["experiment_description"], - ) - - def print_datasets(self): - """Print all datasets and dataset ids in the project""" - for dataset in TabularDataset.list(): - logger.info(f"Dataset display name: {dataset.display_name}, ID: {dataset.name}") - - def print_models(self): - """Print all model names and model ids in the project""" - for model in Model.list(): - logger.info(f"Model display name: {model.display_name}, ID: {model.name}") - - def print_endpoints(self): - """Print all endpoints and endpoint ids in the project""" - for endpoint in Endpoint.list(): - logger.info(f"Endpoint display name: {endpoint.display_name}, ID: {endpoint.name}") - - def get_model_by_display_name(self, display_name): - """Get a model by its display name""" - try: - return Model.list(filter=f"display_name={display_name}")[0] - except IndexError: - logger.info(f"Model with display name {display_name} not found") - - def get_endpoint_by_display_name(self, display_name): - """Get an endpoint by its display name""" - try: - return Endpoint.list(filter=f"display_name={display_name}")[0] - except IndexError: - logger.info(f"Endpoint with display name {display_name} not found") - - def get_model_by_id(self, model_id): - """Get a model by its ID""" - try: - return Model(model_name=model_id) - except IndexError: - logger.info(f"Model with ID {model_id} not found") - - def deploy_model(self, model): - """Deploy a model to an endpoint - long runtime""" - endpoint = model.deploy() - return endpoint - - def predict_from_df(self, endpoint_display_name, df, custom_model=False): - """Make a prediction from a Pandas dataframe""" - endpoint = self.get_endpoint_by_display_name(endpoint_display_name) - if custom_model: - records = df.values.tolist() - else: - records = df.astype(str).to_dict(orient="records") # list of dictionaries - prediction = endpoint.predict(instances=records) - return prediction - - def predict_from_csv(self, endpoint_display_name, csv_to_predict): - """Make a prediction from a CSV file""" - df = pd.read_csv(csv_to_predict) - return self.predict_from_df(endpoint_display_name, df) - - def predict_from_dict(self, endpoint_display_name, data): - - # convert to list of dictionaries - instances = [dict(zip(data.keys(), values)) for values in zip(*data.values())] - endpoint = self.get_endpoint_by_display_name(endpoint_display_name) - prediction = endpoint.predict(instances=instances) - return prediction diff --git a/mindsdb/integrations/handlers/vertex_handler/vertex_handler.py b/mindsdb/integrations/handlers/vertex_handler/vertex_handler.py deleted file mode 100644 index d2bef2a2be9..00000000000 --- a/mindsdb/integrations/handlers/vertex_handler/vertex_handler.py +++ /dev/null @@ -1,87 +0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.integrations.handlers.vertex_handler.vertex_client import VertexClient -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class VertexHandler(BaseMLEngine): - """Handler for the Vertex Google AI cloud API""" - - name = "Vertex" - - def create(self, target, args=None, **kwargs): - """Logs in to Vertex and deploy a pre-trained model to an endpoint. - - If the endpoint already exists for the model, we do nothing. - - If the endpoint does not exist, we create it and deploy the model to it. - The runtime for this is long, it took 15 minutes for a small model. - """ - assert "using" in args, "Must provide USING arguments for this handler" - args = args["using"] - - model_name = args.pop("model_name") - custom_model = args.pop("custom_model", False) - - # get credentials from engine - credentials_url, credentials_file, credentials_json = self._get_credentials_from_engine() - - # get vertex args from handler then update args from model - vertex_args = self.engine_storage.json_get('args') - vertex_args.update(args) - - vertex = VertexClient(vertex_args, credentials_url, credentials_file, credentials_json) - - model = vertex.get_model_by_display_name(model_name) - if not model: - raise Exception(f"Vertex model {model_name} not found") - endpoint_name = model_name + "_endpoint" - if vertex.get_endpoint_by_display_name(endpoint_name): - logger.info(f"Endpoint {endpoint_name} already exists, skipping deployment") - else: - logger.info(f"Starting deployment at {endpoint_name}") - endpoint = vertex.deploy_model(model) - endpoint.display_name = endpoint_name - endpoint.update() - logger.info(f"Endpoint {endpoint_name} deployed") - - predict_args = {} - predict_args["target"] = target - predict_args["endpoint_name"] = endpoint_name - predict_args["custom_model"] = custom_model - self.model_storage.json_set("predict_args", predict_args) - self.model_storage.json_set("vertex_args", vertex_args) - - def predict(self, df, args=None): - """Predict using the deployed model by calling the endpoint.""" - - if "__mindsdb_row_id" in df.columns: - df.drop("__mindsdb_row_id", axis=1, inplace=True) # TODO is this required? - - predict_args = self.model_storage.json_get("predict_args") - vertex_args = self.model_storage.json_get("vertex_args") - - # get credentials from engine - credentials_url, credentials_file, credentials_json = self._get_credentials_from_engine() - - vertex = VertexClient(vertex_args, credentials_url, credentials_file, credentials_json) - results = vertex.predict_from_df(predict_args["endpoint_name"], df, custom_model=predict_args["custom_model"]) - - if predict_args["custom_model"]: - return pd.DataFrame(results.predictions, columns=[predict_args["target"]]) - else: - return pd.DataFrame(results.predictions) - - def create_engine(self, connection_args): - # check if one of credentials_url, credentials_file, or credentials_json is provided - if 'service_account_key_url' not in connection_args and 'service_account_key_file' not in connection_args and 'service_account_key_json' not in connection_args: - raise KeyError('Either service_account_key_url, service_account_key_file, or service_account_key_json must be provided') - - self.engine_storage.json_set('args', connection_args) - - def _get_credentials_from_engine(self): - engine_args = self.engine_storage.json_get('args') - - return engine_args.get('service_account_key_url'), engine_args.get('service_account_key_file'), engine_args.get('service_account_key_json') diff --git a/mindsdb/integrations/handlers/vertica_handler/README.md b/mindsdb/integrations/handlers/vertica_handler/README.md deleted file mode 100644 index c488b3b628e..00000000000 --- a/mindsdb/integrations/handlers/vertica_handler/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Vertica Handler - -This is the implementation of the Vertica handler for MindsDB. - -## Vertica -The column-oriented Vertica Analytics Platform was designed to manage large, fast-growing volumes of data and with fast query performance for data warehouses and other query-intensive applications. The product claims to greatly improve query performance over traditional relational database systems, and to provide high availability and exabyte scalability on commodity enterprise servers. Vertica runs on multiple cloud computing systems as well as on Hadoop nodes. Vertica's Eon Mode separates compute from storage, using S3 object storage and dynamic allocation of compute notes - -## Implementation -This handler was implemented using the `vertica-python`, a Python library that allows you to use Python code to run SQL commands on Vertica Database. - -The required arguments to establish a connection are, -* `user`: username associated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which TCPIP connection is to be made -* `database`: Database name to be connected -* `schema`: schema name to get tables - -## Usage -In order to make use of this handler and connect to Vertica in MindsDB, the following syntax can be used, -~~~~sql -CREATE DATABASE vertica_datasource -WITH -engine='vertica', -parameters={ - "user":"dbadmin", - "password":"password", - "host":"127.0.0.1", - "port":5433, - "schema_name":"public", - "database":"VMart" -}; -~~~~ - -Now, you can use this established connection to query your database as follows, -~~~~sql -SELECT * FROM vertica_datasource.TEST; -~~~~ diff --git a/mindsdb/integrations/handlers/vertica_handler/__about__.py b/mindsdb/integrations/handlers/vertica_handler/__about__.py deleted file mode 100644 index 471521dca24..00000000000 --- a/mindsdb/integrations/handlers/vertica_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Vertica handler' -__package_name__ = 'mindsdb_vertica_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Vertica" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/vertica_handler/__init__.py b/mindsdb/integrations/handlers/vertica_handler/__init__.py deleted file mode 100644 index 3eb42e18d09..00000000000 --- a/mindsdb/integrations/handlers/vertica_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .vertica_handler import VerticaHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Vertica' -name = 'vertica' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/vertica_handler/connection_args.py b/mindsdb/integrations/handlers/vertica_handler/connection_args.py deleted file mode 100644 index 31c319fe63a..00000000000 --- a/mindsdb/integrations/handlers/vertica_handler/connection_args.py +++ /dev/null @@ -1,41 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Vertica server.' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the VERTICA server.', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the VERTICA server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the VERTICA server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the VERTICA server. Must be an integer.' - }, - schema_name={ - 'type': ARG_TYPE.STR, - 'description': 'Table are listed according to schema name (it is optional). Note: Default value is "public"' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=5433, - user='root', - password='password', - database='database', - schema_name='xyz' -) diff --git a/mindsdb/integrations/handlers/vertica_handler/icon.svg b/mindsdb/integrations/handlers/vertica_handler/icon.svg deleted file mode 100644 index 8238e34339f..00000000000 --- a/mindsdb/integrations/handlers/vertica_handler/icon.svg +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/vertica_handler/requirements.txt b/mindsdb/integrations/handlers/vertica_handler/requirements.txt deleted file mode 100644 index 175898794d5..00000000000 --- a/mindsdb/integrations/handlers/vertica_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -vertica-python -sqlalchemy-vertica-python \ No newline at end of file diff --git a/mindsdb/integrations/handlers/vertica_handler/tests/__init__.py b/mindsdb/integrations/handlers/vertica_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py b/mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py deleted file mode 100644 index d7d09122a22..00000000000 --- a/mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py +++ /dev/null @@ -1,53 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.vertica_handler.vertica_handler import VerticaHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class VerticaHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": '127.0.0.1', - "port": 5433, - "user": 'dbadmin', - "password": '', - "database": 'VMart', - "schema_name": 'public' - } - - } - cls.handler = VerticaHandler('test_vertica_handler', cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_create_table(self): - query = "CREATE Table TEST(id Number(1),Name Varchar(33))" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_3_insert(self): - query = "INSERT INTO TEST (1,'lOVe yOU)" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_4_native_query_select(self): - query = "SELECT * FROM TEST;" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('TEMP') - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/vertica_handler/vertica_handler.py b/mindsdb/integrations/handlers/vertica_handler/vertica_handler.py deleted file mode 100644 index 2df20f6ed98..00000000000 --- a/mindsdb/integrations/handlers/vertica_handler/vertica_handler.py +++ /dev/null @@ -1,157 +0,0 @@ -from typing import Optional - -import pandas as pd -import vertica_python as vp - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -# from sqlalchemy_vertica.dialect_pyodbc import VerticaDialect -from sqla_vertica_python.vertica_python import VerticaDialect - -logger = log.getLogger(__name__) - - -class VerticaHandler(DatabaseHandler): - """ - This handler handles connection and execution of the Vertica statements. - """ - - name = 'vertica' - - def __init__(self, name, connection_data: Optional[dict], **kwargs): - super().__init__(name) - - self.parser = parse_sql - self.dialect = 'vertica' - self.kwargs = kwargs - self.connection_data = connection_data - self.schema_name = connection_data['schema_name'] if 'schema_name' in connection_data else "public" - - self.connection = None - self.is_connected = False - - def connect(self): - if self.is_connected is True: - return self.connection - - config = { - 'host': self.connection_data['host'], - 'port': self.connection_data['port'], - 'user': self.connection_data['user'], - 'password': self.connection_data['password'], - 'database': self.connection_data['database'] - } - - connection = vp.connect(**config) - self.is_connected = True - self.connection = connection - return self.connection - - def disconnect(self): - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - return - - def check_connection(self) -> StatusResponse: - - result = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - result.success = connection.opened() - except Exception as e: - logger.error(f'Error connecting to Vertica {self.connection_data["database"]}, {e}!') - result.error_message = str(e) - - if result.success is True and need_to_close: - self.disconnect() - if result.success is False and self.is_connected is True: - self.is_connected = False - - return result - - def native_query(self, query: str) -> Response: - """ - Receive SQL query and runs it - :param query: The SQL query to run in VERTICA - :return: returns the records from the current recordset - """ - - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cur: - try: - e = cur.execute(query) - result = e.fetchall() - if e.rowcount != -1: - - response = Response( - RESPONSE_TYPE.TABLE, - pd.DataFrame( - result, - columns=[x.name for x in cur.description] - ) - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except Exception as e: - logger.error(f'Error running query: {query} on {self.connection_data["database"]}!') - response = Response( - RESPONSE_TYPE.ERROR, - error_message=str(e) - ) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Retrieve the data from the SQL statement. - """ - renderer = SqlalchemyRender(VerticaDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Get a list with all of the tabels in VERTICA - """ - q = f'''SELECT - TABLE_NAME, - TABLE_SCHEMA - from v_catalog.tables - WHERE table_schema='{self.schema_name}' - order by - table_name;''' - - return self.native_query(q) - - def get_columns(self, table_name) -> Response: - """ - Show details about the table - """ - q = f'''SELECT - column_name , - data_type - FROM v_catalog.columns - WHERE table_name='{table_name}';''' - - return self.native_query(q) diff --git a/mindsdb/integrations/handlers/vitess_handler/README.md b/mindsdb/integrations/handlers/vitess_handler/README.md deleted file mode 100644 index 722096ceab5..00000000000 --- a/mindsdb/integrations/handlers/vitess_handler/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# Vitess Handler - -This is the implementation of the Vitess Handler for MindsDB. - -## Vitess -Vitess is a database solution for deploying, scaling and managing large clusters of open-source database instances. It currently supports MySQL and Percona Server for MySQL. It's architected to run as effectively in a public or private cloud architecture as it does on dedicated hardware. It combines and extends many important SQL features with the scalability of a NoSQL database. Vitess can help you with the following problems: - - * Scaling a SQL database by allowing you to shard it, while keeping application changes to a minimum. - * Migrating from baremetal to a private or public cloud. - * Deploying and managing a large number of SQL database instances. - -## Implementation - -This handler was implemented by extending mysql connector. - -The required arguments to establish a connection are: - -* `host`: the host name of the Vitess connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name - -## Usage - -In order to make use of this handler and connect to a Vitess server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE vitess_datasource -WITH ENGINE = "vitess", -PARAMETERS = { - "user": "root", - "password": "", - "host": "localhost", - "port": 33577, - "database": "commerce" -} -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM vitess_datasource.product LIMIT 10; -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/vitess_handler/__about__.py b/mindsdb/integrations/handlers/vitess_handler/__about__.py deleted file mode 100644 index 44a40e45250..00000000000 --- a/mindsdb/integrations/handlers/vitess_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Vitess handler' -__package_name__ = 'mindsdb_vitess_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Vitess" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/vitess_handler/__init__.py b/mindsdb/integrations/handlers/vitess_handler/__init__.py deleted file mode 100644 index eef59b931a9..00000000000 --- a/mindsdb/integrations/handlers/vitess_handler/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -try: - from .vitess_handler import VitessHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = 'Vitess' -name = 'vitess' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/vitess_handler/icon.svg b/mindsdb/integrations/handlers/vitess_handler/icon.svg deleted file mode 100644 index b1b42d53774..00000000000 --- a/mindsdb/integrations/handlers/vitess_handler/icon.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/vitess_handler/requirements.txt b/mindsdb/integrations/handlers/vitess_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/vitess_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/vitess_handler/tests/__init__.py b/mindsdb/integrations/handlers/vitess_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py b/mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py deleted file mode 100644 index 2caaeb88fcf..00000000000 --- a/mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.vitess_handler.vitess_handler import VitessHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE - - -class VitessHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 33577, - "user": "root", - "password": "", - "database": "vitess", - } - } - cls.handler = VitessHandler('test_vitess_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_create_table(self): - query = "CREATE Table IF NOT EXISTS Lover(name varchar(101));" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_3_insert(self): - query = "INSERT INTO LOVER VALUES('Shiv Shakti');" - result = self.handler.query(query) - assert result.type is not RESPONSE_TYPE.ERROR - - def test_4_native_query_select(self): - query = "SELECT * FROM LOVER;" - result = self.handler.query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is RESPONSE_TYPE.TABLE - - def test_6_get_columns(self): - columns = self.handler.get_columns('LOVER') - - query = "DROP Table IF EXISTS Lover;" - self.handler.query(query) - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/vitess_handler/vitess_handler.py b/mindsdb/integrations/handlers/vitess_handler/vitess_handler.py deleted file mode 100644 index b541648970d..00000000000 --- a/mindsdb/integrations/handlers/vitess_handler/vitess_handler.py +++ /dev/null @@ -1,47 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE -from collections import OrderedDict - - -from mindsdb.integrations.handlers.mysql_handler import Handler as MysqlHandler - - -class VitessHandler(MysqlHandler): - """ - This handler handles connection and execution of the Vitess statements. - """ - name = 'vitess' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Vitess server.' - }, - password={ - 'type': ARG_TYPE.STR, - 'description': 'The password to authenticate the user with the Vitess server.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Vitess server.' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Vitess server.' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the Vitess server. Must be an integer.' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=5432, - user='root', - password='', - database='database' -) diff --git a/mindsdb/integrations/handlers/weaviate_handler/README.md b/mindsdb/integrations/handlers/weaviate_handler/README.md deleted file mode 100644 index 05c095fde10..00000000000 --- a/mindsdb/integrations/handlers/weaviate_handler/README.md +++ /dev/null @@ -1,115 +0,0 @@ -# Weaviate Handler - -This is the implementation of the Weaviate for MindsDB. - -## Weaviate - -Weaviate is an open-source vector database. It allows you to store data objects and vector embeddings from your favorite ML-models, and scale seamlessly into billions of data objects. -_ -## Implementation - -This handler uses `weaviate-client` python library connect to a weaviate instance. - -The required arguments to establish a connection are: - -* `weaviate_url`: url of the weaviate database -* `weaviate_api_key`: API key to authenticate with weaviate (in case of cloud instance). -* `persistence_directory`: directory to be used in case of local storage - - -### Creating connection - -In order to make use of this handler and connect to a Weaviate server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE weaviate_datasource - WITH ENGINE = "weaviate", - PARAMETERS = { - "weaviate_url" : "https://sample.weaviate.network", - "weaviate_api_key": "api-key" -}; -``` - -```sql -CREATE DATABASE weaviate_datasource - WITH ENGINE = "weaviate", - PARAMETERS = { - "weaviate_url" : "https://localhost:8080", -}; -``` - -```sql -CREATE DATABASE weaviate_datasource - WITH ENGINE = "weaviate", - PARAMETERS = { - "persistence_directory" : "db_path", -}; -``` - -### Dropping connection - -To drop the connection, use this command - -```sql -DROP DATABASE weaviate_datasource; -``` - -### Creating tables - -To insert data from a pre-existing table, use `CREATE` - -```sql -CREATE TABLE weaviate_datascource.test -(SELECT * FROM sqlitedb.test); -``` -As weaviate currently doesn't support json field. -So, this creates another table for the "metadata" field and a reference is created in the original table which points to -its metadata entry. - -Weaviate follows GraphQL conventions where classes (which are table schemas) start with a capital letter and -properties start with a lowercase letter. - -So whenever we create a table, the table's name gets capitalized. - -### Dropping collections - -To drop a Weaviate table use this command - -```sql -DROP TABLE weaviate_datasource.tablename; -``` - -### Querying and selecting - -To query database using a search vector, you can use `search_vector` or `embeddings` in `WHERE` clause - -```sql -SELECT * from weaviate_datasource.test -WHERE search_vector = '[3.0, 1.0, 2.0, 4.5]' -LIMIT 10; -``` - -Basic query - -```sql -SELECT * from weaviate_datasource.test -``` - -You can use `WHERE` clause on dynamic fields like normal SQL - -```sql -SELECT * FROM weaviate_datasource.createtest -WHERE category = "science"; -``` - -### Deleting records - -You can delete entries using `DELETE` just like in SQL. - - -```sql -DELETE FROM weaviate_datasource.test -WHERE id IN (1, 2, 3); -``` - -Update is not supported by mindsdb vector database diff --git a/mindsdb/integrations/handlers/weaviate_handler/__about__.py b/mindsdb/integrations/handlers/weaviate_handler/__about__.py deleted file mode 100644 index 300463b217d..00000000000 --- a/mindsdb/integrations/handlers/weaviate_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Weaviate handler" -__package_name__ = "mindsdb_weaviate_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for weaviate" -__author__ = "Abhijit Pal" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/weaviate_handler/__init__.py b/mindsdb/integrations/handlers/weaviate_handler/__init__.py deleted file mode 100644 index 9d0c4e9cd66..00000000000 --- a/mindsdb/integrations/handlers/weaviate_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .weaviate_handler import WeaviateDBHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Weaviate" -name = "weaviate" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/weaviate_handler/connection_args.py b/mindsdb/integrations/handlers/weaviate_handler/connection_args.py deleted file mode 100644 index 0ea92e43e8c..00000000000 --- a/mindsdb/integrations/handlers/weaviate_handler/connection_args.py +++ /dev/null @@ -1,29 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - weaviate_url={ - "type": ARG_TYPE.STR, - "description": "weaviate url/ local endpoint", - "required": False, - }, - weaviate_api_key={ - "type": ARG_TYPE.STR, - "description": "weaviate API KEY", - "required": False, - "secret": True - }, - persistence_directory={ - "type": ARG_TYPE.STR, - "description": "persistence directory for weaviate", - "required": False, - }, -) - -connection_args_example = OrderedDict( - weaviate_url="http://localhost:8080", - weaviate_api_key="", - persistence_directory="db_path", -) diff --git a/mindsdb/integrations/handlers/weaviate_handler/icon.svg b/mindsdb/integrations/handlers/weaviate_handler/icon.svg deleted file mode 100644 index b860497982f..00000000000 --- a/mindsdb/integrations/handlers/weaviate_handler/icon.svg +++ /dev/null @@ -1,197 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/weaviate_handler/requirements.txt b/mindsdb/integrations/handlers/weaviate_handler/requirements.txt deleted file mode 100644 index 86458abd2e3..00000000000 --- a/mindsdb/integrations/handlers/weaviate_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -weaviate-client~=3.24.2 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/weaviate_handler/weaviate_handler.py b/mindsdb/integrations/handlers/weaviate_handler/weaviate_handler.py deleted file mode 100644 index 4afa749e0ba..00000000000 --- a/mindsdb/integrations/handlers/weaviate_handler/weaviate_handler.py +++ /dev/null @@ -1,652 +0,0 @@ -import ast -from datetime import datetime -from typing import List, Optional - -import weaviate -from weaviate.embedded import EmbeddedOptions -import pandas as pd - -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, - TableField, - VectorStoreHandler, -) -from mindsdb.utilities import log -from weaviate.util import generate_uuid5 - -logger = log.getLogger(__name__) - - -class WeaviateDBHandler(VectorStoreHandler): - """This handler handles connection and execution of the Weaviate statements.""" - - name = "weaviate" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - - self._connection_data = kwargs.get("connection_data") - - self._client_config = { - "weaviate_url": self._connection_data.get("weaviate_url"), - "weaviate_api_key": self._connection_data.get("weaviate_api_key"), - "persistence_directory": self._connection_data.get("persistence_directory"), - } - - if not ( - self._client_config.get("weaviate_url") - or self._client_config.get("persistence_directory") - ): - raise Exception( - "Either url or persist_directory is required for weaviate connection!" - ) - - self._client = None - self._embedded_options = None - self.is_connected = False - self.connect() - - def _get_client(self) -> weaviate.Client: - if not ( - self._client_config - and ( - self._client_config.get("weaviate_url") - or self._client_config.get("persistence_directory") - ) - ): - raise Exception("Client config is not set! or missing parameters") - - # decide the client type to be used, either persistent or httpclient - if self._client_config.get("persistence_directory"): - self._embedded_options = EmbeddedOptions( - persistence_data_path=self._client_config.get("persistence_directory") - ) - return weaviate.Client(embedded_options=self._embedded_options) - if self._client_config.get("weaviate_api_key"): - return weaviate.Client( - url=self._client_config["weaviate_url"], - auth_client_secret=weaviate.AuthApiKey( - api_key=self._client_config["weaviate_api_key"] - ), - ) - return weaviate.Client(url=self._client_config["weaviate_url"]) - - def __del__(self): - self.is_connected = False - if self._embedded_options: - self._client._connection.embedded_db.stop() - del self._embedded_options - self._embedded_options = None - self._client._connection.close() - if self._client: - del self._client - - def connect(self): - """Connect to a weaviate database.""" - if self.is_connected: - return self._client - - try: - self._client = self._get_client() - self.is_connected = True - return self._client - except Exception as e: - logger.error(f"Error connecting to weaviate client, {e}!") - self.is_connected = False - - def disconnect(self): - """Close the database connection.""" - - if not self.is_connected: - return - if self._embedded_options: - self._client._connection.embedded_db.stop() - del self._embedded_options - del self._client - self._embedded_options = None - self._client = None - self.is_connected = False - - def check_connection(self): - """Check the connection to the Weaviate database.""" - response_code = StatusResponse(False) - - try: - if self._client.is_live(): - response_code.success = True - except Exception as e: - logger.error(f"Error connecting to weaviate , {e}!") - response_code.error_message = str(e) - finally: - if response_code.success and not self.is_connected: - self.disconnect() - if not response_code.success and self.is_connected: - self.is_connected = False - - return response_code - - @staticmethod - def _get_weaviate_operator(operator: FilterOperator) -> str: - mapping = { - FilterOperator.EQUAL: "Equal", - FilterOperator.NOT_EQUAL: "NotEqual", - FilterOperator.LESS_THAN: "LessThan", - FilterOperator.LESS_THAN_OR_EQUAL: "LessThanEqual", - FilterOperator.GREATER_THAN: "GreaterThan", - FilterOperator.GREATER_THAN_OR_EQUAL: "GreaterThanEqual", - FilterOperator.IS_NULL: "IsNull", - FilterOperator.LIKE: "Like", - } - - if operator not in mapping: - raise Exception(f"Operator {operator} is not supported by weaviate!") - - return mapping[operator] - - @staticmethod - def _get_weaviate_value_type(value) -> str: - # https://github.com/weaviate/weaviate-python-client/blob/c760b1d59b2a222e770d53cc257b1bf993a0a592/weaviate/gql/filter.py#L18 - if isinstance(value, list): - value_list_types = { - str: "valueTextList", - int: "valueIntList", - float: "valueIntList", - bool: "valueBooleanList", - } - if not value: - raise Exception("Empty list is not supported") - value_type = value_list_types.get(type(value[0])) - - else: - value_primitive_types = { - str: "valueText", - int: "valueInt", - float: "valueInt", - datetime: "valueDate", - bool: "valueBoolean", - } - value_type = value_primitive_types.get(type(value)) - - if not value_type: - raise Exception(f"Value type {type(value)} is not supported by weaviate!") - - return value_type - - def _translate_condition( - self, - table_name: str, - conditions: List[FilterCondition] = None, - meta_conditions: List[FilterCondition] = None, - ) -> Optional[dict]: - """ - Translate a list of FilterCondition objects a dict that can be used by Weaviate. - E.g., - [ - FilterCondition( - column="metadata.created_at", - op=FilterOperator.LESS_THAN, - value="2020-01-01", - ), - FilterCondition( - column="metadata.created_at", - op=FilterOperator.GREATER_THAN, - value="2019-01-01", - ) - ] - --> - {"operator": "And", - "operands": [ - { - "path": ["created_at"], - "operator": "LessThan", - "valueText": "2020-01-01", - }, - { - "path": ["created_at"], - "operator": "GreaterThan", - "valueInt": "2019-01-01", - }, - ]} - """ - table_name = table_name.capitalize() - metadata_table_name = table_name.capitalize() + "_metadata" - # - if not (conditions or meta_conditions): - return None - - # we translate each condition into a single dict - # conditions on columns - weaviate_conditions = [] - if conditions: - for condition in conditions: - column_key = condition.column - value_type = self._get_weaviate_value_type(condition.value) - weaviate_conditions.append( - { - "path": [column_key], - "operator": self._get_weaviate_operator(condition.op), - value_type: condition.value, - } - ) - # condition on metadata columns - if meta_conditions: - for condition in meta_conditions: - meta_key = condition.column.split(".")[-1] - value_type = self._get_weaviate_value_type(condition.value) - weaviate_conditions.append( - { - "path": [ - "associatedMetadata", - metadata_table_name, - meta_key, - ], - "operator": self._get_weaviate_operator(condition.op), - value_type: condition.value, - } - ) - - # we combine all conditions into a single dict - all_conditions = ( - {"operator": "And", "operands": weaviate_conditions} - # combining all conditions if there are more than one conditions - if len(weaviate_conditions) > 1 - # only a single condition - else weaviate_conditions[0] - ) - return all_conditions - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ): - table_name = table_name.capitalize() - # columns which we will always provide in the result - filters = None - if conditions: - non_metadata_conditions = [ - condition - for condition in conditions - if not condition.column.startswith(TableField.METADATA.value) - and condition.column != TableField.SEARCH_VECTOR.value - and condition.column != TableField.EMBEDDINGS.value - ] - metadata_conditions = [ - condition - for condition in conditions - if condition.column.startswith(TableField.METADATA.value) - ] - filters = self._translate_condition( - table_name, - non_metadata_conditions if non_metadata_conditions else None, - metadata_conditions if metadata_conditions else None, - ) - - # check if embedding vector filter is present - vector_filter = ( - None - if not conditions - else [ - condition - for condition in conditions - if condition.column == TableField.SEARCH_VECTOR.value - or condition.column == TableField.EMBEDDINGS.value - ] - ) - - for col in ["id", "embeddings", "distance", "metadata"]: - if col in columns: - columns.remove(col) - - metadata_table = table_name.capitalize() + "_metadata" - - metadata_fields = " ".join( - [ - prop["name"] - for prop in self._client.schema.get(metadata_table)["properties"] - ] - ) - - # query to get all metadata fields - metadata_query = ( - f"associatedMetadata {{ ... on {metadata_table} {{ {metadata_fields} }} }}" - ) - - if columns: - query = self._client.query.get( - table_name, - columns + [metadata_query], - ).with_additional(["id vector distance"]) - else: - query = self._client.query.get( - table_name, - [metadata_query], - ).with_additional(["id vector distance"]) - if vector_filter: - # similarity search - # assuming the similarity search is on content - # assuming there would be only one vector based search per query - vector_filter = vector_filter[0] - near_vector = { - "vector": ast.literal_eval(vector_filter.value) - if isinstance(vector_filter.value, str) - else vector_filter.value - } - query = query.with_near_vector(near_vector) - if filters: - query = query.with_where(filters) - if limit: - query = query.with_limit(limit) - result = query.do() - result = result["data"]["Get"][table_name.capitalize()] - ids = [query_obj["_additional"]["id"] for query_obj in result] - contents = [query_obj.get("content") for query_obj in result] - distances = [ - query_obj.get("_additional").get("distance") for query_obj in result - ] - # distances will be null for non vector/embedding query - vectors = [query_obj.get("_additional").get("vector") for query_obj in result] - metadatas = [query_obj.get("associatedMetadata")[0] for query_obj in result] - - payload = { - TableField.ID.value: ids, - TableField.CONTENT.value: contents, - TableField.METADATA.value: metadatas, - TableField.EMBEDDINGS.value: vectors, - TableField.DISTANCE.value: distances, - } - - if columns: - payload = { - column: payload[column] - for column in columns + ["id", "embeddings", "distance", "metadata"] - if column != TableField.EMBEDDINGS.value - } - - # always include distance - if distances: - payload[TableField.DISTANCE.value] = distances - result_df = pd.DataFrame(payload) - return result_df - - def insert( - self, table_name: str, data: pd.DataFrame, columns: List[str] = None - ): - """ - Insert data into the Weaviate database. - """ - - table_name = table_name.capitalize() - - # drop columns with all None values - - data.dropna(axis=1, inplace=True) - - data = data.to_dict(orient="records") - # parsing the records one by one as we need to update metadata (which has variable columns) - for record in data: - metadata_data = record.get(TableField.METADATA.value) - data_object = {"content": record.get(TableField.CONTENT.value)} - data_obj_id = ( - record[TableField.ID.value] - if TableField.ID.value in record.keys() - else generate_uuid5(data_object) - ) - obj_id = self._client.data_object.create( - data_object=data_object, - class_name=table_name, - vector=record[TableField.EMBEDDINGS.value], - uuid=data_obj_id, - ) - if metadata_data: - meta_id = self.add_metadata(metadata_data, table_name) - self._client.data_object.reference.add( - from_uuid=obj_id, - from_property_name="associatedMetadata", - to_uuid=meta_id, - ) - - def update( - self, table_name: str, data: pd.DataFrame, columns: List[str] = None - ): - """ - Update data in the weaviate database. - """ - table_name = table_name.capitalize() - metadata_table_name = table_name.capitalize() + "_metadata" - data_list = data.to_dict("records") - for row in data_list: - non_metadata_keys = [ - key - for key in row.keys() - if key and not key.startswith(TableField.METADATA.value) - ] - metadata_keys = [ - key.split(".")[1] - for key in row.keys() - if key and key.startswith(TableField.METADATA.value) - ] - - id_filter = {"path": ["id"], "operator": "Equal", "valueText": row["id"]} - metadata_id_query = f"associatedMetadata {{ ... on {metadata_table_name} {{ _additional {{ id }} }} }}" - result = ( - self._client.query.get(table_name, metadata_id_query) - .with_additional(["id"]) - .with_where(id_filter) - .do() - ) - - metadata_id = result["data"]["Get"][table_name][0]["associatedMetadata"][0][ - "_additional" - ]["id"][0] - # updating table - self._client.data_object.update( - uuid=row["id"], - class_name=table_name, - data_object={key: row[key] for key in non_metadata_keys}, - ) - # updating metadata - self._client.data_object.update( - uuid=metadata_id, - class_name=metadata_table_name, - data_object={key: row[key] for key in metadata_keys}, - ) - - def delete( - self, table_name: str, conditions: List[FilterCondition] = None - ): - table_name = table_name.capitalize() - non_metadata_conditions = [ - condition - for condition in conditions - if not condition.column.startswith(TableField.METADATA.value) - and condition.column != TableField.SEARCH_VECTOR.value - and condition.column != TableField.EMBEDDINGS.value - ] - metadata_conditions = [ - condition - for condition in conditions - if condition.column.startswith(TableField.METADATA.value) - ] - filters = self._translate_condition( - table_name, - non_metadata_conditions if non_metadata_conditions else None, - metadata_conditions if metadata_conditions else None, - ) - if not filters: - raise Exception("Delete query must have at least one condition!") - metadata_table_name = table_name.capitalize() + "_metadata" - # query to get metadata ids - metadata_query = f"associatedMetadata {{ ... on {metadata_table_name} {{ _additional {{ id }} }} }}" - result = ( - self._client.query.get(table_name, metadata_query) - .with_additional(["id"]) - .with_where(filters) - .do() - ) - result = result["data"]["Get"][table_name] - metadata_table_name = table_name.capitalize() + "_metadata" - table_ids = [] - metadata_ids = [] - for i in result: - table_ids.append(i["_additional"]["id"]) - metadata_ids.append(i["associatedMetadata"][0]["_additional"]["id"]) - self._client.batch.delete_objects( - class_name=table_name, - where={ - "path": ["id"], - "operator": "ContainsAny", - "valueTextArray": table_ids, - }, - ) - self._client.batch.delete_objects( - class_name=metadata_table_name, - where={ - "path": ["id"], - "operator": "ContainsAny", - "valueTextArray": metadata_ids, - }, - ) - - def create_table(self, table_name: str, if_not_exists=True): - """ - Create a class with the given name in the weaviate database. - """ - # separate metadata table for each table (as different tables will have different metadata columns) - # this reduces the query time using metadata but increases the insertion time - metadata_table_name = table_name + "_metadata" - if not self._client.schema.exists(metadata_table_name): - self._client.schema.create_class({"class": metadata_table_name}) - if not self._client.schema.exists(table_name): - self._client.schema.create_class( - { - "class": table_name, - "properties": [ - {"dataType": ["text"], "name": prop["name"]} - for prop in self.SCHEMA - if prop["name"] != "id" - and prop["name"] != "embeddings" - and prop["name"] != "metadata" - ], - "vectorIndexType": "hnsw", - } - ) - add_prop = { - "name": "associatedMetadata", - "dataType": [metadata_table_name.capitalize()], - } - self._client.schema.property.create(table_name.capitalize(), add_prop) - - def drop_table(self, table_name: str, if_exists=True): - """ - Delete a class from the weaviate database. - """ - table_name = table_name.capitalize() - metadata_table_name = table_name.capitalize() + "_metadata" - table_id_query = self._client.query.get(table_name).with_additional(["id"]).do() - table_ids = [ - i["_additional"]["id"] for i in table_id_query["data"]["Get"][table_name] - ] - metadata_table_id_query = ( - self._client.query.get(metadata_table_name).with_additional(["id"]).do() - ) - metadata_ids = [ - i["_additional"]["id"] - for i in metadata_table_id_query["data"]["Get"][metadata_table_name] - ] - self._client.batch.delete_objects( - class_name=table_name, - where={ - "path": ["id"], - "operator": "ContainsAny", - "valueTextArray": table_ids, - }, - ) - self._client.batch.delete_objects( - class_name=metadata_table_name, - where={ - "path": ["id"], - "operator": "ContainsAny", - "valueTextArray": metadata_ids, - }, - ) - try: - self._client.schema.delete_class(table_name) - self._client.schema.delete_class(metadata_table_name) - except ValueError: - if not if_exists: - raise Exception(f"Table {table_name} does not exist!") - - def get_tables(self) -> HandlerResponse: - """ - Get the list of tables in the Weaviate database. - """ - query_tables = self._client.schema.get() - tables = [] - if query_tables: - tables = [table["class"] for table in query_tables["classes"]] - table_name = pd.DataFrame( - columns=["table_name"], - data=tables, - ) - return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=table_name) - - def get_columns(self, table_name: str) -> HandlerResponse: - table_name = table_name.capitalize() - # check if table exists - try: - table = self._client.schema.get(table_name) - except ValueError: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Table {table_name} does not exist!", - ) - data = pd.DataFrame( - data=[ - {"COLUMN_NAME": column["name"], "DATA_TYPE": column["dataType"][0]} - for column in table["properties"] - ] - ) - return Response(data_frame=data, resp_type=RESPONSE_TYPE.OK) - - def add_metadata(self, data: dict, table_name: str): - table_name = table_name.capitalize() - metadata_table_name = table_name.capitalize() + "_metadata" - self._client.schema.get(metadata_table_name) - # getting existing metadata fields - added_prop_list = [ - prop["name"] - for prop in self._client.schema.get(metadata_table_name)["properties"] - ] - # as metadata columns are not fixed, at every entry, a check takes place for the columns - for prop in data.keys(): - if prop not in added_prop_list: - if isinstance(data[prop], int): - add_prop = { - "name": prop, - "dataType": ["int"], - } - elif isinstance(data[prop][0], datetime): - add_prop = { - "name": prop, - "dataType": ["date"], - } - else: - add_prop = { - "name": prop, - "dataType": ["string"], - } - # when a new column is identified, it is added to the metadata table - self._client.schema.property.create(metadata_table_name, add_prop) - metadata_id = self._client.data_object.create( - data_object=data, class_name=table_name.capitalize() + "_metadata" - ) - return metadata_id diff --git a/mindsdb/integrations/handlers/webz_handler/README.md b/mindsdb/integrations/handlers/webz_handler/README.md deleted file mode 100644 index e38a8a19fca..00000000000 --- a/mindsdb/integrations/handlers/webz_handler/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# Webz Handler - -This handler integrates with the [Webz API](https://docs.webz.io/reference#1) to make -webz data available to use for model training, predictions and automations. - - - -## Connect to the Webz API -The first step is to create a database with the new `webz` engine -by passing in the required `token` parameter: - -``` -CREATE DATABASE webz_datasource -WITH - ENGINE = 'webz', - PARAMETERS = { - "token": "" -}; -``` - -## Querying news articles, blogs entries or open discussions - -With the previous established connection, you can for instance, -query the 5 most relevant news articles, in english that contain -the text AI in the title - -``` -SELECT * -FROM webz_datasource.posts -WHERE query="language:english title:AI site_type:news" -ORDER BY posts.relevancy DESC -LIMIT 5; -``` - -The returned results should have rows like this: - -| thread__uuid | thread__url | thread__site_full | thread__site | thread__site_section | thread__section_title | thread__title | thread__title_full | thread__published | thread__replies_count | thread__participants_count | thread__site_type | thread__main_image | thread__country | thread__site_categories | thread__social__facebook__likes | thread__social__facebook__shares | thread__social__facebook__comments | thread__social__gplus__shares | thread__social__pinterest__shares | thread__social__linkedin__shares | thread__social__stumbledupon__shares | thread__social__vk__shares | thread__performance_score | thread__domain_rank | thread__domain_rank_updated | thread__reach__per_million | thread__reach__page_views | thread__reach__updated | uuid | url | ord_in_thread | parent_url | author | published | title | text | language | external_links | external_images | rating | entities__persons | entities__organizations | entities__locations | crawled | -| ------------ | ----------- | ----------------- | ------------ | -------------------- | --------------------- | ------------- | ------------------ | ----------------- | --------------------- | -------------------------- | ----------------- | ------------------ | --------------- | ----------------------- | ------------------------------- | -------------------------------- | ---------------------------------- | ----------------------------- | --------------------------------- | -------------------------------- | ------------------------------------ | -------------------------- | ------------------------- | ------------------- | --------------------------- | -------------------------- | ------------------------- | ---------------------- | ---- | --- | ------------- | ---------- | ------ | --------- | ----- | ---- | -------- | -------------- | --------------- | ------ | ----------------- | ----------------------- | ------------------- | ------- | -| e893796adad8a85e6ab5202ac34b5791c8fbb017 | https://www.economist.com/business/2023/06/06/generative-ai-could-radically-alter-the-practice-of-law | www.economist.com | economist.com | http://feeds.feedburner.com/twitter.com/indiefulrok | BizToc | Generative AI could radically alter the practice of law | Generative AI could radically alter the practice of law | 2023-07-15T09:01:00.000+03:00 | 0 | 0 | news | https://c.biztoc.com/p/f96527e070f97968/s.webp | US | ["media","law_government_and_politics","politics"] | 2169 | 501 | 843 | 0 | 2 | 0 | 0 | 1 | 5 | 253 | 2023-07-11T13:16:20.000+03:00 | [NULL] | [NULL] | [NULL] | e893796adad8a85e6ab5202ac34b5791c8fbb017 | https://www.economist.com/business/2023/06/06/generative-ai-could-radically-alter-the-practice-of-law | 0 | [NULL] | [NULL] | 2023-07-15T09:01:00.000+03:00 | Generative AI could radically alter the practice of law | Generative AI could radically alter the practice of law economist.com/business/2023/06/06/generative-ai-could-radically-alter-the-practice-of-law L a conservative bunch, befitting a profession that rewards preparedness, sagacity and respect for precedent. No doubt many enjoyed a chuckle at the tale of Steven Schwartz, a personal-injury lawyer at the New York firm Levidow, Levidow & Oberman, who last month used Chat to help him prepare a court filing. He relied a bit too heavily on the artificial-intelligence ( )… This story appeared on | english | [] | [] | [NULL] | [{"name":"steven schwartz","sentiment":"none"}] | [{"name":"levidow, levidow & oberman","sentiment":"none"}] | [{"name":"new york","sentiment":"none"}] | 2023-07-15T09:52:32.226+03:00 | - -## Queries reviews - -You can also query the last 10 reviews crawled, in English, -with rating equal or higher than 4 - -``` -SELECT * -FROM webz_datasource.reviews -WHERE query="language:english rating:>=4" -ORDER BY reviews.crawled ASC -LIMIT 10; -``` - -The returned results should have rows like this: - -| item__uuid | item__url | item__site_full | item__site | item__site_section | item__section_title | item__title | item__title_full | item__published | item__reviews_count | item__reviewers_count | item__main_image | item__country | item__site_categories | item__domain_rank | item__domain_rank_updated | uuid | url | ord_in_thread | author | published | title | text | language | external_links | rating | crawled | -| ---------- | --------- | --------------- | ---------- | ------------------ | ------------------- | ----------- | ---------------- | --------------- | ------------------- | --------------------- | ---------------- | ------------- | --------------------- | ----------------- | ------------------------- | ---- | --- | ------------- | ------ | --------- | ----- | ---- | -------- | -------------- | ------ | ------- | diff --git a/mindsdb/integrations/handlers/webz_handler/__about__.py b/mindsdb/integrations/handlers/webz_handler/__about__.py deleted file mode 100644 index 306b450f271..00000000000 --- a/mindsdb/integrations/handlers/webz_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Webz handler" -__package_name__ = "mindsdb_webz_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for the " -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/webz_handler/__init__.py b/mindsdb/integrations/handlers/webz_handler/__init__.py deleted file mode 100644 index 18696d0424e..00000000000 --- a/mindsdb/integrations/handlers/webz_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version - -try: - from .webz_handler import WebzHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Webz" -name = "webz" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/webz_handler/icon.svg b/mindsdb/integrations/handlers/webz_handler/icon.svg deleted file mode 100644 index b133fdfeff9..00000000000 --- a/mindsdb/integrations/handlers/webz_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/webz_handler/requirements.txt b/mindsdb/integrations/handlers/webz_handler/requirements.txt deleted file mode 100644 index 3e2d5fc0709..00000000000 --- a/mindsdb/integrations/handlers/webz_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -# include python libraries required by this handler -webzio==1.0.2 -dotty-dict==1.3.1 diff --git a/mindsdb/integrations/handlers/webz_handler/webz_handler.py b/mindsdb/integrations/handlers/webz_handler/webz_handler.py deleted file mode 100644 index d6a1db80d69..00000000000 --- a/mindsdb/integrations/handlers/webz_handler/webz_handler.py +++ /dev/null @@ -1,175 +0,0 @@ -import os -import time -from typing import Any, Dict - -import pandas as pd -import webzio -from dotty_dict import dotty -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.webz_handler.webz_tables import ( - WebzPostsTable, - WebzReviewsTable, -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.utilities import log -from mindsdb.utilities.config import Config - -logger = log.getLogger(__name__) - - -class WebzHandler(APIHandler): - """A class for handling connections and interactions with the Webz API.""" - - API_CALL_EXEC_LIMIT_SECONDS = 60 - AVAILABLE_CONNECTION_ARGUMENTS = ["token"] - - def __init__(self, name: str = None, **kwargs): - """Registers all tables and prepares the handler for an API connection. - - Args: - name: (str): The handler name to use - """ - super().__init__(name) - - args = kwargs.get("connection_data", {}) - self.connection_args = self._read_connection_args(name, **args) - - self.client = None - self.is_connected = False - self.max_page_size = 100 - - self._register_table(WebzPostsTable.TABLE_NAME, WebzPostsTable(self)) - self._register_table(WebzReviewsTable.TABLE_NAME, WebzReviewsTable(self)) - - def _read_connection_args(self, name: str = None, **kwargs) -> Dict[str, Any]: - """Read the connection arguments by following the order of precedence below: - - 1. PARAMETERS object - 2. Environment Variables - 3. MindsDB Config File - - """ - filtered_args = {} - handler_config = Config().get(f"{name.lower()}_handler", {}) - for k in type(self).AVAILABLE_CONNECTION_ARGUMENTS: - if k in kwargs: - filtered_args[k] = kwargs[k] - elif f"{name.upper()}_{k.upper()}" in os.environ: - filtered_args[k] = os.environ[f"{name.upper()}_{k.upper()}"] - elif k in handler_config: - filtered_args[k] = handler_config[k] - return filtered_args - - def connect(self) -> object: - """Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - HandlerStatusResponse - """ - if self.is_connected and self.client is not None: - return self.client - - webzio.config(token=self.connection_args["token"]) - self.client = webzio - self.is_connected = True - return self.client - - def check_connection(self) -> StatusResponse: - """Check connection to the handler - Returns: - HandlerStatusResponse - """ - response = StatusResponse(False) - try: - webzio_client = self.connect() - webzio_client.query("filterWebContent", {"q": "AI", "size": 1}) - response.success = True - except Exception as e: - response.error_message = f"Error connecting to Webz api: {e}." - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str = None) -> Response: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - dict for mongo, api's json etc) - Returns: - HandlerResponse - """ - ast = parse_sql(query) - return self.query(ast) - - def _parse_item(self, item, output_colums): - dotted_item = dotty(item) - return {field.replace(".", "__"): dotted_item[field] for field in output_colums} - - def call_webz_api( - self, method_name: str = None, params: Dict = None - ) -> pd.DataFrame: - """Calls the API method with the given params. - - Returns results as a pandas DataFrame. - - Args: - method_name (str): Method name to call - params (Dict): Params to pass to the API call - """ - table_name = method_name - table = self._tables[table_name] - - client = self.connect() - - left = None - count_results = None - - data = [] - limit_exec_time = time.time() + type(self).API_CALL_EXEC_LIMIT_SECONDS - - if "size" in params: - count_results = params["size"] - - # GET param q is mandatory, so in order to collect all data, - # it's needed to use as a query an asterisk (*) - if "q" not in params: - params["q"] = "*" - - while True: - if time.time() > limit_exec_time: - raise RuntimeError("Handler request timeout error") - - if count_results is not None: - left = count_results - len(data) - if left == 0: - break - elif left < 0: - # got more results that we need - data = data[:left] - break - - if left > self.max_page_size: - params["size"] = self.max_page_size - else: - params["size"] = left - - logger.debug( - f"Calling Webz API: {table.ENDPOINT} with params ({params})" - ) - - output = ( - client.query(table.ENDPOINT, params) - if len(data) == 0 - else client.get_next() - ) - for item in output.get(table_name, []): - data.append(self._parse_item(item, table.OUTPUT_COLUMNS)) - - df = pd.DataFrame(data) - return df diff --git a/mindsdb/integrations/handlers/webz_handler/webz_tables.py b/mindsdb/integrations/handlers/webz_handler/webz_tables.py deleted file mode 100644 index 94cf76bbe1b..00000000000 --- a/mindsdb/integrations/handlers/webz_handler/webz_tables.py +++ /dev/null @@ -1,218 +0,0 @@ -from typing import List - -import pandas as pd -from mindsdb_sql_parser import ast - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - - -class WebzBaseAPITable(APITable): - - ENDPOINT = None - OUTPUT_COLUMNS = [] - SORTABLE_COLUMNS = [] - TABLE_NAME = None - - def select(self, query: ast.Select) -> pd.DataFrame: - """Selects data from the API and returns it as a pandas DataFrame - - Returns dataframe representing the API results. - - Args: - query (ast.Select): SQL SELECT query - - """ - conditions = extract_comparison_conditions(query.where) - params = {} - - for op, arg1, arg2 in conditions: - if op != "=": - raise NotImplementedError(f"Unsupported Operator: {op}") - elif arg1 == "query": - params["q"] = arg2 - else: - raise NotImplementedError(f"Unknown clause: {arg1}") - - if query.order_by: - if len(query.order_by) > 1: - raise ValueError("Unsupported to order by multiple fields") - order_item = query.order_by[0] - sort_column = ".".join(order_item.field.parts[1:]) - # make sure that column is sortable - if sort_column not in type(self).SORTABLE_COLUMNS: - raise ValueError(f"Order by unknown column {sort_column}") - params.update({"sort": sort_column, "order": order_item.direction.lower()}) - - if query.limit is not None: - params["size"] = query.limit.value - result = self.handler.call_webz_api( - method_name=type(self).TABLE_NAME, params=params - ) - - # filter targets - columns = [] - for target in query.targets: - if isinstance(target, ast.Star): - columns = self.get_columns() - break - elif isinstance(target, ast.Identifier): - columns.append(target.parts[-1]) - else: - raise NotImplementedError(f"Unknown query target {type(target)}") - - # columns to lower case - columns = [name.lower() for name in columns] - - if len(result) == 0: - return pd.DataFrame([], columns=columns) - - # add absent columns - for col in set(columns) & set(result.columns) ^ set(columns): - result[col] = None - - # filter by columns - result = result[columns] - - # Rename columns - for target in query.targets: - if target.alias: - result.rename( - columns={target.parts[-1]: str(target.alias)}, inplace=True - ) - return result - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - List of columns - - """ - return [column.replace(".", "__") for column in type(self).OUTPUT_COLUMNS] - - -class WebzPostsTable(WebzBaseAPITable): - """To interact with structured posts data from news articles, blog posts and online discussions - provided through the Webz.IO API. - - """ - - ENDPOINT = "filterWebContent" - OUTPUT_COLUMNS = [ - "thread.uuid", - "thread.url", - "thread.site_full", - "thread.site", - "thread.site_section", - "thread.section_title", - "thread.title", - "thread.title_full", - "thread.published", - "thread.replies_count", - "thread.participants_count", - "thread.site_type", - "thread.main_image", - "thread.country", - "thread.site_categories", - "thread.social.facebook.likes", - "thread.social.facebook.shares", - "thread.social.facebook.comments", - "thread.social.gplus.shares", - "thread.social.pinterest.shares", - "thread.social.linkedin.shares", - "thread.social.stumbledupon.shares", - "thread.social.vk.shares", - "thread.performance_score", - "thread.domain_rank", - "thread.domain_rank_updated", - "thread.reach.per_million", - "thread.reach.page_views", - "thread.reach.updated", - "uuid", - "url", - "ord_in_thread", - "parent_url", - "author", - "published", - "title", - "text", - "language", - "external_links", - "external_images", - "rating", - "entities.persons", - "entities.organizations", - "entities.locations", - "crawled", - ] - SORTABLE_COLUMNS = [ - "crawled", - "relevancy", - "social.facebook.likes", - "social.facebook.shares", - "social.facebook.comments", - "social.gplus.shares", - "social.pinterest.shares", - "social.linkedin.shares", - "social.stumbledupon.shares", - "social.vk.shares", - "replies_count", - "participants_count", - "performance_score", - "published", - "thread.published", - "domain_rank", - "ord_in_thread", - "rating", - ] - TABLE_NAME = "posts" - - -class WebzReviewsTable(WebzBaseAPITable): - """To interact with structured reviews data from hundreds of review sites, - provided through the Webz.IO API. - - """ - - ENDPOINT = "reviewFilter" - OUTPUT_COLUMNS = [ - "item.uuid", - "item.url", - "item.site_full", - "item.site", - "item.site_section", - "item.section_title", - "item.title", - "item.title_full", - "item.published", - "item.reviews_count", - "item.reviewers_count", - "item.main_image", - "item.country", - "item.site_categories", - "item.domain_rank", - "item.domain_rank_updated", - "uuid", - "url", - "ord_in_thread", - "author", - "published", - "title", - "text", - "language", - "external_links", - "rating", - "crawled", - ] - SORTABLE_COLUMNS = [ - "crawled", - "relevancy", - "reviews_count", - "reviewers_count", - "spam_score", - "domain_rank", - "ord_in_thread", - "rating", - ] - TABLE_NAME = "reviews" diff --git a/mindsdb/integrations/handlers/whatsapp_handler/README.md b/mindsdb/integrations/handlers/whatsapp_handler/README.md deleted file mode 100644 index c493e0187d9..00000000000 --- a/mindsdb/integrations/handlers/whatsapp_handler/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# WhatsApp Handler - -Whatsapp handler for MindsDB facilitates the ability to send messages to your Whatsapp using Twilio Whastapp APIs and retrieve the conversation history. - ---- - -## About Whatsapp - -WhatsApp is a popular messaging application that allows users to send text messages, voice messages, make voice and video calls, share media files, and more. - -## Whatsapp Handler Implementation - -This handler was implemented using [Twilio Python SDK](https://www.twilio.com/docs/libraries/python). This SDK provides a very effective way to integrate with the Twilio Whatapp API. - -## Whatsapp Handler Initialization - -The Whatsapp handler is initialized with the following parameters: - -- `account_sid`: Twilio Account SID -- `auth_token`: Twilio Authentication Token -- `to_number`: Required a phone number to send text messages to -- `from_number`: Required a phone number to send text messages from - -## How to get your Twilio credentials - -1. Sign up for a Twilio account or log into your existing account. -2. Navigate to the [Twilio Console Dashboard](https://www.twilio.com/console). -3. Here you will find your `ACCOUNT SID` and `AUTH TOKEN`. -4. To get a Twilio phone number, navigate to the "Phone Numbers" section and either use an existing number or buy a new one. -5. Store these as environment variables: `TWILIO_ACCOUNT_SID`, `TWILIO_AUTH_TOKEN`, and `TWILIO_PHONE_NUMBER` respectively. - -## Trying out Whatsapp Conversations with Twilio - -1. Navigate to this [guide](https://www.twilio.com/docs/conversations/use-twilio-sandbox-for-whatsapp) for complete documentation or follow the following steps. -2. Go to the [Twilio Console Dashboard](https://console.twilio.com/us1/develop/conversations/tryout/whatsapp) -3. Select "User-Initiated Conversation" -4. Either open whatsapp and send `join ` or `Scan the QR on the mobile phone`. - -All things are in place now, in order to use the Whatsapp Using MindsDB. - -## Implemented Features - -- Send a Whatsapp Message to a given number with a specified body. -- Fetch the last `n` messages sent or received by the whatsapp number. - -## Example Usage - -```sql --- Creating a Database -CREATE DATABASE whatsapp_test -WITH ENGINE = "whatsapp", -PARAMETERS = { - "account_sid": "YOUR_ACCOUNT_SID", - "auth_token": "YOUR_AUTH_TOKEN" - }; -``` - -You can now run queries as follows: - -```sql --- Get all messages -SELECT * FROM whatsapp_test.messages LIMIT 100; -``` - -```sql --- Get message with sid -SELECT * FROM whatsapp_test.messages where sid="SM375f075778f91b56634ce5d92db249cd"; -``` - -```sql --- filter to and from -SELECT * FROM whatsapp_test.messages where from_number="whatsapp:+14155238886"; -SELECT * FROM whatsapp_test.messages where to_number="whatsapp:+14155238886"; -``` - -```sql --- Send a whatsapp message -INSERT INTO whatsapp_test.messages (body, from_number, to_number) -VALUES('woww, such a cool integration', 'whatsapp:+14155238886', 'whatsapp:+14155238886'); -``` \ No newline at end of file diff --git a/mindsdb/integrations/handlers/whatsapp_handler/__about__.py b/mindsdb/integrations/handlers/whatsapp_handler/__about__.py deleted file mode 100644 index 833253c4b94..00000000000 --- a/mindsdb/integrations/handlers/whatsapp_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB WhatsApp Handler' -__package_name__ = 'mindsdb_whatsapp_handler' -__version__ = '0.0.1' -__description__ = 'MindsDB handler for WhatsApp' -__author__ = 'Tarun Chawla' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/whatsapp_handler/__init__.py b/mindsdb/integrations/handlers/whatsapp_handler/__init__.py deleted file mode 100644 index 0c9851b4fa5..00000000000 --- a/mindsdb/integrations/handlers/whatsapp_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .whatsapp_handler import ( - WhatsAppHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'WhatsApp' -name = 'whatsapp' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', - 'version', - 'name', - 'type', - 'title', - 'description', - 'import_error', - 'icon_path' -] diff --git a/mindsdb/integrations/handlers/whatsapp_handler/icon.svg b/mindsdb/integrations/handlers/whatsapp_handler/icon.svg deleted file mode 100644 index 4d4987772c8..00000000000 --- a/mindsdb/integrations/handlers/whatsapp_handler/icon.svg +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/whatsapp_handler/requirements.txt b/mindsdb/integrations/handlers/whatsapp_handler/requirements.txt deleted file mode 100644 index 1e2071a390f..00000000000 --- a/mindsdb/integrations/handlers/whatsapp_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -twilio \ No newline at end of file diff --git a/mindsdb/integrations/handlers/whatsapp_handler/whatsapp_handler.py b/mindsdb/integrations/handlers/whatsapp_handler/whatsapp_handler.py deleted file mode 100644 index c6853de0cd6..00000000000 --- a/mindsdb/integrations/handlers/whatsapp_handler/whatsapp_handler.py +++ /dev/null @@ -1,420 +0,0 @@ -import os -from twilio.rest import Client -import re -from datetime import datetime as datetime -from typing import List -import pandas as pd - -from mindsdb.utilities import log -from mindsdb.utilities.config import Config - -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.date_utils import parse_local_date - -from mindsdb.integrations.libs.api_handler import APIHandler, APITable - -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, project_dataframe, filter_dataframe - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class WhatsAppMessagesTable(APITable): - def select(self, query: ast.Select) -> Response: - """ - Retrieves messages sent/received from the database using Twilio Whatsapp API - Returns - Response: conversation_history - """ - - # Extract comparison conditions from the query - conditions = extract_comparison_conditions(query.where) - params = {} - filters = [] - - # Build the filters and parameters for the query - for op, arg1, arg2 in conditions: - if op == 'or': - raise NotImplementedError('OR is not supported') - - if arg1 == 'sent_at' and arg2 is not None: - date = parse_local_date(arg2) - if op == '>': - params['date_sent_after'] = date - elif op == '<': - params['date_sent_before'] = date - else: - raise NotImplementedError - - # also add to post query filter because date_sent_after=date1 will include date1 - filters.append([op, arg1, arg2]) - - elif arg1 == 'sid': - if op == '=': - params['sid'] = arg2 - else: - NotImplementedError('Only "from_number=" is implemented') - - elif arg1 == 'from_number': - if op == '=': - params['from_number'] = arg2 - else: - NotImplementedError('Only "from_number=" is implemented') - - elif arg1 == 'to_number': - if op == '=': - params['to_number'] = arg2 - else: - NotImplementedError('Only "to_number=" is implemented') - - else: - filters.append([op, arg1, arg2]) - - # Fetch messages based on the filters - result = self.handler.fetch_messages(params, df=True) - - # filter targets - result = filter_dataframe(result, filters) - - # If limit is specified - if query.limit is not None: - result = result[:int(query.limit.value)] - - # project targets - result = project_dataframe(result, query.targets, self.get_columns()) - - return result - - def get_columns(self): - return [ - 'sid', - 'from_number', - 'to_number', - 'body', - 'direction', - 'msg_status', - 'sent_at', # datetime.strptime(str(msg.date_sent), '%Y-%m-%d %H:%M:%S%z'), - 'account_sid', - 'price', - 'price_unit', - 'api_version', - 'uri' - ] - - def insert(self, query: ast.Insert): - """ - Sends a whatsapp message - - Args: - body: message body - from_number: number from which to send the message - to_number: number to which message will be sent - """ - - # get column names and values from the query - columns = [col.name for col in query.columns] - - ret = [] - - insert_params = ["body", "from_number", "to_number"] - for row in query.values: - params = dict(zip(columns, row)) - - # Check text length - max_text_len = 1500 - text = params["body"] - words = re.split('( )', text) - messages = [] - - """ - Regex for matching if any URls are present, if yes then replace with string of hyphens(-) - - Example: - words = ['Check', ' ', 'out', ' ', 'this', ' ', 'cool', ' ', 'website:', ' ', 'https://example.com.', "It's", ' ', 'awesome!'] - - After parsing through regex ('https://example.com') URL is matched - - Final output: - messages = ['Check - out - this - cool - website: ----------------------- "It\'s - awesome!'] - """ - - text2 = '' - pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' - for word in words: - # replace the links in word to string with the length as twitter short url (23) - word2 = re.sub(pattern, '-' * 23, word) - if len(text2) + len(word2) > max_text_len - 3 - 7: # 3 is for ..., 7 is for (10/11) - messages.append(text2.strip()) - - text2 = '' - text2 += word - - # Parse last message - if text2.strip() != '': - messages.append(text2.strip()) - - len_messages = len(messages) - - # Modify message based on the length - for i, text in enumerate(messages): - if i < len_messages - 1: - text += '...' - else: - text += ' ' - - if i >= 1: - text += f'({i + 1}/{len_messages})' - - # Pass parameters and call 'send_message' - params['body'] = text - params_to_send = {key: params[key] for key in insert_params if (key in params)} - ret_row = self.handler.send_message(params_to_send, ret_as_dict=True) - - # Save the results - ret_row['body'] = text - ret.append(ret_row) - - return pd.DataFrame(ret) - - -class WhatsAppHandler(APIHandler): - """ - A class for handling connections and interactions with Twilio WhatsApp API. - Args: - account_sid(str): Accound ID of the twilio account. - auth_token(str): Authentication Token obtained from the twilio account. - """ - - def __init__(self, name=None, **kwargs): - """ - Initializes the connection by checking all the params are provided by the user. - """ - super().__init__(name) - - args = kwargs.get('connection_data', {}) - self.connection_args = {} - handler_config = Config().get('whatsapp_handler', {}) - for k in ['account_sid', 'auth_token']: - if k in args: - self.connection_args[k] = args[k] - elif f'TWILIO_{k.upper()}' in os.environ: - self.connection_args[k] = os.environ[f'TWILIO_{k.upper()}'] - elif k in handler_config: - self.connection_args[k] = handler_config[k] - self.client = None - self.is_connected = False - - messages = WhatsAppMessagesTable(self) - self._register_table('messages', messages) - - def connect(self): - """ - Authenticate with the Twilio API using the provided `account_SID` and `auth_token`. - """ - if self.is_connected is True: - return self.client - - self.client = Client( - self.connection_args['account_sid'], - self.connection_args['auth_token'] - ) - - self.is_connected = True - return self.client - - def check_connection(self) -> StatusResponse: - """ - Checks the connection by performing a basic operation with the Twilio API. - """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - - except Exception as e: - response.error_message = f'Error connecting to Twilio API: {str(e)}. Check credentials.' - logger.error(response.error_message) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def parse_native_query(self, query_string: str): - """Parses the native query string of format method(arg1=val1, arg2=val2, ...) and returns the method name and arguments.""" - - # Adjust regex to account for the possibility of no arguments inside the parenthesis - match = re.match(r'(\w+)\(([^)]*)\)', query_string) - if not match: - raise ValueError(f"Invalid query format: {query_string}") - - method_name = match.group(1) - arg_string = match.group(2) - - # Extract individual arguments - args = {} - if arg_string: # Check if there are any arguments - for arg in arg_string.split(','): - arg = arg.strip() - key, value = arg.split('=') - args[key.strip()] = value.strip() - - return method_name, args - - def native_query(self, query_string: str = None): - """ - Retreievs the native query from the `parse_native_query` and calls appropriate function and returns the result of the query as a Response object. - """ - method_name, params = self.parse_native_query(query_string) - if method_name == 'send_message': - response = self.send_message(params) - else: - raise ValueError(f"Method '{method_name}' not supported by TwilioHandler") - - return response - - def fetch_messages(self, params, df=False): - """ - Gets conversation history - - Returns: - Response: conversation history - """ - limit = int(params.get('limit', 1000)) - sid = params.get('sid', None) - # Convert date strings to datetime objects if provided - date_sent_after = params.get('date_sent_after', None) - date_sent_before = params.get('date_sent_before', None) - # Extract 'from_' and 'body' search criteria from params - from_number = params.get('from_number', None) - to_number = params.get('to_number', None) - args = { - 'limit': limit, - 'date_sent_after': date_sent_after, - 'date_sent_before': date_sent_before, - 'from_': from_number, - 'to': to_number - } - - args = {arg: val for arg, val in args.items() if val is not None} - if sid: - messages = [self.client.messages(sid).fetch()] - else: - messages = self.client.messages.list(**args) - - # Extract all possible properties for each message - data = [] - for msg in messages: - msg_data = { - 'sid': msg.sid, - 'to_number': msg.to, - 'from_number': msg.from_, - 'body': msg.body, - 'direction': msg.direction, - 'msg_status': msg.status, - 'sent_at': msg.date_created.replace(tzinfo=None), - 'account_sid': msg.account_sid, - 'price': msg.price, - 'price_unit': msg.price_unit, - 'api_version': msg.api_version, - 'uri': msg.uri, - # 'media_url': [media.uri for media in msg.media.list()] - # ... Add other properties as needed - } - data.append(msg_data) - - # Create a DataFrame - result_df = pd.DataFrame(data) - - # Filter rows where 'from_number' or 'to_number' begins with 'whatsapp:' - result_df = result_df[result_df['from_number'].str.startswith('whatsapp:') | result_df['to_number'].str.startswith('whatsapp:')] - - if df is True: - return pd.DataFrame(result_df) - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(result_df)) - - def send_message(self, params, ret_as_dict=False) -> Response: - """ - Sends a message to the given Whatsapp number. - - Args: - body: message body - from_number: number from which to send the message - to_number: number to which message will be sent - """ - try: - message = self.client.messages.create( - body=params.get('body'), - to=params.get('to_number'), - from_=params.get('from_number') - ) - - if ret_as_dict is True: - return {"sid": message.sid, "from": message.from_, "to": message.to, "message": message.body, "status": message.status} - - return Response( - RESPONSE_TYPE.MESSAGE, - sid=message.sid, - from_=message.from_, - to=message.to, - body=message.body, - status=message.status - ) - - except Exception as e: - # Log the exception for debugging purposes - logger.error(f"Error sending message: {str(e)}") - logger.exception(f"Error sending message: {str(e)}") - raise Exception("Error sending message") - - def call_whatsapp_api(self, method_name: str = None, params: dict = None): - """ - Calls specific method specified. - - Args: - method_name: to call specific method - params: parameters to call the method - - Returns: - List of dictionaries as a result of the method call - """ - api = self.connect() - method = getattr(api, method_name) - - try: - result = method(**params) - except Exception as e: - error = f"Error calling method '{method_name}' with params '{params}': {e}" - logger.error(error) - raise e - - if 'messages' in result: - result['messages'] = self.convert_channel_data(result['messages']) - - return [result] - - def convert_channel_data(self, messages: List[dict]): - """ - Convert the list of channel dictionaries to a format that can be easily used in the data pipeline. - - Args: - channels: A list of channel dictionaries. - - Returns: - A list of channel dictionaries with modified keys and values. - """ - new_messages = [] - for message in messages: - new_message = { - 'id': message['id'], - 'name': message['name'], - 'created': datetime.fromtimestamp(float(message['created'])) - } - new_messages.append(new_message) - return new_messages diff --git a/mindsdb/integrations/handlers/xata_handler/README.md b/mindsdb/integrations/handlers/xata_handler/README.md deleted file mode 100644 index a592fdc892b..00000000000 --- a/mindsdb/integrations/handlers/xata_handler/README.md +++ /dev/null @@ -1,113 +0,0 @@ -# Xata Handler - -This is the implementation of the Xata for MindsDB. - -## Xata - -Xata is a serverless database platform powered by PostgreSQL. It aims to make the data part easy with the functionality your application needs to evolve and scale. - -## Implementation - -This handler uses `xata` python library connect to a xata instance - -The required arguments to establish a connection are: - -* `db_url`: Xata database url with region, database and, optionally the branch information -* `api_key`: personal Xata API key - -Optional arguments to create a table are: - -* `dimension`: default dimension of embeddings vector used to create table when using create (default=8) - -Optional arguments for vector similarity searches are: - -* `similarity_function`: similarity function to use for vector searches (default=cosineSimilarity) - -## Limitations - -- Performing queries on columns other than vector database specified columns is not supported - - You can use metadata column for general query filtering -- Metadata filtering does not work on vector similarity search queries - -## Usage - -### Create Database - -In order to make use of this handler and connect to a hosted Xata instance in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE xata_test -WITH - ENGINE = 'xata', - PARAMETERS = {{ - "api_key": "...", - "db_url": "..." -}; - -``` - -### Create Table - -You can insert data into a new table like so: - -```sql -CREATE TABLE xata_test.testingtable (SELECT * FROM pg.df) -``` - -The table will have default parameters as specified in `CREATE DATABASE` command - -### Select - -You can query a collection within your Xata as follows: - -```sql -SELECT * FROM xata_test.testingtable -``` - -```sql -SELECT * FROM xata_test.testingtable -WHERE testingtable.metadata.price > 10 AND testingtable.metadata.price <= 100 -``` - -```sql -SELECT * FROM xata_test.testingtable -WHERE content LIKE 'test%' -``` - -### Similarity search - -Search for similar embeddings by specifying search vector. Note that you cannot use metadata column with search vector. - -```sql -SELECT * FROM xata_test.testingtable -WHERE search_vector = '[1.0, 2.0, 3.0]' -``` - -```sql -SELECT * FROM xata_test.testingtable -WHERE search_vector = '[1.0, 2.0, 3.0]' -AND content LIKE 'test%' -``` - -### Insert - -You can insert into table in various ways: - -```sql -INSERT INTO xata_test.testingtable (content,metadata,embeddings) -VALUES ('this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]') -``` - -```sql -INSERT INTO xata_test.testingtable (content,metadata,embeddings) -SELECT content,metadata,embeddings FROM pg.df2 -``` - -## Delete - -You can delete only using ID and = operator. Deleting non existing records does not have any effect. - -```sql -DELETE FROM xata_test.testingtable -WHERE id = 'id2' -``` diff --git a/mindsdb/integrations/handlers/xata_handler/__about__.py b/mindsdb/integrations/handlers/xata_handler/__about__.py deleted file mode 100644 index 431c35101d4..00000000000 --- a/mindsdb/integrations/handlers/xata_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Xata handler" -__package_name__ = "mindsdb_xata_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Xata" -__author__ = "Aditya Azad" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/xata_handler/__init__.py b/mindsdb/integrations/handlers/xata_handler/__init__.py deleted file mode 100644 index 0a7b2758254..00000000000 --- a/mindsdb/integrations/handlers/xata_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .xata_handler import XataHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Xata" -name = "xata" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/xata_handler/connection_args.py b/mindsdb/integrations/handlers/xata_handler/connection_args.py deleted file mode 100644 index a096ad4e805..00000000000 --- a/mindsdb/integrations/handlers/xata_handler/connection_args.py +++ /dev/null @@ -1,35 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - db_url={ - "type": ARG_TYPE.STR, - "description": "Xata database url with region, database and, optionally the branch information", - "required": True, - }, - api_key={ - "type": ARG_TYPE.STR, - "description": "personal Xata API key", - "required": True, - "secret": True - }, - dimension={ - "type": ARG_TYPE.INT, - "description": "default dimension of embeddings vector used to create table when using create (default=8)", - "required": False, - }, - similarity_function={ - "type": ARG_TYPE.STR, - "description": "similarity function to use for vector searches (default=cosineSimilarity)", - "required": False, - } -) - -connection_args_example = OrderedDict( - db_url="https://...", - api_key="abc_def...", - dimension=8, - similarity_function="l1" -) diff --git a/mindsdb/integrations/handlers/xata_handler/icon.svg b/mindsdb/integrations/handlers/xata_handler/icon.svg deleted file mode 100644 index cd0d82f60fe..00000000000 --- a/mindsdb/integrations/handlers/xata_handler/icon.svg +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/xata_handler/requirements.txt b/mindsdb/integrations/handlers/xata_handler/requirements.txt deleted file mode 100644 index 061364e420b..00000000000 --- a/mindsdb/integrations/handlers/xata_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -xata diff --git a/mindsdb/integrations/handlers/xata_handler/tests/__init__.py b/mindsdb/integrations/handlers/xata_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/xata_handler/xata_handler.py b/mindsdb/integrations/handlers/xata_handler/xata_handler.py deleted file mode 100644 index 5e5c9e82a0d..00000000000 --- a/mindsdb/integrations/handlers/xata_handler/xata_handler.py +++ /dev/null @@ -1,347 +0,0 @@ -from typing import List, Optional - -import pandas as pd -import json -import xata -from xata.helpers import BulkProcessor - -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, - TableField, - VectorStoreHandler, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class XataHandler(VectorStoreHandler): - """This handler handles connection and execution of the Xata statements.""" - - name = "xata" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - self._connection_data = kwargs.get("connection_data") - self._client_config = { - "db_url": self._connection_data.get("db_url"), - "api_key": self._connection_data.get("api_key"), - } - self._create_table_params = { - "dimension": self._connection_data.get("dimension", 8), - } - self._select_params = { - "similarity_function": self._connection_data.get("similarity_function", "cosineSimilarity"), - } - self._client = None - self.is_connected = False - self.connect() - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """Connect to a Xata database.""" - if self.is_connected is True: - return self._client - try: - self._client = xata.XataClient(**self._client_config) - self.is_connected = True - return self._client - except Exception as e: - logger.error(f"Error connecting to Xata client: {e}!") - self.is_connected = False - - def disconnect(self): - """Close the database connection.""" - if self.is_connected is False: - return - self._client = None - self.is_connected = False - - def check_connection(self): - """Check the connection to the Xata database.""" - response_code = StatusResponse(False) - need_to_close = self.is_connected is False - # NOTE: no direct way to test this - # try getting the user, if it fails, it means that we are not connected - try: - resp = self._client.users().get() - if not resp.is_success(): - raise Exception(resp["message"]) - response_code.success = True - except Exception as e: - logger.error(f"Error connecting to Xata: {e}!") - response_code.error_message = str(e) - finally: - if response_code.success is True and need_to_close: - self.disconnect() - if response_code.success is False and self.is_connected is True: - self.is_connected = False - return response_code - - def create_table(self, table_name: str, if_not_exists=True) -> HandlerResponse: - """Create a table with the given name in the Xata database.""" - - resp = self._client.table().create(table_name) - if not resp.is_success(): - raise Exception(f"Unable to create table {table_name}: {resp['message']}") - resp = self._client.table().set_schema( - table_name=table_name, - payload={ - "columns": [ - { - "name": "embeddings", - "type": "vector", - "vector": {"dimension": self._create_table_params["dimension"]} - }, - {"name": "content", "type": "text"}, - {"name": "metadata", "type": "json"}, - ] - } - ) - if not resp.is_success(): - raise Exception(f"Unable to change schema of table {table_name}: {resp['message']}") - - def drop_table(self, table_name: str, if_exists=True) -> HandlerResponse: - """Delete a table from the Xata database.""" - - resp = self._client.table().delete(table_name) - if not resp.is_success(): - raise Exception(f"Unable to delete table: {resp['message']}") - - def get_columns(self, table_name: str) -> HandlerResponse: - """Get columns of the given table""" - # Vector stores have predefined columns - try: - # But at least try to see if the table is valid - resp = self._client.table().get_columns(table_name) - if not resp.is_success(): - raise Exception(f"Error getting columns: {resp['message']}") - except Exception as e: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"{e}", - ) - return super().get_columns(table_name) - - def get_tables(self) -> HandlerResponse: - """Get the list of tables in the Xata database.""" - try: - table_names = pd.DataFrame( - columns=["TABLE_NAME"], - data=[table_data["name"] for table_data in self._client.branch().get_details()["schema"]["tables"]], - ) - return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=table_names) - except Exception as e: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Error getting list of tables: {e}", - ) - - def insert(self, table_name: str, data: pd.DataFrame, columns: List[str] = None): - """ Insert data into the Xata database. """ - if columns: - data = data[columns] - # Convert to records - data = data.to_dict("records") - # Convert metadata to json - for row in data: - if "metadata" in row: - row["metadata"] = json.dumps(row["metadata"]) - if len(data) > 1: - # Bulk processing - bp = BulkProcessor(self._client, throw_exception=True) - bp.put_records(table_name, data) - bp.flush_queue() - - elif len(data) == 0: - # Skip - return Response(resp_type=RESPONSE_TYPE.OK) - elif "id" in data[0] and TableField.ID.value in columns: - # If id present - id = data[0]["id"] - rest_of_data = data[0].copy() - del rest_of_data["id"] - - resp = self._client.records().insert_with_id( - table_name=table_name, - record_id=id, - payload=rest_of_data, - create_only=True, - columns=columns - ) - if not resp.is_success(): - raise Exception(resp["message"]) - - else: - # If id not present - resp = self._client.records().insert( - table_name=table_name, - payload=data[0], - columns=columns - ) - if not resp.is_success(): - raise Exception(resp["message"]) - - def update(self, table_name: str, data: pd.DataFrame, columns: List[str] = None) -> HandlerResponse: - """Update data in the Xata database.""" - # Not supported - return super().update(table_name, data, columns) - - def _get_xata_operator(self, operator: FilterOperator) -> str: - """Translate SQL operator to oprator understood by Xata filter language.""" - mapping = { - FilterOperator.EQUAL: "$is", - FilterOperator.NOT_EQUAL: "$isNot", - FilterOperator.LESS_THAN: "$lt", - FilterOperator.LESS_THAN_OR_EQUAL: "$le", - FilterOperator.GREATER_THAN: "$gt", - FilterOperator.GREATER_THAN_OR_EQUAL: "$gte", - FilterOperator.LIKE: "$pattern", - } - if operator not in mapping: - raise Exception(f"Operator '{operator}' is not supported!") - return mapping[operator] - - def _translate_non_vector_conditions(self, conditions: List[FilterCondition]) -> Optional[dict]: - """ - Translate a list of FilterCondition objects a dict that can be used by Xata for filtering. - E.g., - [ - FilterCondition( - column="metadata.price", - op=FilterOperator.LESS_THAN, - value=100, - ), - FilterCondition( - column="metadata.price", - op=FilterOperator.GREATER_THAN, - value=10, - ) - ] - --> - { - "metadata->price" { - "$gt": 10, - "$lt": 100 - }, - } - """ - if not conditions: - return None - # Translate metadata columns - for condition in conditions: - if condition.column.startswith(TableField.METADATA.value): - condition.column = condition.column.replace(".", "->") - # Generate filters - filters = {} - for condition in conditions: - # Skip search vector condition - if condition.column == TableField.SEARCH_VECTOR.value: - continue - current_filter = original_filter = {} - # Special case LIKE: needs pattern translation - if condition.op == FilterOperator.LIKE: - condition.value = condition.value.replace("%", "*").replace("_", "?") - # Generate substatment - current_filter[condition.column] = {self._get_xata_operator(condition.op): condition.value} - # Check for conflicting and insert - for key in original_filter: - if key in filters: - filters[key] = {**filters[key], **original_filter[key]} - else: - filters = {**filters, **original_filter} - return filters if filters else None - - def select(self, table_name: str, columns: List[str] = None, conditions: List[FilterCondition] = None, - offset: int = None, limit: int = None) -> pd.DataFrame: - """Run general query or a vector similarity search and return results.""" - if not columns: - columns = [col["name"] for col in self.SCHEMA] - # Generate filter conditions - filters = self._translate_non_vector_conditions(conditions) - # Check for search vector - search_vector = ( - [] - if conditions is None - else [ - condition.value - for condition in conditions - if condition.column == TableField.SEARCH_VECTOR.value - ] - ) - if len(search_vector) > 0: - search_vector = search_vector[0] - else: - search_vector = None - # Search - results_df = pd.DataFrame(columns) - if search_vector is not None: - # Similarity - - params = { - "queryVector": search_vector, - "column": TableField.EMBEDDINGS.value, - "similarityFunction": self._select_params["similarity_function"] - } - if filters: - params["filter"] = filters - if limit: - params["size"] = limit - results = self._client.data().vector_search(table_name, params) - # Check for errors - if not results.is_success(): - raise Exception(results["message"]) - # Convert result - results_df = pd.DataFrame.from_records(results["records"]) - if "xata" in results_df.columns: - results_df["xata"] = results_df["xata"].apply(lambda x: x["score"]) - results_df.rename({"xata": TableField.DISTANCE.value}, axis=1, inplace=True) - - else: - # General get query - - params = { - "columns": columns if columns else [], - } - if filters: - params["filter"] = filters - if limit or offset: - params["page"] = {} - if limit: - params["page"]["size"] = limit - if offset: - params["page"]["offset"] = offset - results = self._client.data().query(table_name, params) - # Check for errors - if not results.is_success(): - raise Exception(results["message"]) - # Convert result - results_df = pd.DataFrame.from_records(results["records"]) - if "xata" in results_df.columns: - results_df.drop(["xata"], axis=1, inplace=True) - - return results_df - - def delete(self, table_name: str, conditions: List[FilterCondition] = None): - ids = [] - for condition in conditions: - if condition.op == FilterOperator.EQUAL: - ids.append(condition.value) - else: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message="You can only delete using '=' operator ID one at a time!", - ) - - for id in ids: - resp = self._client.records().delete(table_name, id) - if not resp.is_success(): - raise Exception(resp["message"]) diff --git a/mindsdb/integrations/handlers/youtube_handler/README.md b/mindsdb/integrations/handlers/youtube_handler/README.md deleted file mode 100644 index 7122b3efab7..00000000000 --- a/mindsdb/integrations/handlers/youtube_handler/README.md +++ /dev/null @@ -1,74 +0,0 @@ -# Youtube Handler - -Youtube handler for MindsDB provides interfaces to connect with Youtube via APIs and pull the video comments of the particular video. - -## Youtube -Youtube is a social video sharing platform businesses and creators. MindsDB users can deploy the youtube integration to perform NLP on youtube comments. - -## Youtube Handler Initialization - -The Youtube handler is initialized with the following parameters: - -- `youtube_api_token`: Youtube API key to use for authentication - -Please follow this (link)[https://blog.hubspot.com/website/how-to-get-youtube-api-key] to generate the token for -accessing youtube API - -## Implemented Features - -- [x] Youtube comments table - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -- [x] Youtube channels table - -- [x] Youtube videos table - - -## Example Usage - -The first step is to create a database with the new `Youtube` engine. - -~~~~sql -CREATE DATABASE mindsdb_youtube -WITH ENGINE = 'youtube', -PARAMETERS = { - "youtube_api_token": "" -}; -~~~~ - -Use the established connection to query the comments table - -~~~~sql -SELECT * FROM mindsdb_youtube.comments -WHERE video_id = "raWFGQ20OfA"; -~~~~ - -Advanced queries for the youtube handler - -~~~~sql -SELECT * FROM mindsdb_youtube.comments -WHERE video_id = "raWFGQ20OfA" -ORDER BY display_name ASC -LIMIT 5; -~~~~ - -Given a channel_id, get information about the channel - -~~~~sql -SELECT * FROM mindsdb_youtube.channels -WHERE channel_id="UC-..."; -~~~~ - -Here, `channel_id` column is mandatory in the where clause. - -Get information about any youtube video using video_id: - -~~~~sql -SELECT * FROM mindsdb_youtube.videos -WHERE video_id="id"; -~~~~ - -`video_id` is a mandatory column in the where clause. diff --git a/mindsdb/integrations/handlers/youtube_handler/__about__.py b/mindsdb/integrations/handlers/youtube_handler/__about__.py deleted file mode 100644 index ddc8121a803..00000000000 --- a/mindsdb/integrations/handlers/youtube_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Youtube handler" -__package_name__ = "mindsdb_youtube_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Youtube" -__author__ = "Balaji Seetharaman" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/youtube_handler/__init__.py b/mindsdb/integrations/handlers/youtube_handler/__init__.py deleted file mode 100644 index 97371a99174..00000000000 --- a/mindsdb/integrations/handlers/youtube_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - - -from .__about__ import __version__ as version, __description__ as description - -try: - from .youtube_handler import YoutubeHandler as Handler - from .connection_args import connection_args - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "YouTube" -name = "youtube" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/youtube_handler/connection_args.py b/mindsdb/integrations/handlers/youtube_handler/connection_args.py deleted file mode 100644 index 926acb383be..00000000000 --- a/mindsdb/integrations/handlers/youtube_handler/connection_args.py +++ /dev/null @@ -1,32 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - youtube_api_token={ - 'type': ARG_TYPE.STR, - 'description': 'Youtube API Token', - 'label': 'Youtube API Token', - }, - credentials_url={ - 'type': ARG_TYPE.STR, - 'description': 'URL to Service Account Keys', - 'label': 'URL to Service Account Keys', - }, - credentials_file={ - 'type': ARG_TYPE.STR, - 'description': 'Location of Service Account Keys', - 'label': 'Path to Service Account Keys', - }, - credentials={ - 'type': ARG_TYPE.PATH, - 'description': 'Service Account Keys', - 'label': 'Upload Service Account Keys', - }, - code={ - 'type': ARG_TYPE.STR, - 'description': 'Code After Authorisation', - 'label': 'Code After Authorisation', - }, -) diff --git a/mindsdb/integrations/handlers/youtube_handler/icon.svg b/mindsdb/integrations/handlers/youtube_handler/icon.svg deleted file mode 100644 index c9244384ad9..00000000000 --- a/mindsdb/integrations/handlers/youtube_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/youtube_handler/requirements.txt b/mindsdb/integrations/handlers/youtube_handler/requirements.txt deleted file mode 100644 index 7e0220f8fd0..00000000000 --- a/mindsdb/integrations/handlers/youtube_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -google-api-python-client -youtube-transcript-api --r mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt \ No newline at end of file diff --git a/mindsdb/integrations/handlers/youtube_handler/youtube_handler.py b/mindsdb/integrations/handlers/youtube_handler/youtube_handler.py deleted file mode 100644 index d31f666a71b..00000000000 --- a/mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +++ /dev/null @@ -1,128 +0,0 @@ -from mindsdb.integrations.handlers.youtube_handler.youtube_tables import ( - YoutubeCommentsTable, - YoutubeChannelsTable, - YoutubeVideosTable, -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - -from mindsdb.utilities.config import Config - -from googleapiclient.discovery import build - -from mindsdb.integrations.utilities.handlers.auth_utilities.google import GoogleUserOAuth2Manager - -DEFAULT_SCOPES = [ - 'https://www.googleapis.com/auth/youtube', - 'https://www.googleapis.com/auth/youtube.force-ssl', - 'https://www.googleapis.com/auth/youtubepartner' -] - -logger = log.getLogger(__name__) - - -class YoutubeHandler(APIHandler): - """Youtube handler implementation""" - - def __init__(self, name=None, **kwargs): - """Initialize the Youtube handler. - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - self.connection_data = kwargs.get("connection_data", {}) - self.kwargs = kwargs - - self.parser = parse_sql - self.connection = None - self.is_connected = False - - self.handler_storage = kwargs['handler_storage'] - - self.credentials_url = self.connection_data.get('credentials_url', None) - self.credentials_file = self.connection_data.get('credentials_file', None) - if self.connection_data.get('credentials'): - self.credentials_file = self.connection_data.pop('credentials') - if not self.credentials_file and not self.credentials_url: - # try to get from config - yt_config = Config().get('handlers', {}).get('youtube', {}) - secret_file = yt_config.get('credentials_file') - secret_url = yt_config.get('credentials_url') - if secret_file: - self.credentials_file = secret_file - elif secret_url: - self.credentials_url = secret_url - - self.youtube_api_token = self.connection_data.get('youtube_api_token', None) - - self.scopes = self.connection_data.get('scopes', DEFAULT_SCOPES) - - youtube_video_comments_data = YoutubeCommentsTable(self) - self._register_table("comments", youtube_video_comments_data) - - youtube_channel_data = YoutubeChannelsTable(self) - self._register_table("channels", youtube_channel_data) - - youtube_video_data = YoutubeVideosTable(self) - self._register_table("videos", youtube_video_data) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - if self.is_connected is True: - return self.connection - - google_oauth2_manager = GoogleUserOAuth2Manager(self.handler_storage, self.scopes, self.credentials_file, self.credentials_url, self.connection_data.get('code')) - creds = google_oauth2_manager.get_oauth2_credentials() - - youtube = build( - "youtube", "v3", developerKey=self.youtube_api_token, credentials=creds - ) - self.connection = youtube - - return self.connection - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - Returns - ------- - StatusResponse - Status confirmation - """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - response.copy_storage = True - except Exception as e: - logger.error(f"Error connecting to Youtube API: {e}!") - response.error_message = e - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/youtube_handler/youtube_tables.py b/mindsdb/integrations/handlers/youtube_handler/youtube_tables.py deleted file mode 100644 index 7c9b607142c..00000000000 --- a/mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +++ /dev/null @@ -1,575 +0,0 @@ -from typing import List - -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.utilities import log - -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.handlers.query_utilities import ( - SELECTQueryParser, - SELECTQueryExecutor, - INSERTQueryParser, -) - -import pandas as pd -import re -from youtube_transcript_api import YouTubeTranscriptApi -from youtube_transcript_api.formatters import JSONFormatter - -logger = log.getLogger(__name__) - - -class YoutubeCommentsTable(APITable): - """Youtube List Comments by video id Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the youtube "commentThreads()" API endpoint - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - Returns - ------- - pd.DataFrame - youtube "commentThreads()" matching the query - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - select_statement_parser = SELECTQueryParser(query, "comments", self.get_columns()) - - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - channel_id, video_id = None, None - for a_where in where_conditions: - if a_where[1] == "video_id": - if a_where[0] != "=": - raise NotImplementedError("Only '=' operator is supported for video_id column.") - else: - video_id = a_where[2] - elif a_where[1] == "channel_id": - if a_where[0] != "=": - raise NotImplementedError("Only '=' operator is supported for channel_id column.") - else: - channel_id = a_where[2] - - if not video_id and not channel_id: - raise ValueError("Either video_id or channel_id has to be present in where clause.") - - comments_df = self.get_comments(video_id=video_id, channel_id=channel_id) - - select_statement_executor = SELECTQueryExecutor( - comments_df, - selected_columns, - [ - where_condition - for where_condition in where_conditions - if where_condition[1] not in ["video_id", "channel_id"] - ], - order_by_conditions, - result_limit if query.limit else None, - ) - - comments_df = select_statement_executor.execute_query() - - return comments_df - - def insert(self, query: ast.Insert) -> None: - """Inserts data into the YouTube POST /commentThreads API endpoint. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - insert_query_parser = INSERTQueryParser(query, self.get_columns()) - - values_to_insert = insert_query_parser.parse_query() - - for value in values_to_insert: - if not value.get("comment_id"): - if not value.get("comment"): - raise ValueError("comment is mandatory for inserting a top-level comment.") - else: - self.insert_comment(video_id=value["video_id"], text=value["comment"]) - - else: - if not value.get("reply"): - raise ValueError("reply is mandatory for inserting a reply.") - else: - self.insert_comment(comment_id=value["comment_id"], text=value["reply"]) - - def insert_comment(self, text, video_id: str = None, comment_id: str = None): - # if comment_id is provided, define the request body for a reply and insert it - if comment_id: - request_body = {"snippet": {"parentId": comment_id, "textOriginal": text}} - - self.handler.connect().comments().insert(part="snippet", body=request_body).execute() - - # else if video_id is provided, define the request body for a top-level comment and insert it - elif video_id: - request_body = {"snippet": {"topLevelComment": {"snippet": {"videoId": video_id, "textOriginal": text}}}} - - self.handler.connect().commentThreads().insert(part="snippet", body=request_body).execute() - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - return [ - "comment_id", - "channel_id", - "video_id", - "user_id", - "display_name", - "comment", - "published_at", - "updated_at", - "reply_user_id", - "reply_author", - "reply", - ] - - def get_comments(self, video_id: str, channel_id: str): - """Pulls all the records from the given youtube api end point and returns it select() - - Returns - ------- - pd.DataFrame of all the records of the "commentThreads()" API end point - """ - - if video_id and channel_id: - channel_id = None - - resource = ( - self.handler.connect() - .commentThreads() - .list( - part="snippet, replies", - videoId=video_id, - allThreadsRelatedToChannelId=channel_id, - textFormat="plainText", - ) - ) - - data = [] - while resource: - comments = resource.execute() - - for comment in comments["items"]: - replies = [] - if "replies" in comment: - for reply in comment["replies"]["comments"]: - replies.append( - { - "reply_author": reply["snippet"]["authorDisplayName"], - "user_id": reply["snippet"]["authorChannelId"]["value"], - "reply": reply["snippet"]["textOriginal"], - } - ) - else: - replies.append( - { - "reply_author": None, - "user_id": None, - "reply": None, - } - ) - - data.append( - { - "channel_id": comment["snippet"]["channelId"], - "video_id": comment["snippet"]["videoId"], - "user_id": comment["snippet"]["topLevelComment"]["snippet"]["authorChannelId"]["value"], - "comment_id": comment["snippet"]["topLevelComment"]["id"], - "display_name": comment["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"], - "comment": comment["snippet"]["topLevelComment"]["snippet"]["textDisplay"], - "published_at": comment["snippet"]["topLevelComment"]["snippet"]["publishedAt"], - "updated_at": comment["snippet"]["topLevelComment"]["snippet"]["updatedAt"], - "replies": replies, - } - ) - - if "nextPageToken" in comments: - resource = ( - self.handler.connect() - .commentThreads() - .list( - part="snippet, replies", - videoId=video_id, - allThreadsRelatedToChannelId=channel_id, - textFormat="plainText", - pageToken=comments["nextPageToken"], - ) - ) - else: - break - - youtube_comments_df = pd.json_normalize( - data, - "replies", - [ - "comment_id", - "channel_id", - "video_id", - "user_id", - "display_name", - "comment", - "published_at", - "updated_at", - ], - record_prefix="replies.", - ) - youtube_comments_df = youtube_comments_df.rename( - columns={ - "replies.user_id": "reply_user_id", - "replies.reply_author": "reply_author", - "replies.reply": "reply", - } - ) - - # check if DataFrame is empty - if youtube_comments_df.empty: - return youtube_comments_df - else: - return youtube_comments_df[ - [ - "comment_id", - "channel_id", - "video_id", - "user_id", - "display_name", - "comment", - "published_at", - "updated_at", - "reply_user_id", - "reply_author", - "reply", - ] - ] - - -class YoutubeChannelsTable(APITable): - """Youtube Channel Info by channel id Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - select_statement_parser = SELECTQueryParser(query, "channel", self.get_columns()) - - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - channel_id = None - for op, arg1, arg2 in where_conditions: - if arg1 == "channel_id": - if op == "=": - channel_id = arg2 - break - else: - raise NotImplementedError("Only '=' operator is supported for channel_id column.") - - if not channel_id: - raise NotImplementedError("channel_id has to be present in where clause.") - - channel_df = self.get_channel_details(channel_id) - - select_statement_executor = SELECTQueryExecutor( - channel_df, - selected_columns, - [where_condition for where_condition in where_conditions if where_condition[1] == "channel_id"], - order_by_conditions, - result_limit if query.limit else None, - ) - - channel_df = select_statement_executor.execute_query() - - return channel_df - - def get_channel_details(self, channel_id): - details = ( - self.handler.connect().channels().list(part="statistics,snippet,contentDetails", id=channel_id).execute() - ) - snippet = details["items"][0]["snippet"] - statistics = details["items"][0]["statistics"] - data = { - "country": snippet["country"], - "description": snippet["description"], - "creation_date": snippet["publishedAt"], - "title": snippet["title"], - "subscriber_count": statistics["subscriberCount"], - "video_count": statistics["videoCount"], - "view_count": statistics["viewCount"], - "channel_id": channel_id, - } - return pd.json_normalize(data) - - def get_columns(self) -> List[str]: - return [ - "country", - "description", - "creation_date", - "title", - "subscriber_count", - "video_count", - "view_count", - "channel_id", - ] - - -class YoutubeVideosTable(APITable): - """Youtube Video info by video id Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - select_statement_parser = SELECTQueryParser(query, "video", self.get_columns()) - - ( - selected_columns, - where_conditions, - order_by_conditions, - result_limit, - ) = select_statement_parser.parse_query() - - video_id, channel_id, search_query = None, None, None - for op, arg1, arg2 in where_conditions: - if arg1 == "video_id": - if op == "=": - video_id = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for video_id column.") - - elif arg1 == "channel_id": - if op == "=": - channel_id = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for channel_id column.") - - elif arg1 == "query": - if op == "=": - search_query = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for query column.") - - if not video_id and not channel_id and not search_query: - raise ValueError("At least one of video_id, channel_id, or query must be present in the WHERE clause.") - - if video_id: - video_df = self.get_videos_by_video_ids([video_id]) - elif channel_id and search_query: - video_df = self.get_videos_by_search_query_in_channel(search_query, channel_id, result_limit) - elif channel_id: - video_df = self.get_videos_by_channel_id(channel_id, result_limit) - else: - video_df = self.get_videos_by_search_query(search_query, result_limit) - - select_statement_executor = SELECTQueryExecutor( - video_df, - selected_columns, - [ - where_condition - for where_condition in where_conditions - if where_condition[1] not in ["video_id", "channel_id", "query"] - ], - order_by_conditions, - result_limit if query.limit else None, - ) - - video_df = select_statement_executor.execute_query() - - return video_df - - def get_videos_by_search_query(self, search_query, limit=10): - video_ids = [] - resource = ( - self.handler.connect() - .search() - .list(part="snippet", q=search_query, type="video", maxResults=min(50, limit)) - ) - total_fetched = 0 - - while resource and total_fetched < limit: - response = resource.execute() - for item in response["items"]: - video_ids.append(item["id"]["videoId"]) - total_fetched += 1 - if total_fetched >= limit: - break - - if "nextPageToken" in response and total_fetched < limit: - resource = ( - self.handler.connect() - .search() - .list( - part="snippet", - q=search_query, - type="video", - maxResults=min(50, limit - total_fetched), - pageToken=response["nextPageToken"], - ) - ) - else: - break - - return self.get_videos_by_video_ids(video_ids) - - def get_videos_by_search_query_in_channel(self, search_query, channel_id, limit=10): - """Search for videos within a specific channel""" - video_ids = [] - resource = ( - self.handler.connect() - .search() - .list(part="snippet", q=search_query, channelId=channel_id, type="video", maxResults=min(50, limit)) - ) - total_fetched = 0 - - while resource and total_fetched < limit: - response = resource.execute() - for item in response["items"]: - video_ids.append(item["id"]["videoId"]) - total_fetched += 1 - if total_fetched >= limit: - break - - if "nextPageToken" in response and total_fetched < limit: - resource = ( - self.handler.connect() - .search() - .list( - part="snippet", - q=search_query, - channelId=channel_id, - type="video", - maxResults=min(50, limit - total_fetched), - pageToken=response["nextPageToken"], - ) - ) - else: - break - - return self.get_videos_by_video_ids(video_ids) - - def get_videos_by_channel_id(self, channel_id, limit=10): - video_ids = [] - resource = ( - self.handler.connect() - .search() - .list(part="snippet", channelId=channel_id, type="video", maxResults=min(50, limit)) - ) - total_fetched = 0 - while resource and total_fetched < limit: - response = resource.execute() - for item in response["items"]: - video_ids.append(item["id"]["videoId"]) - total_fetched += 1 - if total_fetched >= limit: - break - if "nextPageToken" in response and total_fetched < limit: - resource = ( - self.handler.connect() - .search() - .list( - part="snippet", - channelId=channel_id, - type="video", - maxResults=min(50, limit - total_fetched), - pageToken=response["nextPageToken"], - ) - ) - else: - break - - return self.get_videos_by_video_ids(video_ids) - - def get_videos_by_video_ids(self, video_ids): - data = [] - - if not isinstance(video_ids, list): - logger.error(f"video_ids must be a list. Received {type(video_ids)} instead.") - return pd.DataFrame() - - # loop over 50 video ids at a time - # an invalid request error is caused otherwise - for i in range(0, len(video_ids), 50): - resource = ( - self.handler.connect() - .videos() - .list(part="statistics,snippet,contentDetails", id=",".join(video_ids[i : i + 50])) - .execute() - ) - - for item in resource["items"]: - data.append( - { - "channel_id": item["snippet"]["channelId"], - "channel_title": item["snippet"]["channelTitle"], - "comment_count": item["statistics"]["commentCount"], - "description": item["snippet"]["description"], - "like_count": item["statistics"]["likeCount"], - "publish_time": item["snippet"]["publishedAt"], - "title": item["snippet"]["title"], - "transcript": self.get_captions_by_video_id(item["id"]), - "video_id": item["id"], - "view_count": item["statistics"]["viewCount"], - "duration_str": self.parse_duration(item["id"], item["contentDetails"]["duration"]), - } - ) - - return pd.json_normalize(data) - - def get_captions_by_video_id(self, video_id): - try: - transcript_response = YouTubeTranscriptApi.get_transcript(video_id, preserve_formatting=True) - json_formatted_transcript = JSONFormatter().format_transcript(transcript_response, indent=2) - return json_formatted_transcript - - except Exception as e: - (logger.error(f"Encountered an error while fetching transcripts for video ${video_id}: ${e}"),) - return "Transcript not available for this video" - - def parse_duration(self, video_id, duration): - try: - parsed_duration = re.search(r"PT(\d+H)?(\d+M)?(\d+S)", duration).groups() - duration_str = "" - for d in parsed_duration: - if d: - duration_str += f"{d[:-1]}:" - - return duration_str.strip(":") - except Exception as e: - (logger.error(f"Encountered an error while parsing duration for video ${video_id}: ${e}"),) - return "Duration not available for this video" - - def get_columns(self) -> List[str]: - return [ - "channel_id", - "channel_title", - "title", - "description", - "publish_time", - "comment_count", - "like_count", - "view_count", - "video_id", - "duration_str", - "transcript", - ] diff --git a/mindsdb/integrations/handlers/yugabyte_handler/README.md b/mindsdb/integrations/handlers/yugabyte_handler/README.md deleted file mode 100644 index d4609edb2a2..00000000000 --- a/mindsdb/integrations/handlers/yugabyte_handler/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# YugabyteDB Handler - -This is the implementation of the YugabyteDB handler for MindsDB. - -## YugabyteDB - -YugabyteDB is a high-performance, cloud-native distributed SQL database that aims to support all PostgreSQL features. It is best to fit for cloud-native OLTP (i.e. real-time, business-critical) applications that need absolute data correctness and require at least one of the following: scalability, high tolerance to failures, or globally-distributed deployments. - -## Implementation -This handler was implemented using the `psycopg`, a Python library that allows you to use Python code to run SQL commands on YugabyteDB. - -The required arguments to establish a connection are, -* `user`: username asscociated with database -* `password`: password to authenticate your access -* `host`: host to server IP Address or hostname -* `port`: port through which TCP/IP connection is to be made -* `database`: Database name to be connected -* `schema`(OPTIONAL): comma seperated schemas to be considered for querying (e.g., "**class,company**") -* `sslmode`(OPTIONAL): Specifies the SSL mode for the connection, determining whether to use SSL encryption and the level of verification required (e.g., "**disable**", "**allow**", "**prefer**", "**require**", "**verify-ca**", "**verify-full**"). - - -## Usage - -In order to make use of this handler and connect to yugabyte in MindsDB, the following syntax can be used, - -```sql -CREATE DATABASE yugabyte_datasource -WITH -engine='yugabyte', -parameters={ - "user":"admin", - "password":"1234", - "host":"127.0.0.1", - "port":5433, - "database":"yugabyte", - "schema":"your_schema_name" -}; -``` - -Now, you can use this established connection to query your database as follows, - -```sql -SELECT * FROM yugabyte_datasource.demo; -``` - -NOTE : If you are using YugabyteDB Cloud with MindsDB Cloud website you need to add below 3 static IPs of MindsDB Cloud to `allow IP list` for accessing it publicly. -``` -18.220.205.95 -3.19.152.46 -52.14.91.162 -``` -![public](https://github-production-user-asset-6210df.s3.amazonaws.com/75653580/238903548-1b054591-f5db-4a6d-a3d0-d048671e4cfa.png) - diff --git a/mindsdb/integrations/handlers/yugabyte_handler/__about__.py b/mindsdb/integrations/handlers/yugabyte_handler/__about__.py deleted file mode 100644 index 50e7bad5a6e..00000000000 --- a/mindsdb/integrations/handlers/yugabyte_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB YugabyteDB handler' -__package_name__ = 'mindsdb_yugabyte_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for YugabyteDB" -__author__ = 'Parthiv Makwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/yugabyte_handler/__init__.py b/mindsdb/integrations/handlers/yugabyte_handler/__init__.py deleted file mode 100644 index e8c9d0d9f78..00000000000 --- a/mindsdb/integrations/handlers/yugabyte_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .yugabyte_handler import YugabyteHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'YugabyteDB' -name = 'yugabyte' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', - 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/yugabyte_handler/icon.svg b/mindsdb/integrations/handlers/yugabyte_handler/icon.svg deleted file mode 100644 index c0445a44037..00000000000 --- a/mindsdb/integrations/handlers/yugabyte_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/yugabyte_handler/tests/__init__.py b/mindsdb/integrations/handlers/yugabyte_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py b/mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py deleted file mode 100644 index 278c041761a..00000000000 --- a/mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py +++ /dev/null @@ -1,49 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.yugabyte_handler.yugabyte_handler import YugabyteHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class YugabyteHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "host": "localhost", - "port": 5433, - "user": "admin", - "password": "", - "database": "yugabyte" - } - } - cls.handler = YugabyteHandler('test_yugabyte_handler', cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_drop_table(self): - res = self.handler.query("DROP TABLE IF EXISTS PREM;") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_2_create_table(self): - res = self.handler.query("CREATE TABLE IF NOT EXISTS PREM (Premi varchar(50));") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_insert_table(self): - res = self.handler.query("INSERT INTO PREM VALUES('Radha <3 Krishna');") - assert res.type is not RESPONSE_TYPE.ERROR - - def test_4_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_5_select_query(self): - query = "SELECT * FROM PREM;" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE or RESPONSE_TYPE.OK - - def test_6_check_connection(self): - self.handler.check_connection() - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py b/mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py deleted file mode 100644 index e9e8dbc4c3c..00000000000 --- a/mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py +++ /dev/null @@ -1,54 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.handlers.postgres_handler.postgres_handler import ( - PostgresHandler, -) -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -class YugabyteHandler(PostgresHandler): - """ - This handler handles connection and execution of the YugabyteSQL statements. - """ - - name = 'yugabyte' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the YugabyteDB server/database.', - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the YugabyteDB server.', - }, - password={ - 'type': ARG_TYPE.STR, - 'description': 'The password to authenticate the user with the YugabyteDB server.', - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Specify port to connect YugabyteDB server', - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'Specify database name to connect YugabyteDB server', - }, - schema={ - 'type': ARG_TYPE.STR, - 'description': '(OPTIONAL) comma seperated value of schema to be considered while querying', - }, - sslmode={ - 'type': ARG_TYPE.STR, - 'description': ''' (OPTIONAL) Specifies the SSL mode for the connection, determining whether to use SSL encryption and the level of verification required - (e.g., "**disable**", "**allow**", "**prefer**", "**require**", "**verify-ca**", "**verify-full**")''', - }, -) - -connection_args_example = OrderedDict( - host='127.0.0.1', port=5433, password='', user='admin', database='yugabyte' -) diff --git a/mindsdb/integrations/handlers/zendesk_handler/README.md b/mindsdb/integrations/handlers/zendesk_handler/README.md deleted file mode 100644 index e4d2e619746..00000000000 --- a/mindsdb/integrations/handlers/zendesk_handler/README.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: Zendesk -sidebarTitle: Zendesk ---- - -This documentation describes the integration of MindsDB with [Zendesk](https://www.zendesk.com/), which provides software-as-a-service products related to customer support, sales, and other customer communications. - -The integration allows MindsDB to access data from Zendesk and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Zendesk to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Zendesk from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/zendesk_handler) as an engine. - -```sql -CREATE DATABASE zendesk_datasource -WITH - ENGINE = 'zendesk', - PARAMETERS = { - "api_key":"api_key", - "sub_domain": "sub_domain", - "email":"email" - }; -``` - -Required connection parameters include the following: - -* `api_key`: The api key for the Zendesk account. -* `sub_domain`: The sub domain for the Zendesk account. -* `email`: The email ID of the account. - - -For enabling, generating and deleting API access, refer [Managing access to the Zendesk API](https://support.zendesk.com/hc/en-us/articles/4408889192858-Managing-access-to-the-Zendesk-API) - - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM zendesk_datasource.table_name -LIMIT 10; -``` - -Retrieve data for a specific ticket by providing the id: - -```sql -SELECT * -FROM zendesk_datasource.tickets -where id=""; -``` - - - -The above examples utilize `zendesk_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Supported Tables - -The Zendesk integration supports the following tables: - -* `users` : The table lists all the users. -* `tickets` : The table lists all the tickets. -* `triggers` : The table lists all the triggers. -* `activities` : The table lists all the activities. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/zendesk_handler/__about__.py b/mindsdb/integrations/handlers/zendesk_handler/__about__.py deleted file mode 100644 index f190718159d..00000000000 --- a/mindsdb/integrations/handlers/zendesk_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Zendesk handler" -__package_name__ = "mindsdb_zendesk_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Zendesk" -__author__ = "Abhilash K R" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/zendesk_handler/__init__.py b/mindsdb/integrations/handlers/zendesk_handler/__init__.py deleted file mode 100644 index e056be25fce..00000000000 --- a/mindsdb/integrations/handlers/zendesk_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .zendesk_handler import ZendeskHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Zendesk" -name = "zendesk" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/zendesk_handler/connection_args.py b/mindsdb/integrations/handlers/zendesk_handler/connection_args.py deleted file mode 100644 index cbc791dfdb5..00000000000 --- a/mindsdb/integrations/handlers/zendesk_handler/connection_args.py +++ /dev/null @@ -1,33 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.STR, - "description": "API key", - "required": True, - "label": "api_key", - "secret": True - }, - sub_domain={ - "type": ARG_TYPE.STR, - "description": "Sub-domain", - "required": True, - "label": "sub_domain", - "secret": True - }, - email={ - "type": ARG_TYPE.STR, - "description": "Email ID", - "required": True, - "label": "email" - } -) - -connection_args_example = OrderedDict( - api_key="api_key", - sub_domain="sub_domain", - email="email" -) diff --git a/mindsdb/integrations/handlers/zendesk_handler/icon.svg b/mindsdb/integrations/handlers/zendesk_handler/icon.svg deleted file mode 100644 index 38967391a24..00000000000 --- a/mindsdb/integrations/handlers/zendesk_handler/icon.svg +++ /dev/null @@ -1 +0,0 @@ -golion-z-sourcefile-algae \ No newline at end of file diff --git a/mindsdb/integrations/handlers/zendesk_handler/requirements.txt b/mindsdb/integrations/handlers/zendesk_handler/requirements.txt deleted file mode 100644 index ad3501b591d..00000000000 --- a/mindsdb/integrations/handlers/zendesk_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -zenpy \ No newline at end of file diff --git a/mindsdb/integrations/handlers/zendesk_handler/zendesk_handler.py b/mindsdb/integrations/handlers/zendesk_handler/zendesk_handler.py deleted file mode 100644 index b820f948ac8..00000000000 --- a/mindsdb/integrations/handlers/zendesk_handler/zendesk_handler.py +++ /dev/null @@ -1,89 +0,0 @@ -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.zendesk_handler.zendesk_tables import ( - ZendeskUsersTable, - ZendeskTicketsTable, - ZendeskTriggersTable, - ZendeskActivitiesTable -) -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log -import zenpy - -logger = log.getLogger(__name__) - - -class ZendeskHandler(APIHandler): - """The Zendesk handler implementation""" - - def __init__(self, name: str, **kwargs): - """Initialize the zendesk handler. - - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - self.zen_client = None - self.is_connected = False - - self._register_table("users", ZendeskUsersTable(self)) - self._register_table("tickets", ZendeskTicketsTable(self)) - self._register_table("triggers", ZendeskTriggersTable(self)) - self._register_table("activities", ZendeskActivitiesTable(self)) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - - Returns - ------- - StatusResponse - connection object - """ - resp = StatusResponse(False) - self.zen_client = zenpy.Zenpy(subdomain=self.connection_data["sub_domain"], email=self.connection_data["email"], token=self.connection_data["api_key"]) - try: - self.zen_client.users() - self.is_connected = True - resp.success = True - except Exception as ex: - resp.success = False - resp.error_message = str(ex) - self.is_connected = False - return resp - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - - Returns - ------- - StatusResponse - Status confirmation - """ - response = self.connect() - self.is_connected = response.success - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - - Parameters - ---------- - query : str - query in a native format - - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py b/mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py deleted file mode 100644 index 8487b9a940c..00000000000 --- a/mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +++ /dev/null @@ -1,496 +0,0 @@ -import pandas as pd -from typing import List -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.handlers.query_utilities import ( - SELECTQueryParser, - SELECTQueryExecutor, -) -from mindsdb.utilities import log -from mindsdb_sql_parser import ast -import zenpy - -logger = log.getLogger(__name__) - - -class ZendeskUsersTable(APITable): - """Zendesk Users Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the zendesk list users API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Zendesk users - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser(query, "users", self.get_columns()) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - subset_where_conditions = [] - api_filters = {} - for op, arg1, arg2 in where_conditions: - if arg1 in self.get_columns(): - if op != "=": - raise NotImplementedError(f"Unknown op: {op}. Only '=' is supported.") - api_filters[arg1] = arg2 - subset_where_conditions.append([op, arg1, arg2]) - - result = self.handler.zen_client.users(**api_filters) - response = [] - if isinstance(result, zenpy.lib.generator.BaseResultGenerator): - for user in result: - response.append(user.to_dict()) - else: - response.append(result.to_dict()) - - df = pd.DataFrame(response, columns=self.get_columns()) - - select_statement_executor = SELECTQueryExecutor( - df, selected_columns, subset_where_conditions, order_by_conditions, result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "active", - "alias", - "chat_only", - "created_at", - "custom_role_id", - "details", - "email", - "external_id", - "id", - "last_login_at", - "locale", - "locale_id", - "moderator", - "name", - "notes", - "only_private_comments", - "organization_id", - "phone", - "photo", - "restricted_agent", - "role", - "shared", - "shared_agent", - "signature", - "suspended", - "tags", - "ticket_restriction", - "time_zone", - "two_factor_auth_enabled", - "updated_at", - "url", - "verified", - "iana_time_zone", - "shared_phone_number", - "role_type", - "default_group_id", - "report_csv", - ] - - -class ZendeskTicketsTable(APITable): - """Zendesk tickets Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the zendesk tickets API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Ticket ID Data - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser(query, "tickets", self.get_columns()) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - subset_where_conditions = [] - api_filters = {} - for op, arg1, arg2 in where_conditions: - if arg1 in self.get_columns(): - if op != "=": - raise NotImplementedError(f"Unknown op: {op}. Only '=' is supported.") - api_filters[arg1] = arg2 - subset_where_conditions.append([op, arg1, arg2]) - - result = self.handler.zen_client.tickets(**api_filters) - response = [] - if isinstance(result, zenpy.lib.generator.BaseResultGenerator): - for ticket in result: - response.append(ticket.to_dict()) - else: - response.append(result.to_dict()) - - df = pd.DataFrame(response, columns=self.get_columns()) - - select_statement_executor = SELECTQueryExecutor( - df, selected_columns, subset_where_conditions, order_by_conditions, result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "assignee_id", - "brand_id", - "collaborator_ids", - "created_at", - "custom_fields", - "description", - "due_at", - "external_id", - "fields", - "forum_topic_id", - "group_id", - "has_incidents", - "id", - "organization_id", - "priority", - "problem_id", - "raw_subject", - "recipient", - "requester_id", - "sharing_agreement_ids", - "status", - "subject", - "submitter_id", - "tags", - "type", - "updated_at", - "url", - "generated_timestamp", - "follower_ids", - "email_cc_ids", - "is_public", - "custom_status_id", - "followup_ids", - "ticket_form_id", - "allow_channelback", - "allow_attachments", - "from_messaging_channel", - "satisfaction_rating.assignee_id", - "satisfaction_rating.created_at", - "satisfaction_rating.group_id", - "satisfaction_rating.id", - "satisfaction_rating.requester_id", - "satisfaction_rating.score", - "satisfaction_rating.ticket_id", - "satisfaction_rating.updated_at", - "satisfaction_rating.url", - "via.channel", - "via.source.rel", - ] - - -class ZendeskTriggersTable(APITable): - """Zendesk Triggers Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the zendesk triggers API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Trigger Data - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser(query, "triggers", self.get_columns()) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - subset_where_conditions = [] - api_filters = {} - for op, arg1, arg2 in where_conditions: - if arg1 in self.get_columns(): - if op != "=": - raise NotImplementedError(f"Unknown op: {op}. Only '=' is supported.") - api_filters[arg1] = arg2 - subset_where_conditions.append([op, arg1, arg2]) - - result = self.handler.zen_client.triggers(**api_filters) - response = [] - if isinstance(result, zenpy.lib.generator.BaseResultGenerator): - for trigger in result: - response.append(trigger.to_dict()) - else: - response.append(result.to_dict()) - - df = pd.DataFrame(response, columns=self.get_columns()) - - select_statement_executor = SELECTQueryExecutor( - df, selected_columns, subset_where_conditions, order_by_conditions, result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "actions", - "active", - "description", - "id", - "position", - "title", - "url", - "updated_at", - "created_at", - "default", - "raw_title", - "category_id", - "conditions.all", - "conditions.any", - ] - - -class ZendeskActivitiesTable(APITable): - """Zendesk Activities Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the zendesk activities API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Activity list Data - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser(query, "activities", self.get_columns()) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - subset_where_conditions = [] - api_filters = {} - for op, arg1, arg2 in where_conditions: - if arg1 in self.get_columns(): - if op != "=": - raise NotImplementedError(f"Unknown op: {op}. Only '=' is supported.") - api_filters[arg1] = arg2 - subset_where_conditions.append([op, arg1, arg2]) - - result = self.handler.zen_client.activities(**api_filters) - response = [] - if isinstance(result, zenpy.lib.generator.BaseResultGenerator): - for activity in result: - response.append(activity.to_dict()) - else: - response.append(result.to_dict()) - - df = pd.DataFrame(response, columns=self.get_columns()) - - select_statement_executor = SELECTQueryExecutor( - df, selected_columns, subset_where_conditions, order_by_conditions, result_limit - ) - - df = select_statement_executor.execute_query() - - return df - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - - Returns - ------- - List[str] - List of columns - """ - - return [ - "created_at", - "id", - "title", - "updated_at", - "url", - "verb", - "user_id", - "actor_id", - "actor.id", - "actor.url", - "actor.name", - "actor.email", - "actor.created_at", - "actor.updated_at", - "actor.time_zone", - "actor.iana_time_zone", - "actor.phone", - "actor.shared_phone_number", - "actor.photo", - "actor.locale_id", - "actor.locale", - "actor.organization_id", - "actor.role", - "actor.verified", - "actor.external_id", - "actor.tags", - "actor.alias", - "actor.active", - "actor.shared", - "actor.shared_agent", - "actor.last_login_at", - "actor.two_factor_auth_enabled", - "actor.signature", - "actor.details", - "actor.notes", - "actor.role_type", - "actor.custom_role_id", - "actor.moderator", - "actor.ticket_restriction", - "actor.only_private_comments", - "actor.restricted_agent", - "actor.suspended", - "actor.default_group_id", - "actor.report_csv", - "user.active", - "user.alias", - "user.chat_only", - "user.created_at", - "user.custom_role_id", - "user.details", - "user.email", - "user.external_id", - "user.id", - "user.last_login_at", - "user.locale", - "user.locale_id", - "user.moderator", - "user.name", - "user.notes", - "user.only_private_comments", - "user.organization_id", - "user.phone", - "user.photo", - "user.restricted_agent", - "user.role", - "user.shared", - "user.shared_agent", - "user.signature", - "user.suspended", - "user.tags", - "user.ticket_restriction", - "user.time_zone", - "user.two_factor_auth_enabled", - "user.updated_at", - "user.url", - "user.verified", - "user.iana_time_zone", - "user.shared_phone_number", - "user.role_type", - "user.default_group_id", - "user.report_csv", - "target.active", - "target.content_type", - "target.created_at", - "target.id", - "target.method", - "target.password", - "target.target_url", - "target.title", - "target.type", - "target.url", - "target.username", - "target.ticket.assignee_id", - "target.ticket.brand_id", - "target.ticket.collaborator_ids", - "target.ticket.created_at", - "target.ticket.custom_fields", - "target.ticket.description", - "target.ticket.due_at", - "target.ticket.external_id", - "target.ticket.fields", - "target.ticket.forum_topic_id", - "target.ticket.group_id", - "target.ticket.has_incidents", - "target.ticket.id", - "target.ticket.organization_id", - "target.ticket.priority", - "target.ticket.problem_id", - "target.ticket.raw_subject", - "target.ticket.recipient", - "target.ticket.requester_id", - "target.ticket.satisfaction_rating", - "target.ticket.sharing_agreement_ids", - "target.ticket.status", - "target.ticket.subject", - "target.ticket.submitter_id", - "target.ticket.tags", - "target.ticket.type", - "target.ticket.updated_at", - "target.ticket.url", - "target.ticket.via", - ] diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/README.md b/mindsdb/integrations/handlers/zipcodebase_handler/README.md deleted file mode 100644 index e86858edc30..00000000000 --- a/mindsdb/integrations/handlers/zipcodebase_handler/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# ZipCodeBase Handler - -ZipCodeBase handler for MindsDB provides interfaces to connect to ZipCodeBase via APIs and import zipcode data into MindsDB. - ---- - -## Table of Contents - -- [ZipCodeBase Handler](#zipcodebase-handler) - - [Table of Contents](#table-of-contents) - - [About ZipCodeBase](#about-zipcodebase) - - [ZipCodeBase Handler Implementation](#zipcodebase-handler-implementation) - - [ZipCodeBase Handler Initialization](#zipcodebase-handler-initialization) - - [Implemented Features](#implemented-features) - - [Example Usage](#example-usage) - ---- - -## About ZipCodeBase - -[Zipcodebase.com](https://zipcodebase.com/) is the perfect tool to perform postal code validation, lookups and other calculative tasks, such as postal code distance calculations. Zipcodebase offers a wide range of endpoints that give you access to any type of data you might need. - -## ZipCodeBase Handler Implementation - -This handler was implemented using the `requests` library that makes http calls to https://app.zipcodebase.com/documentation. - -## ZipCodeBase Handler Initialization - -The ZipCodeBase handler is initialized with the following parameters: - -- `api_key`: API Key used to authenticate with ZipCodeBase - -Read about creating an API Key [here](https://zipcodebase.com/). - -## Implemented Features - -- [x] ZipCodeBase - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - -## Example Usage - -The first step is to create a database with the new `zipcodebase` engine. - -~~~~sql -CREATE DATABASE mindsdb_zipcodebase -WITH ENGINE = 'zipcodebase', -PARAMETERS = { - "api_key": "" -}; -~~~~ - -Use the established connection to query your database: - -~~~~sql -SELECT * FROM mindsdb_zipcodebase.code_to_location where codes="10005"; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_zipcodebase.codes_within_radius WHERE code="10005" AND radius="100" AND country="us"; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_zipcodebase.codes_by_city WHERE city="Amsterdam" AND country="nl"; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_zipcodebase.codes_by_state WHERE state="Noord-Holland" AND country="nl"; -~~~~ - -~~~~sql -SELECT * FROM mindsdb_zipcodebase.states_by_country WHERE country="de"; -~~~~ diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/__about__.py b/mindsdb/integrations/handlers/zipcodebase_handler/__about__.py deleted file mode 100644 index ad8a4bf1127..00000000000 --- a/mindsdb/integrations/handlers/zipcodebase_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB ZipCodeBase handler" -__package_name__ = "mindsdb_zipcodebase_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for ZipCodeBase" -__author__ = "Abhilash" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/__init__.py b/mindsdb/integrations/handlers/zipcodebase_handler/__init__.py deleted file mode 100644 index 4508e5ce038..00000000000 --- a/mindsdb/integrations/handlers/zipcodebase_handler/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .zipcodebase_handler import ZipCodeBaseHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "ZipCodeBase" -name = "zipcodebase" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "connection_args_example", - "connection_args", -] diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/connection_args.py b/mindsdb/integrations/handlers/zipcodebase_handler/connection_args.py deleted file mode 100644 index 672b4654b5b..00000000000 --- a/mindsdb/integrations/handlers/zipcodebase_handler/connection_args.py +++ /dev/null @@ -1,18 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - api_key={ - "type": ARG_TYPE.PWD, - "description": "ZipCodeBase api key to use for authentication.", - "required": True, - "label": "Api key", - "secret": True - } -) - -connection_args_example = OrderedDict( - api_key="" -) diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/icon.svg b/mindsdb/integrations/handlers/zipcodebase_handler/icon.svg deleted file mode 100644 index 45197f7418e..00000000000 --- a/mindsdb/integrations/handlers/zipcodebase_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/tests/__init__.py b/mindsdb/integrations/handlers/zipcodebase_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase.py b/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase.py deleted file mode 100644 index b1298e526e4..00000000000 --- a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase.py +++ /dev/null @@ -1,67 +0,0 @@ -import requests - - -class ZipCodeBaseClient: - - def __init__(self, api_key): - self.api_key = api_key - self.base_endpoint = "https://app.zipcodebase.com/api/v1" - - def make_request(self, url, params=None): - headers = {'Content-type': 'application/json'} - if self.api_key: - headers['apikey'] = self.api_key - resp = requests.get(url, headers=headers, params=params) - content = {} - if resp.status_code == 200: - content = {'content': resp.json(), 'code': 200} - else: - content = {'content': {}, 'code': resp.status_code, 'error': resp.text} - return content - - def code_to_location(self, codes): - url = f'{self.base_endpoint}/search' - params = ( - ("codes", codes), - ) - return self.make_request(url, params) - - def codes_within_radius(self, code, radius, country, unit): - url = f'{self.base_endpoint}/radius' - params = ( - ("code", code), - ("radius", radius), - ("country", country), - ("unit", unit), - ) - return self.make_request(url, params) - - def codes_by_city(self, city, country, limit): - url = f'{self.base_endpoint}/code/city' - params = ( - ("city", city), - ("country", country), - ("limit", limit), - ) - return self.make_request(url, params) - - def codes_by_state(self, state, country, limit): - url = f'{self.base_endpoint}/code/state' - params = ( - ("state_name", state), - ("country", country), - ("limit", limit), - ) - return self.make_request(url, params) - - def states_by_country(self, country): - url = f'{self.base_endpoint}/country/province' - params = ( - ("country", country), - ) - return self.make_request(url, params) - - def remaining_requests(self): - url = f'{self.base_endpoint}/status' - params = () - return self.make_request(url, params) diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_handler.py b/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_handler.py deleted file mode 100644 index 1684ebca743..00000000000 --- a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_handler.py +++ /dev/null @@ -1,107 +0,0 @@ -from mindsdb.integrations.handlers.zipcodebase_handler.zipcodebase_tables import ( - ZipCodeBaseCodeLocationTable, - ZipCodeBaseCodeInRadiusTable, - ZipCodeBaseCodeByCityTable, - ZipCodeBaseCodeByStateTable, - ZipCodeBaseStatesByCountryTable -) -from mindsdb.integrations.handlers.zipcodebase_handler.zipcodebase import ZipCodeBaseClient -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql - - -logger = log.getLogger(__name__) - - -class ZipCodeBaseHandler(APIHandler): - """The ZipCodeBase handler implementation""" - - def __init__(self, name: str, **kwargs): - """Initialize the ZipCodeBase handler. - Parameters - ---------- - name : str - name of a handler instance - """ - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = connection_data - self.kwargs = kwargs - self.client = ZipCodeBaseClient(self.connection_data["api_key"]) - self.is_connected = False - - code_to_location_data = ZipCodeBaseCodeLocationTable(self) - self._register_table("code_to_location", code_to_location_data) - - codes_within_radius_data = ZipCodeBaseCodeInRadiusTable(self) - self._register_table("codes_within_radius", codes_within_radius_data) - - codes_by_city_data = ZipCodeBaseCodeByCityTable(self) - self._register_table("codes_by_city", codes_by_city_data) - - codes_by_state_data = ZipCodeBaseCodeByStateTable(self) - self._register_table("codes_by_state", codes_by_state_data) - - states_by_country_data = ZipCodeBaseStatesByCountryTable(self) - self._register_table("states_by_country", states_by_country_data) - - def connect(self) -> StatusResponse: - """Set up the connection required by the handler. - Returns - ------- - StatusResponse - connection object - """ - resp = StatusResponse(False) - status = self.client.remaining_requests() - if status["code"] != 200: - resp.success = False - resp.error_message = status["error"] - return resp - self.is_connected = True - return resp - - def check_connection(self) -> StatusResponse: - """Check connection to the handler. - Returns - ------- - StatusResponse - Status confirmation - """ - response = StatusResponse(False) - - try: - status = self.client.remaining_requests() - if status["code"] == 200: - logger.info("Authentication successful") - response.success = True - else: - response.success = False - logger.info("Error connecting to ZipCodeBase. " + status["error"]) - response.error_message = status["error"] - except Exception as e: - logger.error(f"Error connecting to ZipCodeBase: {e}!") - response.error_message = e - - self.is_connected = response.success - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. - Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_tables.py b/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_tables.py deleted file mode 100644 index 6e9edc8ee89..00000000000 --- a/mindsdb/integrations/handlers/zipcodebase_handler/zipcodebase_tables.py +++ /dev/null @@ -1,499 +0,0 @@ -import pandas as pd -from typing import List -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor -from mindsdb.utilities import log -from mindsdb_sql_parser import ast - -logger = log.getLogger(__name__) - - -class ZipCodeBaseCodeLocationTable(APITable): - """The ZipCodeBase Location Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://app.zipcodebase.com/documentation#search API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Location of the codes matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'code_to_location', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - - for op, arg1, arg2 in where_conditions: - if arg1 == "codes": - if op == '=': - search_params["codes"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for codes column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = "codes" in search_params - - if not filter_flag: - raise NotImplementedError("`codes` column has to be present in where clause.") - - code_to_location_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.client.code_to_location(search_params.get("codes")) - - self.check_res(res=response) - - content = response["content"] - - code_to_location_df = pd.json_normalize(self.clean_resp(content["results"])) - - select_statement_executor = SELECTQueryExecutor( - code_to_location_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - code_to_location_df = select_statement_executor.execute_query() - - return code_to_location_df - - def clean_resp(self, data): - clean_data = [] - for k, v in data.items(): - clean_data.extend(v) - return clean_data - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - - return [ - "postal_code", - "country_code", - "latitude", - "longitude", - "city", - "state", - "city_en", - "state_en", - "state_code", - "province", - "province_code" - ] - - -class ZipCodeBaseCodeInRadiusTable(APITable): - """The ZipCodeBase Codes within Radius Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://app.zipcodebase.com/documentation#radius API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - codes within the radius - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'codes_within_radius', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - - for op, arg1, arg2 in where_conditions: - if arg1 == "code": - if op == '=': - search_params["code"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for code column.") - - if arg1 == "radius": - if op == '=': - search_params["radius"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for radius column.") - - if arg1 == "country": - if op == '=': - search_params["country"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for country column.") - - if arg1 == "unit": - if op == '=': - search_params["unit"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for unit column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("code" in search_params) and ("radius" in search_params) and ("country" in search_params) - - if not filter_flag: - raise NotImplementedError("`codes`, `radius` and `country` columns have to be present in where clause.") - - code_to_location_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.client.codes_within_radius(search_params.get("code"), search_params.get("radius"), search_params.get("country"), search_params.get("unit", "km")) - - self.check_res(res=response) - - content = response["content"] - - logger.info(f"response size - {len(content['results'])}") - code_to_location_df = pd.json_normalize(content["results"]) - - select_statement_executor = SELECTQueryExecutor( - code_to_location_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - code_to_location_df = select_statement_executor.execute_query() - - return code_to_location_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - - return [ - "code", - "city", - "state", - "city_en", - "state_en", - "distance" - ] - - -class ZipCodeBaseCodeByCityTable(APITable): - """The ZipCodeBase Codes within a City Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://app.zipcodebase.com/documentation#city API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - codes within the city - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'codes_by_city', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - - for op, arg1, arg2 in where_conditions: - if arg1 == "city": - if op == '=': - search_params["city"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for city column.") - - if arg1 == "country": - if op == '=': - search_params["country"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for country column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("city" in search_params) and ("country" in search_params) - - if not filter_flag: - raise NotImplementedError("`city` and `country` columns have to be present in where clause.") - - codes_by_city_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.client.codes_by_city(search_params.get("city"), search_params.get("country")) - - self.check_res(res=response) - - content = response["content"] - - logger.info(f"response size - {len(content['results'])}") - codes_by_city_df = pd.json_normalize({"codes": content["results"]}) - - select_statement_executor = SELECTQueryExecutor( - codes_by_city_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - codes_by_city_df = select_statement_executor.execute_query() - - return codes_by_city_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - - return [ - "codes" - ] - - -class ZipCodeBaseCodeByStateTable(APITable): - """The ZipCodeBase Codes within a State Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://app.zipcodebase.com/documentation#state API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - codes within the state - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'codes_by_state', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - - for op, arg1, arg2 in where_conditions: - if arg1 == "state": - if op == '=': - search_params["state"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for state column.") - - if arg1 == "country": - if op == '=': - search_params["country"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for country column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("state" in search_params) and ("country" in search_params) - - if not filter_flag: - raise NotImplementedError("`state` and `country` columns have to be present in where clause.") - - codes_by_state_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.client.codes_by_state(search_params.get("state"), search_params.get("country")) - - self.check_res(res=response) - - content = response["content"] - - logger.info(f"response size - {len(content['results'])}") - codes_by_state_df = pd.json_normalize({"codes": content["results"]}) - - select_statement_executor = SELECTQueryExecutor( - codes_by_state_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - codes_by_state_df = select_statement_executor.execute_query() - - return codes_by_state_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - - return [ - "codes" - ] - - -class ZipCodeBaseStatesByCountryTable(APITable): - """The ZipCodeBase Provinces/states within a country Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls data from the https://app.zipcodebase.com/documentation#provinces API - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - states within a country - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'states_by_country', - self.get_columns() - ) - - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - subset_where_conditions = [] - - for op, arg1, arg2 in where_conditions: - - if arg1 == "country": - if op == '=': - search_params["country"] = arg2 - else: - raise NotImplementedError("Only '=' operator is supported for country column.") - - elif arg1 in self.get_columns(): - subset_where_conditions.append([op, arg1, arg2]) - - filter_flag = ("country" in search_params) - - if not filter_flag: - raise NotImplementedError("`country` column has to be present in where clause.") - - states_by_country_df = pd.DataFrame(columns=self.get_columns()) - - response = self.handler.client.states_by_country(search_params.get("country")) - - self.check_res(res=response) - - content = response["content"] - - logger.info(f"response size - {len(content['results'])}") - states_by_country_df = pd.json_normalize({"states": content["results"]}) - - select_statement_executor = SELECTQueryExecutor( - states_by_country_df, - selected_columns, - subset_where_conditions, - order_by_conditions, - result_limit - ) - - states_by_country_df = select_statement_executor.execute_query() - - return states_by_country_df - - def check_res(self, res): - if res["code"] != 200: - raise Exception("Error fetching results - " + res["error"]) - - def get_columns(self) -> List[str]: - """Gets all columns to be returned in pandas DataFrame responses - Returns - ------- - List[str] - List of columns - """ - - return [ - "states" - ] diff --git a/mindsdb/integrations/handlers/zotero_handler/README.md b/mindsdb/integrations/handlers/zotero_handler/README.md deleted file mode 100644 index c49534976f1..00000000000 --- a/mindsdb/integrations/handlers/zotero_handler/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# Build your own Zotero AI agent - -This is the implementation of the Zotero handler for MindsDB. - -## Zotero -[Zotero](https://www.zotero.org/) is a free tool for organizing and managing research materials. It lets you store articles, books, and other resources in one place, annotate them, take notes, and create collections for organization. You can also share your collections with others. - -## Implementation -This handler uses [pyzotero](https://pyzotero.readthedocs.io/en/latest/) , an API wrapper for Zotero as a simple library to facilitate the integration. - -The required arguments to establish a connection are, -- `library_id` : -To find your library ID, as noted on [pyzotero](https://pyzotero.readthedocs.io/en/latest/) : - - Your personal library ID is available [here](https://www.zotero.org/settings/keys), in the section "Your userID for use in API calls" - you must be logged in for the link to work. - - For group libraries, the ID can be found by opening the group’s page: https://www.zotero.org/groups/groupname, and hovering over the group settings link. The ID is the integer after /groups/ -- `library_type` : "user" for personal account or "group" for a group library -- `api_key` : -To find your api key, as noted on [pyzotero](https://pyzotero.readthedocs.io/en/latest/) : follow [this link](https://www.zotero.org/settings/keys/new) - -## Usage -In order to make use of this handler and connect to Zotero in MindsDB, the following syntax can be used, - -```sql -CREATE DATABASE mindsdb_zotero -WITH ENGINE = 'zotero', -PARAMETERS = { - "library_id": "", - "library_type": "user", - "api_key": "" } -``` - -## Implemented Features - -Now, you can use this established connection to query your table as follows: - -Important note: Most things (articles, books, annotations, notes etc.) are refered to as **items** in the pyzotero API. So each item has an item_type (such as "annotation") and an item_id. Some items are children to other items (for example an annotation can be a child-item on the article where it was made on which is the parent-item) - -### Annotations -Annotations are the highlighted text of research materials. So, the most important information is in that annotated text content. To get that content and metadata about the annotations we can use the following queries. - -To select all annotations of your library: - -```sql -SELECT * FROM mindsdb_zotero.annotations -``` - -To select all data for one annotation by its id: -```sql -SELECT * FROM mindsdb_zotero.annotations WHERE item_id = "" -``` - -To select all annotations of a parent item (like all annotations on a book): -```sql -SELECT * FROM mindsdb_zotero.annotations WHERE parent_item_id = "" -``` - - diff --git a/mindsdb/integrations/handlers/zotero_handler/__about__.py b/mindsdb/integrations/handlers/zotero_handler/__about__.py deleted file mode 100644 index 3f9f15648e9..00000000000 --- a/mindsdb/integrations/handlers/zotero_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Zotero handler' -__package_name__ = 'mindsdb_zotero_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Zotero" -__author__ = 'Elina Kapetanaki' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2024 - mindsdb' diff --git a/mindsdb/integrations/handlers/zotero_handler/__init__.py b/mindsdb/integrations/handlers/zotero_handler/__init__.py deleted file mode 100644 index ace936d806f..00000000000 --- a/mindsdb/integrations/handlers/zotero_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .zotero_handler import ( - ZoteroHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Zotero' -name = 'zotero' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/zotero_handler/icon.svg b/mindsdb/integrations/handlers/zotero_handler/icon.svg deleted file mode 100644 index 8a11ff428fa..00000000000 --- a/mindsdb/integrations/handlers/zotero_handler/icon.svg +++ /dev/null @@ -1,102 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/zotero_handler/requirements.txt b/mindsdb/integrations/handlers/zotero_handler/requirements.txt deleted file mode 100644 index 3f3fa7b9ec9..00000000000 --- a/mindsdb/integrations/handlers/zotero_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pyzotero diff --git a/mindsdb/integrations/handlers/zotero_handler/zotero_handler.py b/mindsdb/integrations/handlers/zotero_handler/zotero_handler.py deleted file mode 100644 index c85f3df2dd9..00000000000 --- a/mindsdb/integrations/handlers/zotero_handler/zotero_handler.py +++ /dev/null @@ -1,109 +0,0 @@ -import os -from pyzotero import zotero -from mindsdb.utilities import log -from mindsdb.utilities.config import Config -from mindsdb.integrations.libs.api_handler import APIHandler, FuncParser - -from mindsdb.integrations.handlers.zotero_handler.zotero_tables import AnnotationsTable - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE -) - -logger = log.getLogger(__name__) - - -class ZoteroHandler(APIHandler): - """Handles communication with the Zotero API.""" - - def __init__(self, name=None, **kwargs): - """Initialize the Zotero handler. - - Parameters - ---------- - name : str - Name of the handler instance. - - Other Parameters - ---------------- - connection_data : dict - Dictionary containing connection data such as 'library_id', 'library_type', and 'api_key'. - If not provided, will attempt to fetch from environment variables or configuration file. - """ - super().__init__(name) - self.connection_args = self._get_connection_args(kwargs.get('connection_data', {})) - self.is_connected = False - self.api = None - self._register_table('annotations', AnnotationsTable(self)) - - def _get_connection_args(self, args): - """Fetch connection arguments from parameters, environment variables, or configuration. - - Parameters - ---------- - args - Dictionary containing connection data. - - Returns - ------- - connection_args - Connection data list - """ - handler_config = Config().get('zotero_handler', {}) - connection_args = {} - for k in ['library_id', 'library_type', 'api_key']: - connection_args[k] = args.get(k) or os.getenv(f'ZOTERO_{k.upper()}') or handler_config.get(k) - return connection_args - - def connect(self) -> StatusResponse: - """Connect to the Zotero API. - - Returns - ------- - StatusResponse - Status of the connection attempt. - """ - if not self.is_connected: - self.api = zotero.Zotero( - self.connection_args['library_id'], - self.connection_args['library_type'], - self.connection_args['api_key'] - ) - self.is_connected = True - return StatusResponse(True) - - def check_connection(self) -> StatusResponse: - """Check the connection status to the Zotero API. - - Returns - ------- - StatusResponse - Status of the connection. - """ - try: - self.connect() - return StatusResponse(True) - except Exception as e: - error_message = f'Error connecting to Zotero API: {str(e)}. Check credentials.' - logger.error(error_message) - self.is_connected = False - return StatusResponse(False, error_message=error_message) - - def native_query(self, query_string: str = None): - """Execute a native query against the Zotero API. - - Parameters - ---------- - query_string : str - The query string to execute, formatted as required by the Zotero API. - - Returns - ------- - Response - Response object containing the result of the query. - """ - method_name, params = FuncParser().from_string(query_string) - df = self._call_find_annotations_zotero_api(method_name, params) - return Response(RESPONSE_TYPE.TABLE, data_frame=df) diff --git a/mindsdb/integrations/handlers/zotero_handler/zotero_tables.py b/mindsdb/integrations/handlers/zotero_handler/zotero_tables.py deleted file mode 100644 index 3c6b5b5f6a0..00000000000 --- a/mindsdb/integrations/handlers/zotero_handler/zotero_tables.py +++ /dev/null @@ -1,157 +0,0 @@ -import pandas as pd -from mindsdb.utilities import log -from mindsdb_sql_parser import ast -from mindsdb.integrations.libs.api_handler import APITable -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions - -logger = log.getLogger(__name__) - - -class AnnotationsTable(APITable): - """Represents a table of annotations in Zotero.""" - - def select(self, query: ast.Select): - """Select annotations based on the provided query. - - Parameters - ---------- - query : ast.Select - AST (Abstract Syntax Tree) representation of the SQL query. - - Returns - ------- - Response - Response object containing the selected annotations as a DataFrame. - """ - if query.where is None: # Handle case for SELECT * FROM annotations - df = self._get_items() - return df[self.get_columns()] - - conditions = extract_comparison_conditions(query.where) - supported = False # Flag to check if the query is supported - - for op, arg1, arg2 in conditions: - if op in {'or', 'and'}: - raise NotImplementedError('OR and AND are not supported') - if arg1 == 'item_id' and op == '=': - df = self._get_item(arg2) - supported = True - elif arg1 == 'parent_item_id' and op == '=': - df = self._get_item_children(arg2) - supported = True - - if not supported: - raise NotImplementedError('Only "item_id=" and "parent_item_id=" conditions are implemented') - - return df[self.get_columns()] - - def get_columns(self): - """Get the columns of the annotations table. - - Returns - ------- - list - List of column names. - """ - return [ - 'annotationColor', - 'annotationComment', - 'annotationPageLabel', - 'annotationText', - 'annotationType', - 'dateAdded', - 'dateModified', - 'key', - 'parentItem', - 'relations', - 'tags', - 'version' - ] - - def _get_items(self) -> pd.DataFrame: - """Get all annotations from the Zotero API. - - Returns - ------- - pd.DataFrame - DataFrame containing all annotations. - """ - if not self.handler.is_connected: - self.handler.connect() - - try: - method = getattr(self.handler.api, 'items') - result = method(itemType='annotation') - - if isinstance(result, dict): - return pd.DataFrame([result.get('data', {})]) - if isinstance(result, list) and all(isinstance(item, dict) for item in result): - data_list = [item.get('data', {}) for item in result] - return pd.DataFrame(data_list) - - except Exception as e: - logger.error(f"Error fetching items: {e}") - raise e - - return pd.DataFrame() - - def _get_item(self, item_id: str) -> pd.DataFrame: - """Get a single annotation by item ID. - - Parameters - ---------- - item_id : str - The ID of the item to fetch. - - Returns - ------- - pd.DataFrame - DataFrame containing the annotation. - """ - if not self.handler.is_connected: - self.handler.connect() - - try: - method = getattr(self.handler.api, 'item') - result = method(item_id, itemType='annotation') - - if isinstance(result, dict): - return pd.DataFrame([result.get('data', {})]) - - except Exception as e: - logger.error(f"Error fetching item with ID {item_id}: {e}") - raise e - - return pd.DataFrame() - - def _get_item_children(self, parent_item_id: str) -> pd.DataFrame: - """Get annotations for a specific parent item ID. - - Parameters - ---------- - parent_item_id : str - The parent item ID to fetch annotations for. - - Returns - ------- - pd.DataFrame - DataFrame containing the annotations. - """ - if not self.handler.is_connected: - self.handler.connect() - - try: - method = getattr(self.handler.api, 'children') - result = method(parent_item_id, itemType='annotation') - - if isinstance(result, dict): - return pd.DataFrame([result.get('data', {})]) - if isinstance(result, list) and all(isinstance(item, dict) for item in result): - data_list = [item.get('data', {}) for item in result] - return pd.DataFrame(data_list) - - except Exception as e: - logger.error(f"Error fetching children for parent item ID {parent_item_id}: {e}") - raise e - - return pd.DataFrame() diff --git a/mindsdb/integrations/libs/base.py b/mindsdb/integrations/libs/base.py index 9f7dbe618ff..2757b7ba594 100644 --- a/mindsdb/integrations/libs/base.py +++ b/mindsdb/integrations/libs/base.py @@ -1,15 +1,23 @@ import ast import concurrent.futures +import functools import inspect import textwrap from _ast import AnnAssign, AugAssign -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, get_type_hints, get_args, Union, get_origin import pandas as pd from mindsdb_sql_parser.ast.base import ASTNode from mindsdb.utilities import log -from mindsdb.integrations.libs.response import HandlerResponse, HandlerStatusResponse, RESPONSE_TYPE +from mindsdb.integrations.libs.response import ( + HandlerStatusResponse, + RESPONSE_TYPE, + DataHandlerResponse, + normalize_response, + ErrorResponse, + TableResponse, +) logger = log.getLogger(__name__) @@ -21,6 +29,59 @@ class BaseHandler: broader MindsDB ecosystem via SQL commands. """ + stream_response = False + + def __init_subclass__(cls, **kwargs): + """Automatically wrap handler methods to normalize their responses. + + When a subclass is defined, this method checks if any of the methods + in _methods_to_normalize are overridden and wraps them to convert + legacy HandlerResponse to new response types (TableResponse, OkResponse, + ErrorResponse). + """ + super().__init_subclass__(**kwargs) + + # Methods whose return values should be normalized to new response types + _methods_to_normalize = ( + "native_query", + "query", + "insert", + "get_tables", + "get_columns", + "meta_get_tables", + "meta_get_columns", + "meta_get_column_statistics", + "meta_get_column_statistics_for_table", + "meta_get_primary_keys", + "meta_get_foreign_keys", + ) + for method_name in _methods_to_normalize: + # Only wrap if method is defined directly in this class (not inherited) + if method_name not in cls.__dict__: + continue + + original_method = cls.__dict__[method_name] + + return_type = get_type_hints(original_method).get("return") + if return_type is DataHandlerResponse or ( + get_origin(return_type) is Union and issubclass(get_args(return_type)[0], DataHandlerResponse) + ): + # this is already new style response + continue + + # Skip if already wrapped + if getattr(original_method, "_response_normalized", False): + continue + + # Create wrapper that normalizes response + @functools.wraps(original_method) + def wrapper(self, *args, _orig=original_method, **kwargs): + result = _orig(self, *args, **kwargs) + return normalize_response(result) + + wrapper._response_normalized = True + setattr(cls, method_name, wrapper) + def __init__(self, name: str): """constructor Args: @@ -53,19 +114,19 @@ def check_connection(self) -> HandlerStatusResponse: """ raise NotImplementedError() - def native_query(self, query: Any) -> HandlerResponse: + def native_query(self, query: Any, stream: bool = False, **kwargs) -> DataHandlerResponse: """Receive raw query and act upon it somehow. Args: - query (Any): query in native format (str for sql databases, - etc) - + query (Any): query in native format (str for sql databases, etc) + stream (bool): Whether to stream the results of the query + **kwargs: Additional keyword arguments. Returns: - HandlerResponse + DataHandlerResponse """ raise NotImplementedError() - def query(self, query: ASTNode) -> HandlerResponse: + def query(self, query: ASTNode) -> DataHandlerResponse: """Receive query as AST (abstract syntax tree) and act upon it somehow. Args: @@ -73,30 +134,30 @@ def query(self, query: ASTNode) -> HandlerResponse: of query: SELECT, INSERT, DELETE, etc Returns: - HandlerResponse + DataHandlerResponse """ raise NotImplementedError() - def get_tables(self) -> HandlerResponse: + def get_tables(self) -> DataHandlerResponse: """Return list of entities Return list of entities that will be accesible as tables. Returns: - HandlerResponse: shoud have same columns as information_schema.tables + DataHandlerResponse: shoud have same columns as information_schema.tables (https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html) Column 'TABLE_NAME' is mandatory, other is optional. """ raise NotImplementedError() - def get_columns(self, table_name: str) -> HandlerResponse: + def get_columns(self, table_name: str) -> DataHandlerResponse: """Returns a list of entity columns Args: table_name (str): name of one of tables returned by self.get_tables() Returns: - HandlerResponse: shoud have same columns as information_schema.columns + DataHandlerResponse: shoud have same columns as information_schema.columns (https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html) Column 'COLUMN_NAME' is mandatory, other is optional. Hightly recomended to define also 'DATA_TYPE': it should be one of @@ -125,12 +186,12 @@ class MetaDatabaseHandler(DatabaseHandler): def __init__(self, name: str): super().__init__(name) - def meta_get_tables(self, table_names: Optional[List[str]]) -> HandlerResponse: + def meta_get_tables(self, table_names: Optional[List[str]]) -> DataHandlerResponse: """ Returns metadata information about the tables to be stored in the data catalog. Returns: - HandlerResponse: The response should consist of the following columns: + DataHandlerResponse: The response should consist of the following columns: - TABLE_NAME (str): Name of the table. - TABLE_TYPE (str): Type of the table, e.g. 'BASE TABLE', 'VIEW', etc. (optional). - TABLE_SCHEMA (str): Schema of the table (optional). @@ -139,12 +200,12 @@ def meta_get_tables(self, table_names: Optional[List[str]]) -> HandlerResponse: """ raise NotImplementedError() - def meta_get_columns(self, table_names: Optional[List[str]]) -> HandlerResponse: + def meta_get_columns(self, table_names: Optional[List[str]]) -> DataHandlerResponse: """ Returns metadata information about the columns in the tables to be stored in the data catalog. Returns: - HandlerResponse: The response should consist of the following columns: + DataHandlerResponse: The response should consist of the following columns: - TABLE_NAME (str): Name of the table. - COLUMN_NAME (str): Name of the column. - DATA_TYPE (str): Data type of the column, e.g. 'VARCHAR', 'INT', etc. @@ -154,13 +215,13 @@ def meta_get_columns(self, table_names: Optional[List[str]]) -> HandlerResponse: """ raise NotImplementedError() - def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> HandlerResponse: + def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> DataHandlerResponse: """ Returns metadata statisical information about the columns in the tables to be stored in the data catalog. Either this method should be overridden in the handler or `meta_get_column_statistics_for_table` should be implemented. Returns: - HandlerResponse: The response should consist of the following columns: + DataHandlerResponse: The response should consist of the following columns: - TABLE_NAME (str): Name of the table. - COLUMN_NAME (str): Name of the column. - MOST_COMMON_VALUES (List[str]): Most common values in the column (optional). @@ -207,17 +268,14 @@ def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> Handle if not results: logger.warning("No column statistics could be retrieved for the specified tables.") - return HandlerResponse(RESPONSE_TYPE.ERROR, error_message="No column statistics could be retrieved.") - return HandlerResponse( - RESPONSE_TYPE.TABLE, pd.concat(results, ignore_index=True) if results else pd.DataFrame() - ) - + return ErrorResponse(error_message="No column statistics could be retrieved.") + return TableResponse(data=pd.concat(results, ignore_index=True) if results else pd.DataFrame()) else: raise NotImplementedError() def meta_get_column_statistics_for_table( self, table_name: str, column_names: Optional[List[str]] = None - ) -> HandlerResponse: + ) -> DataHandlerResponse: """ Returns metadata statistical information about the columns in a specific table to be stored in the data catalog. Either this method should be implemented in the handler or `meta_get_column_statistics` should be overridden. @@ -227,7 +285,7 @@ def meta_get_column_statistics_for_table( column_names (Optional[List[str]]): List of column names to retrieve statistics for. If None, statistics for all columns will be returned. Returns: - HandlerResponse: The response should consist of the following columns: + DataHandlerResponse: The response should consist of the following columns: - TABLE_NAME (str): Name of the table. - COLUMN_NAME (str): Name of the column. - MOST_COMMON_VALUES (List[str]): Most common values in the column (optional). @@ -239,12 +297,12 @@ def meta_get_column_statistics_for_table( """ pass - def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> HandlerResponse: + def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> DataHandlerResponse: """ Returns metadata information about the primary keys in the tables to be stored in the data catalog. Returns: - HandlerResponse: The response should consist of the following columns: + DataHandlerResponse: The response should consist of the following columns: - TABLE_NAME (str): Name of the table. - COLUMN_NAME (str): Name of the column that is part of the primary key. - ORDINAL_POSITION (int): Position of the column in the primary key (optional). @@ -252,12 +310,12 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> HandlerResp """ raise NotImplementedError() - def meta_get_foreign_keys(self, table_names: Optional[List[str]]) -> HandlerResponse: + def meta_get_foreign_keys(self, table_names: Optional[List[str]]) -> DataHandlerResponse: """ Returns metadata information about the foreign keys in the tables to be stored in the data catalog. Returns: - HandlerResponse: The response should consist of the following columns: + DataHandlerResponse: The response should consist of the following columns: - PARENT_TABLE_NAME (str): Name of the parent table. - PARENT_COLUMN_NAME (str): Name of the parent column that is part of the foreign key. - CHILD_TABLE_NAME (str): Name of the child table. diff --git a/mindsdb/integrations/libs/const.py b/mindsdb/integrations/libs/const.py index 0e5ccc23c32..01749c4ce0a 100644 --- a/mindsdb/integrations/libs/const.py +++ b/mindsdb/integrations/libs/const.py @@ -16,6 +16,7 @@ class HANDLER_CONNECTION_ARG_TYPE: PATH = "path" DICT = "dict" PWD = "pwd" + LIST = "list" HANDLER_CONNECTION_ARG_TYPE = HANDLER_CONNECTION_ARG_TYPE() diff --git a/mindsdb/integrations/libs/keyword_search_base.py b/mindsdb/integrations/libs/keyword_search_base.py index 6a1cfdd9b80..d515764ba2a 100644 --- a/mindsdb/integrations/libs/keyword_search_base.py +++ b/mindsdb/integrations/libs/keyword_search_base.py @@ -36,6 +36,6 @@ def keyword_select( conditions (List[FilterCondition]): conditions to select Returns: - HandlerResponse + pd.DataFrame """ raise NotImplementedError() diff --git a/mindsdb/integrations/libs/llm/utils.py b/mindsdb/integrations/libs/llm/utils.py index dcf80dd425a..da01454142e 100644 --- a/mindsdb/integrations/libs/llm/utils.py +++ b/mindsdb/integrations/libs/llm/utils.py @@ -1,8 +1,5 @@ import re -import json -import itertools -from enum import Enum -from typing import Optional, Dict, List, Tuple +from typing import Dict, List, Tuple import numpy as np import pandas as pd @@ -20,23 +17,6 @@ BedrockConfig, ) from mindsdb.utilities.config import config -from mindsdb.integrations.utilities.rag.splitters.custom_splitters import RecursiveCharacterTextSplitter - - -class Language(Enum): - PYTHON = "python" - JAVASCRIPT = "javascript" - TYPESCRIPT = "typescript" - JAVA = "java" - CPP = "cpp" - C = "c" - GO = "go" - RUST = "rust" - RUBY = "ruby" - PHP = "php" - SWIFT = "swift" - KOTLIN = "kotlin" - SCALA = "scala" # Default to latest GPT-4 model (https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) @@ -256,357 +236,3 @@ def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig: ) raise ValueError(f"Provider {provider} is not supported.") - - -def ft_jsonl_validation( - items: list, # read from a JSONL file - messages_col: str = "messages", - # valid keys for each chat message - role_key: str = "role", - content_key: str = "content", - name_key: str = "name", - # valid roles for each chat message - system_key: str = "system", - user_key: str = "user", - assistant_key: str = "assistant", -): - """ - This helper checks a list of dictionaries for compliance with the format usually expected by LLM providers - (such as OpenAI or AnyscaleEndpoints) for fine-tuning LLMs that generate chat completions. - - Defaults are set according to the expected format, but these can be changed if needed by any given provider. - - :param items: list of JSON lines, each dictionary containing a chat sequence. Should be read from a JSONL file. - :param messages_col: key in each dictionary to access a sequence of chat messages - - - For chat-level checks, this method defers to `ft_chat_format_validation()` below. Relevant parameters for it are: - - For each chat: - :param role_key: key that defines the role of each message (e.g. system, user, or LLM) - :param content_key: key that defines the content of each message - :param name_key: key that defines the name of each message - - For each message: - :param system_key: valid role for each chat message - :param user_key: valid role for each chat message - :param assistant_key: valid role for each chat message - - :return: None, raises an Exception if validation fails. - """ # noqa - try: - if not all([isinstance(m, dict) for m in items]): - raise Exception("Each line in the provided data should be a dictionary") - - for line_num, batch in enumerate(items): - prefix = f"error in chat #{line_num + 1}, " - - if not isinstance(batch[messages_col], list): - raise Exception( - f"{prefix}Each line in the provided data should have a '{messages_col}' key with a list of messages" - ) # noqa - - if messages_col not in batch: - raise Exception(f"{prefix}Each line in the provided data should have a '{messages_col}' key") - - messages = batch[messages_col] - try: - ft_chat_format_validation( - messages, - role_key=role_key, - content_key=content_key, - name_key=name_key, - system_key=system_key, - user_key=user_key, - assistant_key=assistant_key, - ) - except Exception as e: - raise Exception(f"{prefix}{e}") from e - - except Exception as e: - raise Exception(f"Fine-tuning data format is not valid. Got {e}") from e - - -def ft_chat_format_validation( - chat: list, - transitions: Optional[Dict] = None, - system_key: str = "system", - user_key: str = "user", - assistant_key: str = "assistant", - role_key: str = "role", - content_key: str = "content", - name_key: str = "name", -): - """ - Finite state machine to check a chat has valid format to finetune an LLM with it. - Follows OpenAI ChatCompletion format (also used by other providers such as AnyscaleEndpoints). - Reference: https://cookbook.openai.com/examples/chat_finetuning_data_prep - - The unit test in `test_llm_utils.py` for examples of valid and invalid chats. - - :param chat: list of dictionaries, each containing a chat message - :param transitions: optional dictionary defining valid transitions between chat messages (e.g. from system to user to assistant) - - For each chat: - :param role_key: key that defines the role of each message (e.g. system, user, or LLM) - :param content_key: key that defines the content of each message - :param name_key: key that defines the name of each message - - For each message: - :param system_key: valid role for each chat message - :param user_key: valid role for each chat message - :param assistant_key: valid role for each chat message - - :return: None if chat is valid, otherwise raise an informative Exception. - """ # noqa - - valid_keys = (role_key, content_key, name_key) - valid_roles = (system_key, user_key, assistant_key) - - for c in chat: - if any(k not in valid_keys for k in c.keys()): - raise Exception(f"Each message should only have these keys: `{valid_keys}`. Found: `{c.keys()}`") - - roles = [m[role_key] for m in chat] - contents = [m[content_key] for m in chat] - - if len(roles) != len(contents): - raise Exception(f"Each message should contain both `{role_key}` and `{content_key}` fields") - - if len(roles) == 0: - raise Exception("Chat should have at least one message") - - if assistant_key not in roles: - raise Exception("Chat should have at least one assistant message") # otherwise it is useless for FT - - if user_key not in roles: - raise Exception("Chat should have at least one user message") # perhaps remove in the future - - # set default transitions for finite state machine if undefined - if transitions is None: - transitions = { - None: [system_key, user_key], - system_key: [user_key], - user_key: [assistant_key], - assistant_key: [user_key], - } - - # check order is valid via finite state machine - state = None - for i, (role, content) in enumerate(zip(roles, contents)): - prefix = f"message #{i + 1}: " - - # check invalid roles - if role not in valid_roles: - raise Exception(f"{prefix}Invalid role (found `{role}`, expected one of `{valid_roles}`)") - - # check content - if not isinstance(content, str): - raise Exception(f"{prefix}Content should be a string, got type `{type(content)}`") - - # check transition - if role not in transitions[state]: - raise Exception(f"{prefix}Invalid transition from `{state}` to `{role}`") - else: - state = role - - -def ft_formatter(df: pd.DataFrame) -> List[Dict]: - """ - Data preparation entry point for chat LLM finetuning. This method will dispatch to the appropriate formatters. - - Supported formats: - - code: long tabular format with a `code` column - - chat: long tabular format with `role` and `content` columns, or a JSON format with a `chat_json` column. - """ - if "code" in df.columns: - df = ft_code_formatter(df) - - elif {"question", "context", "answer"}.issubset(set(df.columns)): - # TODO: handler user-specified names for these columns - df = ft_cqa_formatter(df) - - return ft_chat_formatter(df) - - -def ft_chat_formatter(df: pd.DataFrame) -> List[Dict]: - """ - For more details, check `FineTuning -> Data Format` in the Anyscale API reference, or the OpenAI equivalent. - Additionally, the unit test in `test_llm_utils.py` provides example usage. - - :param df: input dataframe has chats in one of the following formats: - 1) long tabular: at least two columns, `role` and `content`. Rows contain >= 1 chats in long (stacked) format. - - 2) JSON: at least one column, `chat_json`. Each row contains exactly 1 chat in JSON format. - Example for `chat_json` content: - > `{"messages": [{"role": "user", "content": "Hello!"}, {"role": "assistant", "content": "Hi!"}]}` - - Optional df columns are: - - chat_id: unique identifier for each chat - - message_id: unique identifier for each message within each chat - - Data will be sorted by both if they are provided. - - If only `chat_id` is provided, data will be sorted by it with a stable sort, so messages for each chat - will be in the same order as in the original data. - - If only `message_id` is provided, it must not contain duplicate IDs. Entire dataset will be treated - as a single chat. Otherwise an exception will be raised. - - :return: list of chats. Each chat is a dictionary with a top level key 'messages' containing a list of messages - that comply with the OpenAI's ChatEndpoint expected format (i.e., each is a dictionary with a `role` and - `content` key. - - """ # noqa - # 1. pre-sort df on optional columns - if "chat_id" in df.columns: - if "message_id" in df.columns: - df = df.sort_values(["chat_id", "message_id"]) - else: - df = df.sort_values(["chat_id"], kind="stable") - elif "message_id" in df.columns: - if df["message_id"].duplicated().any(): - raise Exception("If `message_id` is provided, it must not contain duplicate IDs.") - df = df.sort_values(["message_id"]) - - # 2. build chats - chats = [] - - # 2a. chats are in JSON format - if "chat_json" in df.columns: - for _, row in df.iterrows(): - try: - chat = json.loads(row["chat_json"]) - assert list(chat.keys()) == ["messages"], "Each chat should have a 'messages' key, and nothing else." - ft_chat_format_validation(chat["messages"]) # will raise Exception if chat is invalid - chats.append(chat) - except json.JSONDecodeError: - pass # TODO: add logger info here, prompt user to clean dataset carefully - - # 2b. chats are in tabular format - aggregate each chat sequence into one row - else: - chat = [] - for i, row in df.iterrows(): - if row["role"] == "system" and len(chat) > 0: - ft_chat_format_validation(chat) # will raise Exception if chat is invalid - chats.append({"messages": chat}) - chat = [] - event = {"role": row["role"], "content": row["content"]} - chat.append(event) - - ft_chat_format_validation(chat) # will raise Exception if chat is invalid - chats.append({"messages": chat}) - - return chats - - -def ft_code_formatter( - df: pd.DataFrame, - format="chat", - language="python", - chunk_size=100, - chunk_overlap=0, - chat_sections=("Code prefix", "Code suffix", "Completion"), - fim_tokens=("
", "", ""),
-) -> pd.DataFrame:
-    """
-    This utility processes a raw codebase stored as a dataframe with a `code` column, where
-    every row may be an entire file or some portion of it.
-    It chunks code into triples made of a prefix, middle, and suffix.
-
-    Depending on the target LLM, these triples are then formatted into a chat-like prompt, or a
-    fill-in-the-middle (FIM) prompt. The latter is used for fine-tuning models like codellama,
-    while the former is more generic and should work with any LLM that supports the ChatCompletion
-    format, as the rest of our tools do.
-    """
-
-    # input and setup validation
-    assert len(df) > 0, "Input dataframe should not be empty"
-    assert "code" in df.columns, "Input dataframe should have a 'code' column"
-    assert chunk_size > 0 and isinstance(chunk_size, int), "`chunk_size` should be a positive integer"
-
-    supported_formats = ["chat", "fim"]
-    supported_langs = [e.value for e in Language]
-    assert language.lower() in supported_langs, f"Invalid language. Valid choices are: {supported_langs}"
-
-    # ensure correct encoding
-    df["code"] = df["code"].map(lambda x: x.encode("utf8").decode("unicode_escape"))
-
-    # set prompt templates
-    system_prompt = "You are a powerful text to code model. Your job is to provide great code completions. As context, you are given code that is found immediately before and after the code you must generate.\n\nYou must output the code that should go in between the prefix and suffix.\n\n"
-    if format == "chat":
-        templates = [f"### {c}:" for c in chat_sections]
-    elif format == "fim":
-        templates = fim_tokens
-    else:
-        raise Exception(f"Invalid format. Please choose one of {supported_formats}")
-
-    # split code into chunks
-    # Get language enum value (handle both string and enum)
-    lang_enum = getattr(Language, language.upper(), language)
-    code_splitter = RecursiveCharacterTextSplitter.from_language(
-        language=lang_enum,
-        chunk_size=3 * chunk_size,  # each triplet element has `chunk_size`
-        chunk_overlap=chunk_overlap,  # some overlap here is fine
-    )
-    chunk_docs = code_splitter.create_documents(list(df["code"]))
-    chunks = [c.page_content for c in chunk_docs]
-
-    # split each chunk into a triplet, with no overlap
-    triplet_splitter = RecursiveCharacterTextSplitter.from_language(
-        language=lang_enum,
-        chunk_size=chunk_size,
-        chunk_overlap=0,  # no overlap admitted, otherwise context may leak into answer
-    )
-    triplet_chunk_docs = triplet_splitter.create_documents(chunks)
-    chunks = [c.page_content for c in triplet_chunk_docs]
-    chunks = chunks[: len(chunks) - len(chunks) % 3]  # should be a multiple of 3
-
-    # format chunks into prompts
-    roles = []
-    contents = []
-    for idx in range(0, len(chunks), 3):
-        pre, mid, suf = chunks[idx : idx + 3]
-        interleaved = list(itertools.chain(*zip(templates, (pre, suf, mid))))
-        user = "\n".join(interleaved[:-1])
-        assistant = "\n".join(interleaved[-1:])
-        roles.extend(["system", "user", "assistant"])
-        contents.extend([system_prompt, user, assistant])
-
-    # return formatted prompts in a dataframe to be processed by `ft_chat_formatter()`
-    df = pd.DataFrame({"role": roles, "content": contents})
-    return df
-
-
-def ft_cqa_formatter(
-    df: pd.DataFrame,
-    question_col="question",
-    answer_col="answer",
-    instruction_col="instruction",
-    context_col="context",
-    default_instruction="You are a helpful assistant.",
-    default_context="",
-) -> pd.DataFrame:
-    # input and setup validation
-    assert len(df) > 0, "Input dataframe should not be empty"
-    assert {question_col, answer_col}.issubset(set(df.columns)), (
-        f"Input dataframe must have columns `{question_col}`, and `{answer_col}`"
-    )  # noqa
-
-    if instruction_col not in df.columns:
-        df[instruction_col] = default_instruction
-
-    if context_col not in df.columns:
-        df[context_col] = default_context
-
-    # format data into chat-like prompts
-    roles = []
-    contents = []
-    for i, row in df.iterrows():
-        system = "\n".join([row[instruction_col], row[context_col]])
-        user = row[question_col]
-        assistant = row[answer_col]
-        roles.extend(["system", "user", "assistant"])
-        contents.extend([system, user, assistant])
-
-    return pd.DataFrame({"role": roles, "content": contents})
diff --git a/mindsdb/integrations/libs/ml_exec_base.py b/mindsdb/integrations/libs/ml_exec_base.py
index 96eca4a033a..abac27d75de 100644
--- a/mindsdb/integrations/libs/ml_exec_base.py
+++ b/mindsdb/integrations/libs/ml_exec_base.py
@@ -7,7 +7,7 @@
       normally associated with a DB handler (e.g. `native_query`, `get_tables`), as well as other ML-specific behaviors,
       like `learn()` or `predict()`. Note that while these still have to be implemented at the engine level, the burden
       on that class is lesser given that it only needs to return a pandas DataFrame. It's this class that will take said
-      output and format it into the HandlerResponse instance that MindsDB core expects.
+      output and format it into the DataHandlerResponse instance that MindsDB core expects.
 
     - `learn_process` method: handles async dispatch of the `learn` method in an engine, as well as registering all
       models inside of the internal MindsDB registry.
diff --git a/mindsdb/integrations/libs/passthrough.py b/mindsdb/integrations/libs/passthrough.py
new file mode 100644
index 00000000000..f535d3dfeb8
--- /dev/null
+++ b/mindsdb/integrations/libs/passthrough.py
@@ -0,0 +1,477 @@
+"""
+PassthroughMixin β€” generic HTTP passthrough for authenticated REST APIs.
+
+A handler opts in by declaring three class attributes:
+
+    class MyHandler(APIHandler, PassthroughMixin):
+        _bearer_token_arg = "api_key"                 # key in connection_data
+        _base_url_default = "https://api.example.com" # fallback if user omits
+        _test_request = PassthroughRequest("GET", "/me")
+
+The mixin defaults to ``Authorization: Bearer ``. Handlers using a
+different auth scheme (e.g. Shopify's ``X-Shopify-Access-Token``) override
+``_auth_header_name`` and ``_auth_header_format`` β€” see CHANGE 3.
+
+The mixin reads ``self.connection_data`` (a dict populated from
+integration setup) to pull the token, resolve the base URL, and enforce
+the host allowlist. Handlers that need custom URL composition (e.g.
+``http://{host}:{port}``) override ``_build_base_url``.
+
+``PassthroughProtocol`` is a structural type describing the two public
+methods (``api_passthrough`` and ``test_passthrough``). The HTTP layer
+checks against the protocol rather than the mixin class, so a handler
+can satisfy the contract without inheriting the default implementation.
+"""
+
+import ipaddress
+import os
+import time
+from typing import Any, Protocol, runtime_checkable
+from urllib.parse import urlparse
+
+import requests
+
+from mindsdb.integrations.libs.passthrough_types import (
+    ALLOWED_METHODS,
+    FORBIDDEN_REQUEST_HEADERS,
+    HOP_BY_HOP_RESPONSE_HEADERS,
+    HostNotAllowedError,
+    PassthroughConfigError,
+    PassthroughRequest,
+    PassthroughResponse,
+    PassthroughValidationError,
+)
+from mindsdb.utilities import log
+
+logger = log.getLogger(__name__)
+
+
+PASSTHROUGH_TIMEOUT_SECONDS = int(os.getenv("MINDSDB_PASSTHROUGH_TIMEOUT_SECONDS", "30"))
+PASSTHROUGH_MAX_REQUEST_BYTES = int(os.getenv("MINDSDB_PASSTHROUGH_MAX_REQUEST_BYTES", str(1 * 1024 * 1024)))
+PASSTHROUGH_MAX_RESPONSE_BYTES = int(os.getenv("MINDSDB_PASSTHROUGH_MAX_RESPONSE_BYTES", str(10 * 1024 * 1024)))
+
+REDACTED_SENTINEL = "[REDACTED_API_KEY]"
+
+
+def _is_private_host(hostname: str) -> bool:
+    """Return True if `hostname` resolves to a private/loopback/link-local IP literal.
+
+    Only IP literals are checked; DNS resolution is intentionally not performed
+    (handlers may legitimately point at an internal DNS name the operator has
+    approved via `allowed_hosts`). The IP-literal check prevents a caller from
+    smuggling `http://127.0.0.1/` or `http://10.0.0.1/` through a typo'd base_url.
+    """
+    try:
+        ip = ipaddress.ip_address(hostname)
+    except ValueError:
+        return False
+    return ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_multicast or ip.is_reserved
+
+
+def _host_matches(host: str, allowlist: list[str]) -> bool:
+    if not host:
+        return False
+    host = host.lower()
+    return any(host == entry.lower() for entry in allowlist)
+
+
+@runtime_checkable
+class PassthroughProtocol(Protocol):
+    """Structural contract for handlers that expose HTTP passthrough.
+
+    The HTTP namespace checks against this Protocol rather than the
+    `PassthroughMixin` class, which lets future handlers satisfy the
+    contract without inheriting the default implementation.
+    """
+
+    def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse: ...
+
+    def test_passthrough(self) -> dict[str, Any]: ...
+
+
+class PassthroughMixin:
+    # Required overrides
+    _bearer_token_arg: str = ""
+
+    # Optional overrides
+    _base_url_arg: str = "base_url"
+    _base_url_default: str | None = None
+    _allowed_hosts_arg: str = "allowed_hosts"
+    _default_headers_arg: str = "default_headers"
+
+    # Auth header. Defaults to bearer-compatible; handlers using a custom
+    # scheme (e.g. Shopify's `X-Shopify-Access-Token: `) override
+    # both attrs. The value from `_get_bearer_token()` is formatted into
+    # `{token}` β€” the method name is retained for backwards compat but
+    # now represents "the auth secret" regardless of scheme.
+    _auth_header_name: str = "Authorization"
+    _auth_header_format: str = "Bearer {token}"
+
+    # Declarative auth mode surfaced to /capabilities. One handler instance
+    # has exactly one auth mode, so this is a single string; the API
+    # response still wraps it in a list because a future contract may
+    # surface handlers supporting multiple configurations. Known values:
+    # "bearer", "custom", "oauth_refresh". Handlers that use a non-bearer
+    # header scheme or a refresh-aware mixin should set this explicitly β€”
+    # don't infer it from _auth_header_format, since OAuth-refresh also
+    # uses "Bearer {token}" but is a distinct mode.
+    _auth_mode: str = "bearer"
+
+    # Canonical sanity-check request for `test_passthrough()`. Handlers MUST
+    # set this if they want the /passthrough/test endpoint to do anything
+    # useful. `None` means "test endpoint returns 'not implemented'".
+    _test_request: PassthroughRequest | None = None
+
+    # Stamped on every upstream request so the upstream can identify our
+    # traffic for support/debugging. See design Β§13 (q3).
+    _upstream_marker_header: str = "X-Minds-Passthrough"
+
+    # Hook: override when URL composition is more than "take a string"
+    # (e.g. strapi composes from host+port).
+    def _build_base_url(self) -> str | None:
+        data = self._get_connection_data()
+        value = data.get(self._base_url_arg) if self._base_url_arg else None
+        if value:
+            return str(value).rstrip("/")
+        if self._base_url_default is not None:
+            return self._base_url_default.rstrip("/")
+        return None
+
+    def _get_connection_data(self) -> dict[str, Any]:
+        """Return the handler's stored connection_data dict.
+
+        Handlers store this differently; we check the common attribute names
+        so most handlers don't need to override.
+        """
+        for attr in ("connection_data", "_connection_data"):
+            value = getattr(self, attr, None)
+            if isinstance(value, dict):
+                return value
+        return {}
+
+    def _get_bearer_token(self) -> str:
+        if not self._bearer_token_arg:
+            raise PassthroughConfigError("handler did not declare _bearer_token_arg")
+        token = self._get_connection_data().get(self._bearer_token_arg)
+        if not token:
+            raise PassthroughConfigError(f"bearer token ('{self._bearer_token_arg}') is missing from connection_data")
+        return str(token)
+
+    def _resolve_url(self, path: str) -> tuple[str, str]:
+        """Return ``(url, hostname)`` for the outgoing request.
+
+        `path` is appended to the base URL verbatim. After joining we parse
+        the result and compare the hostname against the allowlist β€” path
+        injection tricks like ``@evil.com`` or ``//evil.com`` are rejected
+        at the hostname-comparison step, not by string matching.
+        """
+        if not path.startswith("/"):
+            raise PassthroughValidationError("path must start with '/'")
+        base = self._build_base_url()
+        if not base:
+            raise PassthroughConfigError("base_url is not configured for this datasource")
+
+        url = f"{base}{path}"
+        parsed = urlparse(url)
+        if parsed.scheme not in ("http", "https") or not parsed.hostname:
+            raise PassthroughValidationError(f"resolved URL is not valid: {url}")
+        return url, parsed.hostname
+
+    def _allowed_hosts(self, default_host: str) -> list[str]:
+        data = self._get_connection_data()
+        allowed = data.get(self._allowed_hosts_arg)
+        if isinstance(allowed, list) and allowed:
+            return [str(h) for h in allowed]
+        return [default_host]
+
+    def _check_host_allowed(self, hostname: str) -> None:
+        allowlist = self._allowed_hosts(hostname)
+        if allowlist == ["*"]:
+            return
+        if not _host_matches(hostname, allowlist):
+            raise HostNotAllowedError(f"host '{hostname}' is not in the datasource allowlist")
+        if _is_private_host(hostname):
+            raise HostNotAllowedError(
+                f"host '{hostname}' resolves to a private/loopback address; "
+                "set allowed_hosts=['*'] to bypass this check (explicit "
+                "listing is ignored for private IPs)"
+            )
+
+    def _build_outgoing_headers(self, caller_headers: dict[str, str], bearer: str) -> dict[str, str]:
+        """Merge caller headers (filtered) + default_headers + Authorization."""
+        out: dict[str, str] = {}
+        data = self._get_connection_data()
+        defaults = data.get(self._default_headers_arg) or {}
+        if isinstance(defaults, dict):
+            out.update({str(k): str(v) for k, v in defaults.items()})
+        for name, value in (caller_headers or {}).items():
+            if name.lower() in FORBIDDEN_REQUEST_HEADERS:
+                continue
+            if name.lower().startswith("proxy-"):
+                continue
+            out[name] = value
+        out[self._auth_header_name] = self._auth_header_format.format(token=bearer)
+        out[self._upstream_marker_header] = "1"
+        return out
+
+    def _secrets_for_scrub(self) -> list[str]:
+        """Values that must not appear in the response returned to the caller."""
+        secrets: list[str] = []
+        try:
+            secrets.append(self._get_bearer_token())
+        except PassthroughConfigError:
+            pass
+        data = self._get_connection_data()
+        defaults = data.get(self._default_headers_arg) or {}
+        if isinstance(defaults, dict):
+            for value in defaults.values():
+                s = str(value)
+                if len(s) >= 16:
+                    secrets.append(s)
+        return secrets
+
+    def _scrub(self, text: str, secrets: list[str]) -> str:
+        for s in secrets:
+            if s:
+                text = text.replace(s, REDACTED_SENTINEL)
+        return text
+
+    def _scrub_bytes(self, data: bytes, secrets: list[str]) -> bytes:
+        """Byte-level secret scrub (spec Β§7.6).
+
+        Replacing on raw bytes before decoding prevents U+FFFD substitutions
+        from `errors="replace"` from fragmenting a secret and letting part of
+        it survive the scrub.
+        """
+        sentinel = REDACTED_SENTINEL.encode("utf-8")
+        for s in secrets:
+            if s:
+                data = data.replace(s.encode("utf-8"), sentinel)
+        return data
+
+    def _filter_response_headers(self, headers: dict[str, str], secrets: list[str]) -> dict[str, str]:
+        filtered: dict[str, str] = {}
+        for name, value in headers.items():
+            if name.lower() in HOP_BY_HOP_RESPONSE_HEADERS:
+                continue
+            filtered[name] = self._scrub(str(value), secrets)
+        return filtered
+
+    def _read_capped_body(self, response: requests.Response) -> bytes:
+        """Read the response body in chunks, abort if it exceeds the cap."""
+        chunks: list[bytes] = []
+        total = 0
+        try:
+            for chunk in response.iter_content(chunk_size=64 * 1024):
+                if not chunk:
+                    continue
+                total += len(chunk)
+                if total > PASSTHROUGH_MAX_RESPONSE_BYTES:
+                    raise PassthroughValidationError(f"response body exceeded {PASSTHROUGH_MAX_RESPONSE_BYTES} bytes")
+                chunks.append(chunk)
+        finally:
+            response.close()
+        return b"".join(chunks)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse:
+        method = (req.method or "").upper()
+        if method not in ALLOWED_METHODS:
+            raise PassthroughValidationError(f"method '{req.method}' is not allowed")
+
+        connection_data = self._get_connection_data()
+        allowed_methods_cfg = connection_data.get("allowed_methods")
+        if allowed_methods_cfg is not None:
+            if not isinstance(allowed_methods_cfg, list):
+                raise PassthroughConfigError("'allowed_methods' must be a list of HTTP method strings")
+            if not all(isinstance(m, str) for m in allowed_methods_cfg):
+                raise PassthroughConfigError("'allowed_methods' must be a list of HTTP method strings")
+            allowed_upper = {m.upper() for m in allowed_methods_cfg}
+            unknown = sorted(allowed_upper - ALLOWED_METHODS)
+            if unknown:
+                raise PassthroughConfigError(
+                    f"'allowed_methods' contains unsupported verbs: {unknown}. Allowed: {sorted(ALLOWED_METHODS)}"
+                )
+            if method not in allowed_upper:
+                raise PassthroughValidationError(
+                    f"method '{method}' is not permitted by this datasource",
+                    error_code="method_not_allowed",
+                    http_status=405,
+                )
+
+        request_bytes = 0
+        if req.body is not None:
+            # requests will serialize dict bodies to JSON; we cap on the
+            # serialized length. For raw strings / bytes we cap directly.
+            import json as _json
+
+            if isinstance(req.body, (dict, list)):
+                body_bytes_for_size = _json.dumps(req.body).encode("utf-8")
+            elif isinstance(req.body, (bytes, bytearray)):
+                body_bytes_for_size = bytes(req.body)
+            else:
+                body_bytes_for_size = str(req.body).encode("utf-8")
+            if len(body_bytes_for_size) > PASSTHROUGH_MAX_REQUEST_BYTES:
+                raise PassthroughValidationError(f"request body exceeded {PASSTHROUGH_MAX_REQUEST_BYTES} bytes")
+            request_bytes = len(body_bytes_for_size)
+
+        url, hostname = self._resolve_url(req.path)
+        self._check_host_allowed(hostname)
+        bearer = self._get_bearer_token()
+        headers = self._build_outgoing_headers(req.headers or {}, bearer)
+
+        request_kwargs: dict[str, Any] = {
+            "headers": headers,
+            "params": req.query or None,
+            "timeout": PASSTHROUGH_TIMEOUT_SECONDS,
+            "stream": True,
+        }
+        if req.body is not None:
+            if isinstance(req.body, (dict, list)):
+                request_kwargs["json"] = req.body
+            else:
+                request_kwargs["data"] = req.body
+
+        datasource_name = getattr(self, "name", None) or "?"
+        start = time.monotonic()
+        response = requests.request(method, url, **request_kwargs)
+        body_bytes = self._read_capped_body(response)
+        duration_ms = int((time.monotonic() - start) * 1000)
+
+        secrets = self._secrets_for_scrub()
+        body_bytes = self._scrub_bytes(body_bytes, secrets)
+        content_type = response.headers.get("Content-Type", "") or ""
+        out_headers = self._filter_response_headers(dict(response.headers), secrets)
+
+        body: Any
+        if "application/json" in content_type.lower():
+            try:
+                text = body_bytes.decode("utf-8", errors="replace")
+                import json as _json
+
+                body = _json.loads(text) if text else None
+            except ValueError:
+                body = body_bytes.decode("utf-8", errors="replace")
+        else:
+            body = body_bytes.decode("utf-8", errors="replace")
+
+        self._log_passthrough_call(
+            method=method,
+            path=req.path,
+            datasource_name=datasource_name,
+            upstream_status_code=response.status_code,
+            request_bytes=request_bytes,
+            response_bytes=len(body_bytes),
+            duration_ms=duration_ms,
+        )
+
+        return PassthroughResponse(
+            status_code=response.status_code,
+            headers=out_headers,
+            body=body,
+            content_type=content_type.split(";", 1)[0].strip() or None,
+        )
+
+    def _log_passthrough_call(
+        self,
+        *,
+        method: str,
+        path: str,
+        datasource_name: str,
+        upstream_status_code: int,
+        request_bytes: int,
+        response_bytes: int,
+        duration_ms: int,
+    ) -> None:
+        """Emit one audit line per passthrough call (spec Β§7.8).
+
+        Never logs headers or bodies. user_id / org_id are pulled from the
+        MindsDB request context when available; in test/dev invocations
+        where the context is not populated, they are omitted.
+        """
+        fields: dict[str, Any] = {
+            "method": method,
+            "path": path,
+            "datasource_name": datasource_name,
+            "upstream_status_code": upstream_status_code,
+            "request_bytes": request_bytes,
+            "response_bytes": response_bytes,
+            "duration_ms": duration_ms,
+        }
+        # TODO: org_id lives in Minds; when the passthrough is called via the
+        # Minds gateway the org scope should be propagated and logged here.
+        try:
+            from mindsdb.utilities.context import context as _ctx
+
+            user_id = getattr(_ctx, "user_id", None)
+            company_id = getattr(_ctx, "company_id", None)
+            if user_id is not None:
+                fields["user_id"] = user_id
+            if company_id is not None:
+                fields["company_id"] = company_id
+        except Exception:
+            pass
+        # DEBUG level per team decision: per-request audit logging at
+        # info level happens in Minds at the HTTP layer. This log is
+        # intended for mindsdb-side troubleshooting only.
+        logger.debug("passthrough %s", fields)
+
+    def test_passthrough(self) -> dict[str, Any]:
+        """Run the handler's canonical sanity-check call (see Β§6.1a).
+
+        Returns a structured dict the HTTP layer forwards to the caller:
+            { "ok": bool, "status_code": int?, "host": str?, "latency_ms": int?,
+              "error_code": str?, "message": str? }
+        """
+        if self._test_request is None:
+            return {
+                "ok": False,
+                "error_code": "not_implemented",
+                "message": "this handler does not define a passthrough test request",
+            }
+
+        start = time.monotonic()
+        try:
+            resp = self.api_passthrough(self._test_request)
+        except HostNotAllowedError as e:
+            return {"ok": False, "error_code": e.error_code, "message": str(e)}
+        except PassthroughConfigError as e:
+            return {"ok": False, "error_code": e.error_code, "message": str(e)}
+        except PassthroughValidationError as e:
+            return {"ok": False, "error_code": e.error_code, "message": str(e)}
+        except requests.exceptions.Timeout as e:
+            return {"ok": False, "error_code": "timeout", "message": str(e)}
+        except requests.exceptions.ConnectionError as e:
+            return {"ok": False, "error_code": "network", "message": str(e)}
+        except Exception as e:  # noqa: BLE001
+            logger.exception("passthrough test failed unexpectedly")
+            return {"ok": False, "error_code": "unknown", "message": str(e)}
+
+        latency_ms = int((time.monotonic() - start) * 1000)
+        try:
+            _, host = self._resolve_url(self._test_request.path)
+        except Exception:
+            host = None
+
+        if 200 <= resp.status_code < 300:
+            return {"ok": True, "status_code": resp.status_code, "host": host, "latency_ms": latency_ms}
+        if resp.status_code in (401, 403):
+            return {
+                "ok": False,
+                "status_code": resp.status_code,
+                "host": host,
+                "latency_ms": latency_ms,
+                "error_code": "auth_failed",
+                "message": "upstream rejected credentials; base URL and allowlist look correct",
+            }
+        return {
+            "ok": False,
+            "status_code": resp.status_code,
+            "host": host,
+            "latency_ms": latency_ms,
+            "error_code": "upstream_error",
+            "message": f"upstream returned {resp.status_code}",
+        }
diff --git a/mindsdb/integrations/libs/passthrough_types.py b/mindsdb/integrations/libs/passthrough_types.py
new file mode 100644
index 00000000000..63d1cb14524
--- /dev/null
+++ b/mindsdb/integrations/libs/passthrough_types.py
@@ -0,0 +1,94 @@
+"""
+Request/response dataclasses and error types for the REST passthrough path.
+
+These are the payloads exchanged between the HTTP layer and
+`PassthroughMixin`. They are intentionally framework-agnostic so the
+mixin can be unit-tested without Flask.
+"""
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+ALLOWED_METHODS = frozenset({"GET", "POST", "PUT", "PATCH", "DELETE"})
+
+# Hop-by-hop and auth-related headers that must never come from the caller.
+FORBIDDEN_REQUEST_HEADERS = frozenset(
+    h.lower()
+    for h in (
+        "authorization",
+        "host",
+        "cookie",
+        "content-length",
+        "connection",
+    )
+)
+
+# Hop-by-hop response headers stripped before returning to the caller.
+HOP_BY_HOP_RESPONSE_HEADERS = frozenset(
+    h.lower()
+    for h in (
+        "connection",
+        "keep-alive",
+        "proxy-authenticate",
+        "proxy-authorization",
+        "te",
+        "trailers",
+        "transfer-encoding",
+        "upgrade",
+        "content-length",
+    )
+)
+
+
+@dataclass
+class PassthroughRequest:
+    method: str
+    path: str
+    query: dict[str, Any] = field(default_factory=dict)
+    headers: dict[str, str] = field(default_factory=dict)
+    body: Any = None
+
+
+@dataclass
+class PassthroughResponse:
+    status_code: int
+    headers: dict[str, str]
+    body: Any
+    content_type: str | None = None
+
+
+class PassthroughError(Exception):
+    """Base class for passthrough failures that should not be leaked as 500s."""
+
+    error_code: str = "passthrough_error"
+    http_status: int = 400
+
+    def __init__(self, message: str, *, error_code: str | None = None, http_status: int | None = None):
+        super().__init__(message)
+        if error_code is not None:
+            self.error_code = error_code
+        if http_status is not None:
+            self.http_status = http_status
+
+
+class PassthroughConfigError(PassthroughError):
+    error_code = "config_error"
+    http_status = 500
+
+
+class HostNotAllowedError(PassthroughError):
+    error_code = "host_not_allowed"
+    http_status = 400
+
+
+class PassthroughValidationError(PassthroughError):
+    error_code = "invalid_request"
+    http_status = 400
+
+
+class PassthroughNotSupportedError(PassthroughError):
+    """Raised when a handler does not implement the mixin."""
+
+    error_code = "passthrough_not_supported"
+    http_status = 501
diff --git a/mindsdb/integrations/libs/response.py b/mindsdb/integrations/libs/response.py
index aa39ce4c2c6..3af33b444fa 100644
--- a/mindsdb/integrations/libs/response.py
+++ b/mindsdb/integrations/libs/response.py
@@ -1,14 +1,17 @@
 import sys
-from typing import Callable
+from abc import ABC
+from typing import Callable, Generator, ClassVar
 from dataclasses import dataclass, fields
 
 import numpy
 import pandas
+import psutil
 
 from mindsdb.utilities import log
 from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
 from mindsdb_sql_parser.ast import ASTNode
+from mindsdb.utilities.types.column import Column
 
 
 logger = log.getLogger(__name__)
@@ -40,7 +43,464 @@ class _INFORMATION_SCHEMA_COLUMNS_NAMES:
 INF_SCHEMA_COLUMNS_NAMES_SET = set(f.name for f in fields(INF_SCHEMA_COLUMNS_NAMES))
 
 
+class HandlerStatusResponse:
+    def __init__(
+        self,
+        success: bool = True,
+        error_message: str = None,
+        redirect_url: str = None,
+        copy_storage: str = None,
+    ) -> None:
+        self.success = success
+        self.error_message = error_message
+        self.redirect_url = redirect_url
+        self.copy_storage = copy_storage
+
+    def to_json(self):
+        data = {"success": self.success, "error": self.error_message}
+        if self.redirect_url is not None:
+            data["redirect_url"] = self.redirect_url
+        if self.copy_storage is not None:
+            data["copy_storage"] = self.copy_storage
+        return data
+
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}("
+            f"success={self.success}, "
+            f"error={self.error_message}, "
+            f"redirect_url={self.redirect_url}, "
+            f"copy_storage={self.copy_storage})"
+        )
+
+
+class DataHandlerResponse(ABC):
+    """Base class for all data handler responses."""
+
+    type: ClassVar[str]
+
+    @property
+    def resp_type(self):
+        # For back compatibility with old code, use the type attribute instead of resp_type
+        return self.type
+
+
+class ErrorResponse(DataHandlerResponse):
+    """Response for error cases.
+
+    Attributes:
+        type: RESPONSE_TYPE.ERROR
+        error_code: int
+        error_message: str | None
+        is_expected_error: bool
+        exception: Exception | None
+    """
+
+    type: ClassVar[str] = RESPONSE_TYPE.ERROR
+    error_code: int
+    error_message: str | None
+    is_expected_error: bool
+    exception: Exception | None
+
+    def __init__(self, error_code: int = 0, error_message: str | None = None, is_expected_error: bool = False):
+        self.error_code = error_code
+        self.error_message = error_message
+        self.is_expected_error = is_expected_error
+        self.exception = None
+        current_exception = sys.exc_info()
+        if current_exception[0] is not None:
+            self.exception = current_exception[1]
+
+    def to_columns_table_response(self, map_type_fn: Callable) -> None:
+        raise ValueError(
+            f"Cannot convert {self.type} to {RESPONSE_TYPE.COLUMNS_TABLE}, the error is: {self.error_message}"
+        )
+
+
+class OkResponse(DataHandlerResponse):
+    """Response for successful cases without data (e.g. CREATE TABLE, DROP TABLE, etc.).
+
+    Attributes:
+        type: RESPONSE_TYPE.OK
+        affected_rows: int - how many rows were affected by the query
+    """
+
+    type: ClassVar[str] = RESPONSE_TYPE.OK
+    affected_rows: int
+
+    def __init__(self, affected_rows: int = None):
+        self.affected_rows = affected_rows
+
+
+def _safe_pandas_concat(pieces: list[pandas.DataFrame]) -> pandas.DataFrame:
+    """Safely concatenates multiple pandas DataFrames while checking available memory.
+    If the estimated memory required for concatenation (with a safety multiplier of 2.5x)
+    exceeds the available memory, it raises a MemoryError.
+
+    Args:
+        pieces (list[pandas.DataFrame]): A list of pandas DataFrames to concatenate.
+
+    Returns:
+        pandas.DataFrame: The concatenated DataFrame.
+
+    Raises:
+        MemoryError: If there is insufficient memory to perform the concatenation safely.
+    """
+    if len(pieces) == 1:
+        return pieces[0]
+    available_memory_kb = psutil.virtual_memory().available >> 10
+    pieces_size_kb = sum([(x.memory_usage(index=True, deep=True).sum() >> 10) for x in pieces])
+    if (pieces_size_kb * 2.5) > available_memory_kb:
+        raise MemoryError()
+    return pandas.concat(pieces)
+
+
+class TableResponse(DataHandlerResponse):
+    """Response for successful cases with data (e.g. SELECT, SHOW, etc.).
+
+    Attributes:
+        type: RESPONSE_TYPE.TABLE | RESPONSE_TYPE.COLUMNS_TABLE - type of data in the response
+        affected_rows: int | None - how many rows were affected by the query
+        data_generator: Generator[pandas.DataFrame, None, None] | None - generator of data for lazy loading
+        _columns: list[Column] | None - list of columns
+        _data: pandas.DataFrame | None - loaded data
+        _fetched: bool - if data was already fetched (data_generator is consumed)
+        _invalid: bool - if data has already been fetched and cannot be iterated over
+        _last_data_piece: pandas.DataFrame | None - last data piece fetched
+        rows_fetched: int - how many rows were fetched
+    """
+
+    type: str
+    affected_rows: int | None
+    _data_generator: Generator[pandas.DataFrame, None, None] | None
+    _columns: list[Column] | None
+    _data: pandas.DataFrame | None
+    _fetched: bool
+    _invalid: bool
+    _last_data_piece: pandas.DataFrame | None
+    rows_fetched: int
+
+    def __init__(
+        self,
+        data: pandas.DataFrame | None = None,
+        data_generator: Generator[pandas.DataFrame, None, None] | None = None,
+        affected_rows: int | None = None,
+        columns: list[Column] = None,
+    ):
+        """
+        Either data and/or data_generator must be provided.
+        Args:
+            data (pandas.DataFrame): initial data
+            data_generator (Generator[pandas.DataFrame, None, None]): generator of data
+            affected_rows (int): total data rowcount - can be None depending on the handler
+                                 NOTE: name affected_rows for compatibility with OKResponse
+            columns (list[Column]): list of columns
+        """
+        self.type = RESPONSE_TYPE.TABLE
+        self._data_generator = data_generator
+        self._columns = columns
+        self.affected_rows = affected_rows
+        self._data = data
+        self._fetched = False if data_generator else True
+        self._invalid = False
+        self._last_data_piece = None
+        self.rows_fetched = len(data) if data is not None else 0
+
+    @property
+    def data_generator(self) -> Generator[pandas.DataFrame, None, None]:
+        return self._data_generator
+
+    @data_generator.setter
+    def data_generator(self, value):
+        self._fetched = False if value else True
+        self._data_generator = value
+
+    def fetchall(self) -> pandas.DataFrame:
+        """Fetch all data and store it in the _data attribute.
+
+        Returns:
+            pandas.DataFrame: Data frame.
+        """
+        self._raise_if_invalid()
+        if self._data_generator is None or self._fetched:
+            return self._data
+
+        pieces = list(self._iterate_with_memory_check())
+        if self._data is None:
+            if len(pieces) == 1:
+                self._data = pieces[0]
+            elif len(pieces) == 0:
+                self._data = pandas.DataFrame([], columns=[column.name for column in self._columns])
+            else:
+                self._data = _safe_pandas_concat(pieces)
+        elif len(pieces) > 0:
+            self._data = _safe_pandas_concat([self._data, *pieces])
+
+        self._fetched = True
+        self._data_generator = None
+
+        return self._data
+
+    def _raise_if_low_memory(self) -> None:
+        """Check if there is enough available memory to load the next data chunk.
+
+        Estimates the memory required for the next chunk based on the size of the last
+        fetched chunk. If `affected_rows` (fetched rows) is known, the estimate is capped at the
+        number of remaining rows (but no more than one chunk). Otherwise, assumes the next chunk will
+        be the same size as the previous one.
+
+        Does nothing when no data has been fetched yet.
+
+        Raises:
+            MemoryError: If estimated memory for the next chunk exceeds available memory.
+        """
+        if self._last_data_piece is None or len(self._last_data_piece) == 0:
+            return
+
+        data_piece_size_kb = self._last_data_piece.memory_usage(index=True, deep=True).sum() >> 10
+        if isinstance(self.affected_rows, int) and self.affected_rows > 0:
+            row_size_kb = data_piece_size_kb / len(self._last_data_piece)
+            rows_expected = min(self.affected_rows - self.rows_fetched, len(self._last_data_piece))
+            if rows_expected > 0:
+                available_memory_kb = psutil.virtual_memory().available >> 10
+                if available_memory_kb < (row_size_kb * rows_expected * 1.1):
+                    raise MemoryError(
+                        f"Not enough memory to load remaining data. "
+                        f"Available: {available_memory_kb}KB, estimated need: {int(row_size_kb * rows_expected * 1.1)}KB"
+                    )
+        else:
+            # assume that next piece is the same size
+            available_memory_kb = psutil.virtual_memory().available >> 10
+            if available_memory_kb < (data_piece_size_kb * 1.1):
+                raise MemoryError(
+                    f"Not enough memory to load remaining data. "
+                    f"Available: {available_memory_kb}KB, estimated need: {int(data_piece_size_kb * 1.1)}KB"
+                )
+
+    def _iterate_with_memory_check(self) -> Generator[pandas.DataFrame, None, None]:
+        """Iterate over `_data_generator` with memory safety checks.
+
+        Yields:
+            pandas.DataFrame: The next chunk from the underlying data generator.
+
+        Raises:
+            MemoryError: Propagated from `_raise_if_low_memory` if available
+                         memory is insufficient for the next chunk.
+        """
+        if self._data_generator is None:
+            return
+
+        self._raise_if_low_memory()
+
+        for piece in self._data_generator:
+            self._last_data_piece = piece
+            self.rows_fetched += len(piece)
+            yield piece
+            self._raise_if_low_memory()
+
+    def fetchmany(self) -> pandas.DataFrame | None:
+        """Fetch one piece of data and store it in the _data attribute.
+
+        Returns:
+            pandas.DataFrame: Data frame, piece of data.
+        """
+        self._raise_if_invalid()
+        try:
+            piece = next(self._iterate_with_memory_check())
+            if self._data is None:
+                self._data = piece
+            else:
+                self._data = _safe_pandas_concat([self._data, piece])
+        except StopIteration:
+            self._fetched = True
+            self._data_generator = None
+            return None
+        return piece
+
+    def iterate_no_save(self) -> Generator[pandas.DataFrame, None, None]:
+        """Iterate over the data and yield each piece of data. Do not save the data to the _data attribute.
+        NOTE: do it only once, before return result to the user
+
+        Returns:
+            Generator[pandas.DataFrame, None, None]: Generator of data frames.
+        """
+        self._raise_if_invalid()
+        if self._data is not None:
+            yield self._data
+        if self._data_generator:
+            self._invalid = True
+            for piece in self._iterate_with_memory_check():
+                yield piece
+
+    def _raise_if_invalid(self):
+        if self._invalid:
+            raise ValueError("Data has already been fetched and cannot be iterated over.")
+
+    @property
+    def data_frame(self) -> pandas.DataFrame:
+        """Get the data frame. Represents the entire dataset.
+
+        Returns:
+            pandas.DataFrame: Data frame.
+        """
+        self.fetchall()
+        return self._data
+
+    @data_frame.setter
+    def data_frame(self, value):
+        """for back compatibility"""
+        self._data = value
+
+    @property
+    def columns(self) -> list[Column]:
+        """Get the columns.
+
+        Returns:
+            list[Column]: List of columns.
+        """
+        self._resolve_columns()
+        return self._columns
+
+    def _resolve_columns(self):
+        if self._columns is not None:
+            return
+        self.fetchall()
+        self._columns = [Column(name=c) for c in self._data.columns]
+
+    def set_columns_attrs(self, table_name: str | None, table_alias: str | None, database: str | None):
+        """Set the attributes of the columns.
+
+        Args:
+            table_name (str | None): Table name.
+            table_alias (str | None): Table alias.
+            database (str | None): Database name.
+        """
+        self._resolve_columns()
+        for column in self._columns:
+            if table_name:
+                column.table_name = table_name
+            if table_alias:
+                column.table_alias = table_alias
+            if database:
+                column.database = database
+
+    def to_columns_table_response(self, map_type_fn: Callable) -> None:
+        """Transform the response to a `columns table` response.
+        NOTE: original dataframe will be mutated
+
+        Args:
+            map_type_fn (Callable): Function to map the data type to the MySQL data type.
+        """
+        if self.type == RESPONSE_TYPE.COLUMNS_TABLE:
+            return
+        if self.type != RESPONSE_TYPE.TABLE:
+            raise ValueError(
+                f"Cannot convert handler response with type '{self.type}' to '{RESPONSE_TYPE.COLUMNS_TABLE}'"
+            )
+
+        self.fetchall()
+        self._resolve_columns()
+        self.type = RESPONSE_TYPE.COLUMNS_TABLE
+
+        if self._data is None:
+            return
+        self._data.columns = [name.upper() for name in self._data.columns]
+
+        for required_column in (INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME, INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE):
+            if required_column not in self._data.columns:
+                raise ValueError(
+                    f"Missed required for INFORMATION_SCHEMA.COLUMNS column {required_column}. "
+                    f"Columns set: {self._data.columns}"
+                )
+        for column_name in INF_SCHEMA_COLUMNS_NAMES_SET:
+            if column_name not in self._data.columns:
+                self._data[column_name] = None
+
+        self._data[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self._data[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE].apply(
+            map_type_fn
+        )
+
+        self._data = self._data.astype(
+            {
+                INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: "string",
+                INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE: "string",
+                INF_SCHEMA_COLUMNS_NAMES.ORDINAL_POSITION: "Int32",
+                INF_SCHEMA_COLUMNS_NAMES.COLUMN_DEFAULT: "string",
+                INF_SCHEMA_COLUMNS_NAMES.IS_NULLABLE: "string",
+                INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH: "Int32",
+                INF_SCHEMA_COLUMNS_NAMES.CHARACTER_OCTET_LENGTH: "Int32",
+                INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION: "Int32",
+                INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE: "Int32",
+                INF_SCHEMA_COLUMNS_NAMES.DATETIME_PRECISION: "Int32",
+                INF_SCHEMA_COLUMNS_NAMES.CHARACTER_SET_NAME: "string",
+                INF_SCHEMA_COLUMNS_NAMES.COLLATION_NAME: "string",
+            }
+        )
+        self._data.replace([numpy.nan, pandas.NA], None, inplace=True)
+
+
+def normalize_response(response) -> TableResponse | OkResponse | ErrorResponse:
+    """Convert legacy HandlerResponse to new response types.
+
+    If response is already a new type (TableResponse, OkResponse, ErrorResponse),
+    return it as-is. If response is a legacy HandlerResponse, convert it based
+    on its resp_type.
+
+    Args:
+        response: Either a new response type or legacy HandlerResponse
+
+    Returns:
+        TableResponse | OkResponse | ErrorResponse: Normalized response
+    """
+    # Already new format - return as-is
+    if isinstance(response, (TableResponse, OkResponse, ErrorResponse)):
+        return response
+
+    # Legacy HandlerResponse - convert based on type
+    if isinstance(response, HandlerResponse):
+        if response.resp_type == RESPONSE_TYPE.ERROR:
+            err = ErrorResponse(
+                error_code=response.error_code,
+                error_message=response.error_message,
+                is_expected_error=response.is_expected_error,
+            )
+            err.exception = response.exception
+            return err
+
+        if response.resp_type == RESPONSE_TYPE.OK:
+            return OkResponse(affected_rows=response.affected_rows)
+
+        # TABLE or COLUMNS_TABLE
+        if response.data_frame is not None:
+            columns = list(response.data_frame.columns)
+        else:
+            columns = []
+
+        mysql_types = response.mysql_types
+        if mysql_types is None:
+            mysql_types = [None] * len(columns)
+
+        table_response = TableResponse(
+            data=response.data_frame,
+            columns=[
+                Column(name=column_name, type=mysql_type) for column_name, mysql_type in zip(columns, mysql_types)
+            ],
+            data_generator=iter([]),  # empty generator for legacy responses
+        )
+        if response.resp_type == RESPONSE_TYPE.COLUMNS_TABLE:
+            table_response.type = RESPONSE_TYPE.COLUMNS_TABLE
+        return table_response
+
+    # Unknown type - return as-is (shouldn't happen normally)
+    return response
+
+
+# ! deprecated
 class HandlerResponse:
+    """Legacy response class for compatibility with old code.
+    NOTE: do not use this class directly, use DataHandlerResponse instead
+    """
+
     def __init__(
         self,
         resp_type: RESPONSE_TYPE,
@@ -86,16 +546,21 @@ def to_columns_table_response(self, map_type_fn: Callable) -> None:
             raise ValueError(f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}")
 
         self.data_frame.columns = [name.upper() for name in self.data_frame.columns]
+
+        for required_column in (INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME, INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE):
+            if required_column not in self.data_frame.columns:
+                raise ValueError(
+                    f"Missed required for INFORMATION_SCHEMA.COLUMNS column {required_column}. "
+                    f"Columns set: {self.data_frame.columns}"
+                )
+        for column_name in INF_SCHEMA_COLUMNS_NAMES_SET:
+            if column_name not in self.data_frame.columns:
+                self.data_frame[column_name] = None
+
         self.data_frame[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self.data_frame[
             INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE
         ].apply(map_type_fn)
 
-        # region validate df
-        current_columns_set = set(self.data_frame.columns)
-        if INF_SCHEMA_COLUMNS_NAMES_SET != current_columns_set:
-            raise ValueError(f"Columns set for INFORMATION_SCHEMA.COLUMNS is wrong: {list(current_columns_set)}")
-        # endregion
-
         self.data_frame = self.data_frame.astype(
             {
                 INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: "string",
@@ -142,28 +607,3 @@ def __repr__(self):
             self.error_message,
             self.affected_rows,
         )
-
-
-class HandlerStatusResponse:
-    def __init__(
-        self,
-        success: bool = True,
-        error_message: str = None,
-        redirect_url: str = None,
-        copy_storage: str = None,
-    ) -> None:
-        self.success = success
-        self.error_message = error_message
-        self.redirect_url = redirect_url
-        self.copy_storage = copy_storage
-
-    def to_json(self):
-        data = {"success": self.success, "error": self.error_message}
-        if self.redirect_url is not None:
-            data["redirect_url"] = self.redirect_url
-        return data
-
-    def __repr__(self):
-        return f"{self.__class__.__name__}: success={self.success},\
-              error={self.error_message},\
-              redirect_url={self.redirect_url}"
diff --git a/mindsdb/integrations/libs/vectordatabase_handler.py b/mindsdb/integrations/libs/vectordatabase_handler.py
index 876f2d898d6..0bd11c7834f 100644
--- a/mindsdb/integrations/libs/vectordatabase_handler.py
+++ b/mindsdb/integrations/libs/vectordatabase_handler.py
@@ -19,11 +19,10 @@
 )
 from mindsdb_sql_parser.ast.base import ASTNode
 
-from mindsdb.integrations.libs.response import RESPONSE_TYPE, HandlerResponse
-from mindsdb.utilities import log
+from mindsdb.integrations.libs.response import DataHandlerResponse, OkResponse, TableResponse
 from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs
-
 from mindsdb.integrations.utilities.query_traversal import query_traversal
+from mindsdb.utilities import log
 from .base import BaseHandler
 
 LOG = log.getLogger(__name__)
@@ -463,7 +462,7 @@ def dispatch_select(
                 handler_engine = self.__class__.name
                 raise VectorHandlerException(f"Error in {handler_engine} database: {e}")
 
-    def _dispatch(self, query: ASTNode) -> HandlerResponse:
+    def _dispatch(self, query: ASTNode) -> DataHandlerResponse:
         """
         Parse and Dispatch query to the appropriate method.
         """
@@ -478,14 +477,14 @@ def _dispatch(self, query: ASTNode) -> HandlerResponse:
         if type(query) in dispatch_router:
             resp = dispatch_router[type(query)](query)
             if resp is not None:
-                return HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=resp)
+                return TableResponse(data=resp)
             else:
-                return HandlerResponse(resp_type=RESPONSE_TYPE.OK)
+                return OkResponse()
 
         else:
             raise NotImplementedError(f"Query type {type(query)} not implemented.")
 
-    def query(self, query: ASTNode) -> HandlerResponse:
+    def query(self, query: ASTNode) -> DataHandlerResponse:
         """
         Receive query as AST (abstract syntax tree) and act upon it somehow.
 
@@ -494,11 +493,11 @@ def query(self, query: ASTNode) -> HandlerResponse:
                 of query: SELECT, INSERT, DELETE, etc
 
         Returns:
-            HandlerResponse
+            DataHandlerResponse
         """
         return self._dispatch(query)
 
-    def create_table(self, table_name: str, if_not_exists=True) -> HandlerResponse:
+    def create_table(self, table_name: str, if_not_exists=True) -> DataHandlerResponse:
         """Create table
 
         Args:
@@ -506,11 +505,11 @@ def create_table(self, table_name: str, if_not_exists=True) -> HandlerResponse:
             if_not_exists (bool): if True, do nothing if table exists
 
         Returns:
-            HandlerResponse
+            DataHandlerResponse
         """
         raise NotImplementedError()
 
-    def drop_table(self, table_name: str, if_exists=True) -> HandlerResponse:
+    def drop_table(self, table_name: str, if_exists=True) -> DataHandlerResponse:
         """Drop table
 
         Args:
@@ -518,11 +517,11 @@ def drop_table(self, table_name: str, if_exists=True) -> HandlerResponse:
             if_exists (bool): if True, do nothing if table does not exist
 
         Returns:
-            HandlerResponse
+            DataHandlerResponse
         """
         raise NotImplementedError()
 
-    def insert(self, table_name: str, data: pd.DataFrame) -> HandlerResponse:
+    def insert(self, table_name: str, data: pd.DataFrame) -> DataHandlerResponse:
         """Insert data into table
 
         Args:
@@ -531,7 +530,7 @@ def insert(self, table_name: str, data: pd.DataFrame) -> HandlerResponse:
             columns (List[str]): columns to insert
 
         Returns:
-            HandlerResponse
+            DataHandlerResponse
         """
         raise NotImplementedError()
 
@@ -544,11 +543,11 @@ def update(self, table_name: str, data: pd.DataFrame, key_columns: List[str] = N
             key_columns (List[str]): key to  to update
 
         Returns:
-            HandlerResponse
+            DataHandlerResponse
         """
         raise NotImplementedError()
 
-    def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> HandlerResponse:
+    def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> DataHandlerResponse:
         """Delete data from table
 
         Args:
@@ -556,7 +555,7 @@ def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> H
             conditions (List[FilterCondition]): conditions to delete
 
         Returns:
-            HandlerResponse
+            DataHandlerResponse
         """
         raise NotImplementedError()
 
@@ -567,7 +566,7 @@ def select(
         conditions: List[FilterCondition] = None,
         offset: int = None,
         limit: int = None,
-    ) -> pd.DataFrame:
+    ) -> DataHandlerResponse:
         """Select data from table
 
         Args:
@@ -576,18 +575,15 @@ def select(
             conditions (List[FilterCondition]): conditions to select
 
         Returns:
-            HandlerResponse
+            DataHandlerResponse
         """
         raise NotImplementedError()
 
-    def get_columns(self, table_name: str) -> HandlerResponse:
+    def get_columns(self, table_name: str) -> TableResponse:
         # return a fixed set of columns
         data = pd.DataFrame(self.SCHEMA)
         data.columns = ["COLUMN_NAME", "DATA_TYPE"]
-        return HandlerResponse(
-            resp_type=RESPONSE_TYPE.DATA,
-            data_frame=data,
-        )
+        return TableResponse(data=data)
 
     def hybrid_search(
         self,
diff --git a/mindsdb/integrations/utilities/community_handler_fetcher.py b/mindsdb/integrations/utilities/community_handler_fetcher.py
new file mode 100644
index 00000000000..2f66640a035
--- /dev/null
+++ b/mindsdb/integrations/utilities/community_handler_fetcher.py
@@ -0,0 +1,265 @@
+import base64
+import json
+import os
+import shutil
+import threading
+from pathlib import Path
+from typing import Optional
+
+import requests
+
+from mindsdb.utilities import log
+
+logger = log.getLogger(__name__)
+
+# GitHub API configuration
+# It can be replaced later with making the repo public.
+GITHUB_API_BASE = "https://api.github.com"
+DEFAULT_REPO = "mindsdb/mindsdb-community-handlers"
+DEFAULT_BRANCH = "main"
+DEFAULT_PATH_PREFIX = "community_handlers"
+_fetch_locks: dict = {}
+_fetch_locks_lock = threading.Lock()
+
+
+def _get_fetch_lock(handler_dir_name: str) -> threading.Lock:
+    """
+    Get and create if needed a threading.
+    Lock for the given handler directory.
+    This ensures that concurrent fetches for the same handler_dir_name are
+    serializedlly, preventing race conditions on disk.
+    """
+    with _fetch_locks_lock:
+        if handler_dir_name not in _fetch_locks:
+            _fetch_locks[handler_dir_name] = threading.Lock()
+        return _fetch_locks[handler_dir_name]
+
+
+def _github_headers() -> dict:
+    """
+    Return headers for GitHub API requests, including optional auth if GITHUB_TOKEN is set in the environment.
+    TODO: Remove this after repository is set to public.
+    """
+    headers = {"Accept": "application/vnd.github.v3+json"}
+    token = os.environ.get("GITHUB_TOKEN")
+    if token:
+        headers["Authorization"] = f"token {token}"
+    return headers
+
+
+# It can be removed later with making the repo public. TBD
+def _get_repo_config() -> tuple:
+    """Returns (repo, branch, path_prefix)."""
+    repo = os.environ.get("COMMUNITY_HANDLERS_REPO", DEFAULT_REPO)
+    branch = os.environ.get("COMMUNITY_HANDLERS_BRANCH", DEFAULT_BRANCH)
+    path_prefix = os.environ.get("COMMUNITY_HANDLERS_PATH", DEFAULT_PATH_PREFIX)
+    return repo, branch, path_prefix
+
+
+def _resolve_tree_sha(repo: str, branch: str, dir_path: str, headers: dict) -> Optional[str]:
+    """Return the Git tree SHA for dir_path by inspecting its parent directory listing.
+
+    Calls the Contents API on the parent of dir_path, then finds the matching
+    directory entry and returns its SHA.  Returns None if the path does not exist
+    (404) or if the directory name is not found in the parent listing.
+
+    Raises:
+        RuntimeError: On network errors or unexpected GitHub API responses.
+    """
+    parent_path, _, dir_name = dir_path.rstrip("/").rpartition("/")
+    api_url = f"{GITHUB_API_BASE}/repos/{repo}/contents/{parent_path}"
+    params = {"ref": branch}
+    try:
+        resp = requests.get(api_url, params=params, headers=headers, timeout=30)
+    except requests.RequestException as e:
+        raise RuntimeError(f"Network error resolving tree SHA for '{dir_path}': {e}") from e
+    if resp.status_code == 404:
+        return None
+    if resp.status_code != 200:
+        raise RuntimeError(
+            f"GitHub API error resolving tree SHA for '{dir_path}': HTTP {resp.status_code} β€” {resp.text[:300]}"
+        )
+    try:
+        entries = resp.json()
+    except json.JSONDecodeError as e:
+        raise RuntimeError(f"Invalid JSON resolving tree SHA for '{dir_path}': {e}") from e
+    for entry in entries:
+        if entry.get("name") == dir_name and entry.get("type") == "dir":
+            return entry.get("sha")
+    return None
+
+
+def _fetch_tree_recursive(
+    repo: str,
+    branch: str,
+    tree_sha: str,
+    remote_prefix: str,
+    dest_dir: Path,
+    headers: dict,
+    max_depth: int = 4,
+) -> int:
+    """Fetch all files in a Git tree recursively, preserving directory structure.
+
+    Uses the Git Trees API with ?recursive=1 to obtain the full file listing in
+    a single API call, then downloads each blob from raw.githubusercontent.com.
+
+    Args:
+        repo: GitHub repository in "owner/repo" format.
+        branch: Branch or ref name used to build raw download URLs.
+        tree_sha: SHA of the Git tree to fetch.
+        remote_prefix: Path within the repo to the handler directory
+        Used to construct raw download URLs.
+        dest_dir: Local directory where files will be written.
+        headers: HTTP headers (auth, Accept) for GitHub API requests.
+        max_depth: Maximum allowed directory nesting depth.  Entries whose
+            relative path contains >= max_depth slashes are skipped.
+
+    Returns:
+        Number of files downloaded.
+
+    Raises:
+        RuntimeError: On network errors or unexpected API responses.
+    """
+    api_url = f"{GITHUB_API_BASE}/repos/{repo}/git/trees/{tree_sha}"
+    params = {"recursive": "1"}
+    try:
+        resp = requests.get(api_url, params=params, headers=headers, timeout=30)
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        raise RuntimeError(f"Network error fetching tree '{tree_sha}': {e}") from e
+    try:
+        tree_data = resp.json()
+    except json.JSONDecodeError as e:
+        raise RuntimeError(f"Invalid JSON from Git Trees API for tree '{tree_sha}': {e}") from e
+
+    if tree_data.get("truncated"):
+        logger.warning("Tree for handler '%s' was truncated; some files may be missing", remote_prefix)
+
+    file_count = 0
+    for entry in tree_data.get("tree", []):
+        if entry.get("type") != "blob":
+            continue
+        path = entry["path"]
+        if path.count("/") >= max_depth:
+            logger.debug("Skipping deeply nested path '%s' (max_depth=%d)", path, max_depth)
+            continue
+        local_path = dest_dir / path
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+        raw_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{remote_prefix}/{path}"
+        try:
+            file_resp = requests.get(raw_url, headers=headers, timeout=30)
+            file_resp.raise_for_status()
+        except requests.RequestException as e:
+            raise RuntimeError(f"Failed to download '{path}' for handler '{remote_prefix}': {e}") from e
+        local_path.write_bytes(file_resp.content)
+        logger.debug("Downloaded %s (%d bytes)", path, entry.get("size", 0))
+        file_count += 1
+    return file_count
+
+
+def fetch_handler(handler_dir_name: str, storage_dir: Path) -> Optional[Path]:
+    """
+    Fetch a single community handler directory from GitHub into storage_dir.
+
+    Downloads the full directory tree for the requested handler using the
+    GitHub Git Trees API, preserving subdirectory structure.
+
+    Args:
+        handler_dir_name: The directory name of the handler (e.g. "github_handler")
+        storage_dir: Root directory where community handlers are stored
+
+    Returns:
+        Path to the fetched handler directory, or None if the handler does not
+        exist in the remote repository.
+
+    Raises:
+        RuntimeError: On network errors or unexpected GitHub API responses.
+    """
+    lock = _get_fetch_lock(handler_dir_name)
+    with lock:
+        dest_dir = storage_dir / handler_dir_name
+
+        if dest_dir.is_dir() and (dest_dir / "__init__.py").exists():
+            logger.debug("Community handler '%s' already on disk at %s", handler_dir_name, dest_dir)
+            return dest_dir
+
+        repo, branch, path_prefix = _get_repo_config()
+        headers = _github_headers()
+        remote_prefix = f"{path_prefix}/{handler_dir_name}"
+
+        logger.debug("Fetching community handler '%s' from %s@%s", handler_dir_name, repo, branch)
+
+        tree_sha = _resolve_tree_sha(repo, branch, remote_prefix, headers)
+        if tree_sha is None:
+            logger.error("Community handler '%s' not found in repo '%s'", handler_dir_name, repo)
+            return None
+
+        # Use a temporary directory for downloading files before moving to the final location.
+        # This prevents leaving a partially downloaded handler on disk if something goes wrong.
+        # As a fail-safe measure, we remove any existing temp directory before starting, and ensure cleanup on exceptions.
+        tmp_dir = storage_dir / f".tmp_{handler_dir_name}"
+        if tmp_dir.exists():
+            shutil.rmtree(tmp_dir)
+        tmp_dir.mkdir(parents=True, exist_ok=True)
+
+        try:
+            file_count = _fetch_tree_recursive(repo, branch, tree_sha, remote_prefix, tmp_dir, headers)
+            logger.debug("Fetched %d files for handler '%s'", file_count, handler_dir_name)
+
+            # Atomic rename.
+            # If dest_dir already exists, remove it first.
+            # This ensures that we don't end up with a mix of old and new files if the handler is updated.
+            if dest_dir.exists():
+                shutil.rmtree(dest_dir)
+            tmp_dir.rename(dest_dir)
+
+        except Exception:
+            if tmp_dir.exists():
+                shutil.rmtree(tmp_dir)
+            raise
+
+        logger.debug("Community handler '%s' fetched successfully to %s", handler_dir_name, dest_dir)
+        return dest_dir
+
+
+def community_handlers_enabled() -> bool:
+    """Returns True if community handlers are enabled via env var.
+
+    Set MINDSDB_COMMUNITY_HANDLERS=true to opt in.
+    Community handlers are disabled by default.
+    """
+    val = os.environ.get("MINDSDB_COMMUNITY_HANDLERS", "false").lower()
+    return val in ("1", "true", "yes", "enabled")
+
+
+def get_community_handlers_storage_dir(storage_root: Path) -> Path:
+    """Returns (and creates if needed) the community handlers storage directory."""
+    community_dir = storage_root / "community_handlers"
+    # Creating the directory, maybe can be done on init?
+    community_dir.mkdir(parents=True, exist_ok=True)
+    return community_dir
+
+
+def list_available_handlers() -> list:
+    """
+    Return handler metadata from the community index.json.
+
+    Each dict has keys: name, title, folder, type, support_level,
+    icon_path, description.
+    """
+    repo, branch, _ = _get_repo_config()
+    api_url = f"{GITHUB_API_BASE}/repos/{repo}/contents/index.json"
+    params = {"ref": branch}
+
+    try:
+        logger.debug("Fetching community handlers index from GitHub: %s", api_url)
+        resp = requests.get(api_url, params=params, headers=_github_headers(), timeout=30)
+        if resp.status_code == 200:
+            entry = resp.json()
+            raw = base64.b64decode(entry["content"]).decode("utf-8")
+            data = json.loads(raw)
+            return data.get("handlers", [])
+        logger.warning("Could not fetch community index: HTTP %s", resp.status_code)
+    except Exception as e:
+        logger.warning("Could not fetch community handlers index: %s", e)
+    return []
diff --git a/mindsdb/integrations/utilities/files/file_reader.py b/mindsdb/integrations/utilities/files/file_reader.py
index 4397569482c..7bebb3cbd8c 100644
--- a/mindsdb/integrations/utilities/files/file_reader.py
+++ b/mindsdb/integrations/utilities/files/file_reader.py
@@ -42,6 +42,7 @@ class _SINGLE_PAGE_FORMAT:
 @dataclass(frozen=True, slots=True)
 class _MULTI_PAGE_FORMAT:
     XLSX: str = "xlsx"
+    XLS: str = "xls"
 
 
 MULTI_PAGE_FORMAT = _MULTI_PAGE_FORMAT()
@@ -125,6 +126,10 @@ def __init__(
 
         self.parameters = {}
 
+    def close(self):
+        if self.file_obj is not None:
+            self.file_obj.close()
+
     def get_format(self) -> str:
         if self.format is not None:
             return self.format
@@ -162,9 +167,10 @@ def get_format_by_content(self):
         if file_type is not None:
             if file_type.mime in {
                 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-                "application/vnd.ms-excel",
             }:
                 return MULTI_PAGE_FORMAT.XLSX
+            if file_type.mime == "application/vnd.ms-excel":
+                return MULTI_PAGE_FORMAT.XLS
 
             if file_type.mime == "application/pdf":
                 return SINGLE_PAGE_FORMAT.PDF
@@ -624,3 +630,12 @@ def read_xlsx(
                 else:
                     df = pd.read_excel(xls, sheet_name=page_name)
                 yield page_name, df
+
+    @staticmethod
+    def read_xls(
+        file_obj: BytesIO,
+        page_name: str | None = None,
+        only_names: bool = False,
+        **kwargs,
+    ):
+        return FileReader.read_xlsx(file_obj, page_name=page_name, only_names=only_names, **kwargs)
diff --git a/mindsdb/integrations/utilities/install.py b/mindsdb/integrations/utilities/install.py
index 388edc9703d..9a56b2e4ae4 100644
--- a/mindsdb/integrations/utilities/install.py
+++ b/mindsdb/integrations/utilities/install.py
@@ -1,66 +1,77 @@
 import os
 import sys
 import subprocess
+from enum import Enum
 from typing import Text, List
 
 
-def install_dependencies(dependencies: List[Text]) -> dict:
+class InstallTool(Enum):
+    pip = (sys.executable, "-m", "pip")
+    uv = ("uv", "pip")
+
+
+def install_dependencies(dependencies: List[Text], tool: InstallTool = InstallTool.pip) -> dict:
     """
     Installs the dependencies for a handler by calling the `pip install` command via subprocess.
 
     Args:
         dependencies (List[Text]): List of dependencies for the handler.
+        tool (InstallTool): tool the tool that will be used to install dependencies
 
     Returns:
         dict: A dictionary containing the success status and an error message if an error occurs.
     """
-    outs = b''
-    errs = b''
-    result = {
-        'success': False,
-        'error_message': None
-    }
+    outs = b""
+    errs = b""
+    result = {"success": False, "error_message": None}
     code = None
 
     try:
         # Split the dependencies by parsing the contents of the requirements.txt file.
         split_dependencies = parse_dependencies(dependencies)
     except FileNotFoundError as file_not_found_error:
-        result['error_message'] = f"Error parsing dependencies, file not found: {str(file_not_found_error)}"
+        result["error_message"] = f"Error parsing dependencies, file not found: {str(file_not_found_error)}"
         return result
     except Exception as unknown_error:
-        result['error_message'] = f"Unknown error parsing dependencies: {str(unknown_error)}"
+        result["error_message"] = f"Unknown error parsing dependencies: {str(unknown_error)}"
         return result
 
     try:
-        # Install the dependencies using the `pip install` command.
+        # Install the dependencies using the selected tool.
         sp = subprocess.Popen(
-            [sys.executable, '-m', 'pip', 'install', *split_dependencies],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE
+            [*tool.value, "install", *split_dependencies], stdout=subprocess.PIPE, stderr=subprocess.PIPE
         )
         code = sp.wait()
         outs, errs = sp.communicate(timeout=1)
     except subprocess.TimeoutExpired as timeout_error:
         sp.kill()
-        result['error_message'] = f"Timeout error while installing dependencies: {str(timeout_error)}"
+        result["error_message"] = f"Timeout error while installing dependencies: {str(timeout_error)}"
+        return result
+    except FileNotFoundError as e:
+        if e.filename == "uv":
+            result["error_message"] = "The 'pip' and 'uv' tools are not found. Please install them."
+        else:
+            result["error_message"] = f"FileNotFoundError error while installing dependencies: {str(e)}"
         return result
     except Exception as unknown_error:
-        result['error_message'] = f"Unknown error while installing dependencies: {str(unknown_error)}"
+        result["error_message"] = f"Unknown error while installing dependencies: {str(unknown_error)}"
         return result
 
     # Return the result of the installation if successful, otherwise return an error message.
     if code != 0:
-        output = ''
+        output = ""
         if isinstance(outs, bytes) and len(outs) > 0:
-            output = output + 'Output: ' + outs.decode()
+            output = output + "Output: " + outs.decode()
         if isinstance(errs, bytes) and len(errs) > 0:
             if len(output) > 0:
-                output = output + '\n'
-            output = output + 'Errors: ' + errs.decode()
-        result['error_message'] = output
+                output = output + "\n"
+            output = output + "Errors: " + errs.decode()
+        if "no module named pip" in output.lower() and tool is InstallTool.pip:
+            # try with uv
+            return install_dependencies(dependencies, InstallTool.uv)
+        result["error_message"] = output
     else:
-        result['success'] = True
+        result["success"] = True
 
     return result
 
@@ -85,19 +96,19 @@ def parse_dependencies(dependencies: List[Text]) -> List[Text]:
     split_dependencies = []
     for dependency in dependencies:
         # ignore standalone comments
-        if dependency.startswith('#'):
+        if dependency.startswith("#"):
             continue
 
         # remove inline comments
-        if '#' in dependency:
-            dependency = dependency.split('#')[0].strip()
+        if "#" in dependency:
+            dependency = dependency.split("#")[0].strip()
 
         # check if the dependency is a path to a requirements file
-        if dependency.startswith('-r'):
+        if dependency.startswith("-r"):
             # get the path to the requirements file
-            req_path = dependency.split(' ')[1]
+            req_path = dependency.split(" ")[1]
             # create the absolute path to the requirements file
-            abs_req_path = os.path.abspath(os.path.join(script_path, req_path.replace('mindsdb/integrations', '..')))
+            abs_req_path = os.path.abspath(os.path.join(script_path, req_path.replace("mindsdb/integrations", "..")))
             # check if the file exists
             if os.path.exists(abs_req_path):
                 inner_dependencies, inner_split_dependencies = [], []
@@ -128,7 +139,7 @@ def read_dependencies(path: Text) -> List[Text]:
     """
     dependencies = []
     # read the dependencies from the file
-    with open(str(path), 'rt') as f:
-        dependencies = [x.strip(' \t\n') for x in f.readlines()]
+    with open(str(path), "rt") as f:
+        dependencies = [x.strip(" \t\n") for x in f.readlines()]
         dependencies = [x for x in dependencies if len(x) > 0]
     return dependencies
diff --git a/mindsdb/integrations/utilities/rag/config_loader.py b/mindsdb/integrations/utilities/rag/config_loader.py
deleted file mode 100644
index 51732358ca6..00000000000
--- a/mindsdb/integrations/utilities/rag/config_loader.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""Utility functions for RAG pipeline configuration"""
-
-from typing import Dict, Any, Optional
-
-from mindsdb.utilities.log import getLogger
-from mindsdb.integrations.utilities.rag.settings import (
-    RetrieverType,
-    MultiVectorRetrieverMode,
-    SearchType,
-    SearchKwargs,
-    VectorStoreConfig,
-    RerankerConfig,
-    RAGPipelineModel,
-    DEFAULT_COLLECTION_NAME,
-)
-
-logger = getLogger(__name__)
-
-
-def load_rag_config(
-    base_config: Dict[str, Any], kb_params: Optional[Dict[str, Any]] = None, embedding_model: Any = None
-) -> RAGPipelineModel:
-    """
-    Load and validate RAG configuration parameters. This function handles the conversion of configuration
-    parameters into their appropriate types and ensures all required settings are properly configured.
-
-    Args:
-        base_config: Base configuration dictionary containing RAG pipeline settings
-        kb_params: Optional knowledge base parameters to merge with base config
-        embedding_model: Optional embedding model instance to use in the RAG pipeline
-
-    Returns:
-        RAGPipelineModel: Validated RAG configuration model ready for pipeline creation
-
-    Raises:
-        ValueError: If configuration validation fails or required parameters are missing
-    """
-    # Create a shallow copy of the base config to avoid modifying the original
-    # We avoid deepcopy because some objects (like embedding_model) may contain unpickleable objects
-    rag_params = base_config.copy()
-
-    # Merge with knowledge base params if provided
-    if kb_params:
-        rag_params.update(kb_params)
-
-    # Set embedding model if provided
-    if embedding_model is not None:
-        rag_params["embedding_model"] = embedding_model
-
-    # Handle enums and type conversions
-    if "retriever_type" in rag_params:
-        rag_params["retriever_type"] = RetrieverType(rag_params["retriever_type"])
-    if "multi_retriever_mode" in rag_params:
-        rag_params["multi_retriever_mode"] = MultiVectorRetrieverMode(rag_params["multi_retriever_mode"])
-    if "search_type" in rag_params:
-        rag_params["search_type"] = SearchType(rag_params["search_type"])
-
-    # Handle search kwargs if present
-    if "search_kwargs" in rag_params and isinstance(rag_params["search_kwargs"], dict):
-        rag_params["search_kwargs"] = SearchKwargs(**rag_params["search_kwargs"])
-
-    # Summarization config removed - no longer supported
-
-    # Handle vector store config
-    if "vector_store_config" in rag_params:
-        if isinstance(rag_params["vector_store_config"], dict):
-            rag_params["vector_store_config"] = VectorStoreConfig(**rag_params["vector_store_config"])
-    else:
-        rag_params["vector_store_config"] = {}
-        logger.warning(
-            f"No collection_name specified for the retrieval tool, "
-            f"using default collection_name: '{DEFAULT_COLLECTION_NAME}'"
-            f"\nWarning: If this collection does not exist, no data will be retrieved"
-        )
-
-    if "reranker_config" in rag_params:
-        rag_params["reranker_config"] = RerankerConfig(**rag_params["reranker_config"])
-
-    # Convert to RAGPipelineModel with validation
-    try:
-        return RAGPipelineModel(**rag_params)
-    except Exception as e:
-        logger.exception("Invalid RAG configuration:")
-        raise ValueError(f"Configuration validation failed: {str(e)}") from e
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/MDBVectorStore.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/MDBVectorStore.py
deleted file mode 100644
index 8e5575af0bd..00000000000
--- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/MDBVectorStore.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant, Star
-from mindsdb.integrations.libs.vectordatabase_handler import TableField
-
-from typing import Any, List, Optional
-
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-
-
-class MDBVectorStore(VectorStore):
-    def __init__(self, kb_table) -> None:
-        self.kb_table = kb_table
-
-    @property
-    def embeddings(self) -> Optional[Any]:
-        return None
-
-    def similarity_search(
-        self,
-        query: str,
-        k: int = 4,
-        **kwargs: Any,
-    ) -> List[SimpleDocument]:
-        query = Select(
-            targets=[Star()],
-            where=BinaryOperation(op="=", args=[Identifier(TableField.CONTENT.value), Constant(query)]),
-            limit=Constant(k),
-        )
-
-        df = self.kb_table.select_query(query)
-
-        docs = []
-        for _, row in df.iterrows():
-            metadata = row[TableField.METADATA.value]
-            if metadata is None:
-                metadata = {}
-            docs.append(SimpleDocument(page_content=row[TableField.CONTENT.value], metadata=metadata))
-
-        return docs
-
-    def add_texts(self, *args, **kwargs) -> List[str]:
-        raise NotImplementedError
-
-    @classmethod
-    def from_texts(self, *args, **kwargs):
-        raise NotImplementedError
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/__init__.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/base_vector_store.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/base_vector_store.py
deleted file mode 100644
index f8e12a70e8a..00000000000
--- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/base_vector_store.py
+++ /dev/null
@@ -1,111 +0,0 @@
-"""Base VectorStore interface to replace langchain VectorStore"""
-
-from typing import Any, List, Optional, Tuple
-from abc import ABC, abstractmethod
-
-
-class VectorStore(ABC):
-    """Base class for vector stores to replace langchain VectorStore"""
-
-    @property
-    @abstractmethod
-    def embeddings(self) -> Optional[Any]:
-        """Return embeddings model if available"""
-        pass
-
-    @abstractmethod
-    def similarity_search(
-        self,
-        query: str,
-        k: int = 4,
-        **kwargs: Any,
-    ) -> List[Any]:
-        """Return most similar documents to query"""
-        pass
-
-    def similarity_search_with_score(
-        self,
-        query: str,
-        k: int = 4,
-        **kwargs: Any,
-    ) -> List[Tuple[Any, float]]:
-        """Return most similar documents with scores"""
-        # Default implementation using similarity_search
-        docs = self.similarity_search(query, k=k, **kwargs)
-        # Return with dummy scores if not overridden
-        return [(doc, 0.0) for doc in docs]
-
-    def as_retriever(self, **kwargs: Any) -> Any:
-        """Return a retriever interface"""
-
-        # Create a simple retriever wrapper
-        class SimpleRetriever:
-            def __init__(self, vector_store):
-                self.vector_store = vector_store
-
-            def get_relevant_documents(self, query: str) -> List[Any]:
-                return self.vector_store.similarity_search(query, **kwargs)
-
-            def invoke(self, query: str) -> List[Any]:
-                return self.get_relevant_documents(query)
-
-        return SimpleRetriever(self)
-
-    def add_texts(self, *args: Any, **kwargs: Any) -> List[str]:
-        """Add texts to the vector store"""
-        raise NotImplementedError("add_texts not implemented")
-
-    def add_documents(self, documents: List[Any], **kwargs: Any) -> List[str]:
-        """
-        Add documents to the vector store.
-        Extracts page_content and metadata from documents and calls add_texts.
-
-        Args:
-            documents: List of document-like objects with page_content and metadata attributes
-            **kwargs: Additional arguments to pass to add_texts
-
-        Returns:
-            List of document IDs (if supported by implementation)
-        """
-        texts = []
-        metadatas = []
-        for doc in documents:
-            # Use duck typing to access page_content and metadata
-            page_content = getattr(doc, "page_content", str(doc))
-            metadata = getattr(doc, "metadata", {})
-            texts.append(page_content)
-            metadatas.append(metadata)
-
-        # Call add_texts with texts and metadatas
-        return self.add_texts(texts, metadatas=metadatas, **kwargs)
-
-    @classmethod
-    def from_texts(cls, *args: Any, **kwargs: Any):
-        """Create vector store from texts"""
-        raise NotImplementedError("from_texts not implemented")
-
-    @classmethod
-    def from_documents(cls, documents: List[Any], embedding: Any, **kwargs: Any):
-        """
-        Create vector store from documents.
-        Extracts texts and metadata from documents and calls from_texts.
-
-        Args:
-            documents: List of document-like objects with page_content and metadata attributes
-            embedding: Embedding model/function
-            **kwargs: Additional arguments to pass to from_texts
-
-        Returns:
-            VectorStore instance
-        """
-        texts = []
-        metadatas = []
-        for doc in documents:
-            # Use duck typing to access page_content and metadata
-            page_content = getattr(doc, "page_content", str(doc))
-            metadata = getattr(doc, "metadata", {})
-            texts.append(page_content)
-            metadatas.append(metadata)
-
-        # Call from_texts with texts, metadatas, and embedding
-        return cls.from_texts(texts, embedding=embedding, metadatas=metadatas, **kwargs)
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py
deleted file mode 100644
index bba23a53c23..00000000000
--- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py
+++ /dev/null
@@ -1,226 +0,0 @@
-from typing import Any, List, Union, Optional, Dict, Tuple
-
-from pgvector.sqlalchemy import SPARSEVEC, Vector
-import sqlalchemy as sa
-from sqlalchemy.dialects.postgresql import JSON
-from sqlalchemy.orm import Session
-from sqlalchemy import create_engine
-from sqlalchemy.ext.declarative import declarative_base
-
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-# SQLAlchemy declarative base
-Base = declarative_base()
-
-_generated_sa_tables = {}
-
-
-class PGVectorMDB(VectorStore):
-    """
-    Custom PGVector implementation for mindsdb vector store table structure
-    Replaces langchain_community.vectorstores.PGVector
-    """
-
-    def __init__(
-        self,
-        connection_string: str,
-        collection_name: str,
-        embedding_function: Any = None,
-        is_sparse: bool = False,
-        vector_size: Optional[int] = None,
-        **kwargs,
-    ):
-        """
-        Initialize PGVectorMDB
-
-        Args:
-            connection_string: PostgreSQL connection string
-            collection_name: Name of the table/collection
-            embedding_function: Embedding function/model
-            is_sparse: Whether to use sparse vectors
-            vector_size: Size of sparse vectors (required if is_sparse=True)
-        """
-        self.is_sparse = is_sparse
-        if is_sparse and vector_size is None:
-            raise ValueError("vector_size is required when is_sparse=True")
-        self.vector_size = vector_size
-        self.collection_name = collection_name
-        self.embedding_function = embedding_function
-
-        # Create SQLAlchemy engine
-        self._bind = create_engine(connection_string, pool_pre_ping=True)
-
-        # Initialize table structure
-        self.__post_init__()
-
-    def __post_init__(
-        self,
-    ) -> None:
-        """Initialize SQLAlchemy table structure"""
-        collection_name = self.collection_name
-
-        if collection_name not in _generated_sa_tables:
-
-            class EmbeddingStore(Base):
-                """Embedding store."""
-
-                __tablename__ = collection_name
-
-                id = sa.Column(sa.Integer, primary_key=True)
-                embedding = sa.Column(
-                    "embeddings",
-                    SPARSEVEC()
-                    if self.is_sparse
-                    else Vector()
-                    if self.vector_size is None
-                    else SPARSEVEC(self.vector_size)
-                    if self.is_sparse
-                    else Vector(self.vector_size),
-                )
-                document = sa.Column("content", sa.String, nullable=True)
-                cmetadata = sa.Column("metadata", JSON, nullable=True)
-
-            _generated_sa_tables[collection_name] = EmbeddingStore
-
-        self.EmbeddingStore = _generated_sa_tables[collection_name]
-
-    @property
-    def embeddings(self) -> Optional[Any]:
-        """Return embedding function if available"""
-        return self.embedding_function
-
-    def similarity_search(
-        self,
-        query: str,
-        k: int = 4,
-        **kwargs: Any,
-    ) -> List[SimpleDocument]:
-        """Return most similar documents to query"""
-        # Get embedding for query
-        if self.embedding_function is None:
-            raise ValueError("embedding_function is required for similarity_search")
-
-        # Embed the query
-        query_embedding = self.embedding_function.embed_query(query)
-
-        # Query collection
-        results = self.__query_collection(query_embedding, k=k, filter=kwargs.get("filter"))
-
-        # Convert to SimpleDocument objects
-        docs = []
-        for result in results:
-            embedding_store = result.EmbeddingStore
-            page_content = embedding_store.document or ""
-            metadata = embedding_store.cmetadata or {}
-            docs.append(SimpleDocument(page_content=page_content, metadata=metadata))
-
-        return docs
-
-    def similarity_search_with_score(
-        self,
-        query: str,
-        k: int = 4,
-        **kwargs: Any,
-    ) -> List[Tuple[SimpleDocument, float]]:
-        """Return most similar documents with scores"""
-        # Get embedding for query
-        if self.embedding_function is None:
-            raise ValueError("embedding_function is required for similarity_search_with_score")
-
-        # Embed the query
-        query_embedding = self.embedding_function.embed_query(query)
-
-        # Query collection
-        results = self.__query_collection(query_embedding, k=k, filter=kwargs.get("filter"))
-
-        # Convert to SimpleDocument objects with scores
-        docs_with_scores = []
-        for result in results:
-            embedding_store = result.EmbeddingStore
-            page_content = embedding_store.document or ""
-            metadata = embedding_store.cmetadata or {}
-            doc = SimpleDocument(page_content=page_content, metadata=metadata)
-            # Distance is already calculated in __query_collection
-            score = float(result.distance) if hasattr(result, "distance") else 0.0
-            docs_with_scores.append((doc, score))
-
-        return docs_with_scores
-
-    def __query_collection(
-        self,
-        embedding: Union[List[float], Dict[int, float], str],
-        k: int = 4,
-        filter: Optional[Dict[str, str]] = None,
-    ) -> List[Any]:
-        """Query the collection."""
-        with Session(self._bind) as session:
-            if self.is_sparse:
-                # Sparse vectors: expect string in format "{key:value,...}/size" or dictionary
-                if isinstance(embedding, dict):
-                    from pgvector.utils import SparseVector
-
-                    embedding = SparseVector(embedding, self.vector_size)
-                    embedding_str = embedding.to_text()
-                elif isinstance(embedding, str):
-                    # Use string as is - it should already be in the correct format
-                    embedding_str = embedding
-                # Use inner product for sparse vectors
-                distance_op = "<#>"
-                # For inner product, larger values are better matches
-                order_direction = "ASC"
-            else:
-                # Dense vectors: expect string in JSON array format or list of floats
-                if isinstance(embedding, list):
-                    embedding_str = f"[{','.join(str(x) for x in embedding)}]"
-                elif isinstance(embedding, str):
-                    embedding_str = embedding
-                # Use cosine similarity for dense vectors
-                distance_op = "<=>"
-                # For cosine similarity, smaller values are better matches
-                order_direction = "ASC"
-
-            # Use SQL directly for vector comparison
-            query = sa.text(
-                f"""
-            SELECT t.*, t.embeddings {distance_op} '{embedding_str}' as distance
-            FROM {self.collection_name} t
-            ORDER BY distance {order_direction}
-            LIMIT {k}
-            """
-            )
-            results = session.execute(query).all()
-
-            # Convert results to the expected format
-            formatted_results = []
-            for rec in results:
-                metadata = rec.metadata if bool(rec.metadata) else {0: 0}
-                embedding_store = self.EmbeddingStore()
-                embedding_store.document = rec.content
-                embedding_store.cmetadata = metadata
-                result = type("Result", (), {"EmbeddingStore": embedding_store, "distance": rec.distance})
-                formatted_results.append(result)
-
-            return formatted_results
-
-    # Aliases for compatibility
-    def _PGVector__query_collection(self, *args, **kwargs):
-        return self.__query_collection(*args, **kwargs)
-
-    def _query_collection(self, *args, **kwargs):
-        return self.__query_collection(*args, **kwargs)
-
-    def create_collection(self):
-        raise RuntimeError("Forbidden")
-
-    def delete_collection(self):
-        raise RuntimeError("Forbidden")
-
-    def delete(self, *args, **kwargs):
-        raise RuntimeError("Forbidden")
-
-    def add_embeddings(self, *args, **kwargs):
-        raise RuntimeError("Forbidden")
diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py
deleted file mode 100644
index a094f5e830e..00000000000
--- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from typing import Any
-
-from pydantic import BaseModel
-
-from mindsdb.integrations.utilities.rag.settings import VectorStoreType, VectorStoreConfig
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.MDBVectorStore import MDBVectorStore
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.pgvector import PGVectorMDB
-from mindsdb.utilities import log
-
-
-logger = log.getLogger(__name__)
-
-
-class VectorStoreLoader(BaseModel):
-    embedding_model: Any  # Embedding model interface
-    vector_store: VectorStore = None
-    config: VectorStoreConfig = None
-
-    class Config:
-        arbitrary_types_allowed = True
-        extra = "forbid"
-        validate_assignment = True
-
-    def load(self) -> VectorStore:
-        """
-        Loads the vector store based on the provided config and embeddings model
-        :return:
-        """
-        if (
-            self.config.is_sparse is not None
-            and self.config.vector_size is not None
-            and self.config.kb_table is not None
-        ):
-            # Only use PGVector store for sparse vectors.
-            db_handler = self.config.kb_table.get_vector_db()
-            db_args = db_handler.connection_args
-            # Assume we are always using PGVector & psycopg2.
-            connection_str = f"postgresql+psycopg2://{db_args.get('user')}:{db_args.get('password')}@{db_args.get('host')}:{db_args.get('port')}/{db_args.get('dbname', db_args.get('database'))}"
-
-            return PGVectorMDB(
-                connection_string=connection_str,
-                collection_name=self.config.kb_table._kb.vector_database_table,
-                embedding_function=self.embedding_model,
-                is_sparse=self.config.is_sparse,
-                vector_size=self.config.vector_size,
-            )
-        return MDBVectorStore(kb_table=self.config.kb_table)
-
-
-class VectorStoreFactory:
-    @staticmethod
-    def create(embedding_model: Any, config: VectorStoreConfig) -> VectorStore:
-        if config.vector_store_type == VectorStoreType.CHROMA:
-            return VectorStoreFactory._load_chromadb_store(embedding_model, config)
-        elif config.vector_store_type == VectorStoreType.PGVECTOR:
-            return VectorStoreFactory._load_pgvector_store(embedding_model, config)
-        else:
-            raise ValueError(f"Invalid vector store type, must be one either {VectorStoreType.__members__.keys()}")
-
-    @staticmethod
-    def _load_chromadb_store(embedding_model: Any, settings) -> VectorStore:
-        # Chroma still uses langchain, import only when needed
-        from langchain_community.vectorstores import Chroma
-
-        return Chroma(
-            persist_directory=settings.persist_directory,
-            collection_name=settings.collection_name,
-            embedding_function=embedding_model,
-        )
-
-    @staticmethod
-    def _load_pgvector_store(embedding_model: Any, settings) -> VectorStore:
-        from .pgvector import PGVectorMDB
-
-        return PGVectorMDB(
-            connection_string=settings.connection_string,
-            collection_name=settings.collection_name,
-            embedding_function=embedding_model,
-            is_sparse=settings.is_sparse,
-            vector_size=settings.vector_size,
-        )
diff --git a/mindsdb/integrations/utilities/rag/pipelines/__init__.py b/mindsdb/integrations/utilities/rag/pipelines/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/mindsdb/integrations/utilities/rag/pipelines/rag.py b/mindsdb/integrations/utilities/rag/pipelines/rag.py
deleted file mode 100644
index baf8ef8e117..00000000000
--- a/mindsdb/integrations/utilities/rag/pipelines/rag.py
+++ /dev/null
@@ -1,404 +0,0 @@
-from typing import Optional, Any, List, Union
-import asyncio
-
-from mindsdb.interfaces.knowledge_base.embedding_model_utils import construct_embedding_model_from_args
-from mindsdb.integrations.libs.vectordatabase_handler import DistanceFunction
-from mindsdb.integrations.utilities.rag.retrievers.auto_retriever import AutoRetriever
-from mindsdb.integrations.utilities.rag.retrievers.multi_vector_retriever import MultiVectorRetriever
-from mindsdb.integrations.utilities.rag.retrievers.sql_retriever import SQLRetriever
-from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker
-from mindsdb.integrations.utilities.rag.settings import (
-    RAGPipelineModel,
-    DEFAULT_AUTO_META_PROMPT_TEMPLATE,
-    SearchKwargs,
-    SearchType,
-    RerankerConfig,
-    VectorStoreConfig,
-)
-from mindsdb.integrations.utilities.rag.settings import DEFAULT_RERANKER_FLAG
-
-from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
-from mindsdb.interfaces.knowledge_base.llm_wrapper import create_chat_model
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class SimpleRAGPipeline:
-    """
-    Custom RAG pipeline implementation to replace LangChain LCEL components
-    """
-
-    def __init__(
-        self,
-        retriever_runnable: Any,
-        prompt_template: str,
-        llm: Any,
-        reranker: Optional[Any] = None,
-    ):
-        """
-        Initialize SimpleRAGPipeline
-
-        Args:
-            retriever_runnable: Retriever that can be invoked with question
-            prompt_template: Prompt template string with {question} and {context} placeholders
-            llm: Language model with invoke/ainvoke methods
-            reranker: Optional reranker for document reranking
-        """
-        self.retriever_runnable = retriever_runnable
-        self.prompt_template = prompt_template
-        self.llm = llm
-        self.reranker = reranker
-
-    def _format_docs(self, docs: Union[List[Any], str]) -> str:
-        """Format documents into context string"""
-        if isinstance(docs, str):
-            # Handle case where retriever returns a string (e.g., SQLRetriever)
-            return docs
-        if not docs:
-            return ""
-
-        # Sort by original document so we can group source summaries together
-        docs.sort(key=lambda d: d.metadata.get("original_row_id") if hasattr(d, "metadata") and d.metadata else 0)
-        original_document_id = None
-        summary_prepended_text = "Summary of the original document that the below context was taken from:\n"
-        document_content = ""
-
-        for d in docs:
-            metadata = d.metadata if hasattr(d, "metadata") else {}
-            if metadata.get("original_row_id") != original_document_id and metadata.get("summary"):
-                # We have a summary of a new document to prepend
-                original_document_id = metadata.get("original_row_id")
-                summary = f"{summary_prepended_text}{metadata.get('summary')}\n"
-                document_content += summary
-
-            page_content = d.page_content if hasattr(d, "page_content") else str(d)
-            document_content += f"{page_content}\n\n"
-
-        return document_content
-
-    def _format_prompt(self, question: str, context: str) -> str:
-        """Format prompt template with question and context"""
-        return self.prompt_template.format(question=question, context=context)
-
-    def _extract_llm_response(self, response: Any) -> str:
-        """Extract text content from LLM response"""
-        # Handle different response types
-        if isinstance(response, str):
-            return response
-        if hasattr(response, "content"):
-            return response.content
-        if hasattr(response, "text"):
-            return response.text
-        # Try to get from message if it's a message object
-        if hasattr(response, "message") and hasattr(response.message, "content"):
-            return response.message.content
-        # Fallback to string conversion
-        return str(response)
-
-    async def _retrieve_documents(self, question: str) -> List[Any]:
-        """Retrieve documents using retriever"""
-        # Try async first
-        if hasattr(self.retriever_runnable, "ainvoke"):
-            return await self.retriever_runnable.ainvoke(question)
-        elif hasattr(self.retriever_runnable, "invoke"):
-            return self.retriever_runnable.invoke(question)
-        elif hasattr(self.retriever_runnable, "get_relevant_documents"):
-            # Sync method, run in executor for async compatibility
-            loop = asyncio.get_event_loop()
-            return await loop.run_in_executor(None, self.retriever_runnable.get_relevant_documents, question)
-        else:
-            raise ValueError("Retriever must have ainvoke, invoke, or get_relevant_documents method")
-
-    async def ainvoke(self, question: Union[str, dict]) -> dict:
-        """Async invocation of the RAG pipeline"""
-        # Handle both string and dict input (for compatibility)
-        if isinstance(question, dict):
-            question = question.get("question", question.get("input", ""))
-
-        # 1. Retrieve documents
-        docs = await self._retrieve_documents(question)
-
-        # 2. Apply reranker if enabled
-        if self.reranker and docs:
-            try:
-                # Reranker should work with SimpleDocument via duck typing (page_content, metadata attributes)
-                docs = await self.reranker.acompress_documents(docs, question)
-                # Ensure all docs are SimpleDocument instances
-                simple_docs = []
-                for doc in docs:
-                    if isinstance(doc, SimpleDocument):
-                        simple_docs.append(doc)
-                    else:
-                        simple_docs.append(
-                            SimpleDocument(
-                                page_content=doc.page_content if hasattr(doc, "page_content") else str(doc),
-                                metadata=doc.metadata if hasattr(doc, "metadata") else {},
-                            )
-                        )
-                docs = simple_docs
-            except Exception as e:
-                logger.warning(f"Error during reranking, continuing without reranking: {e}")
-
-        # 3. Format documents into context
-        context = self._format_docs(docs)
-
-        # 4. Format prompt
-        formatted_prompt = self._format_prompt(question, context)
-
-        # 5. Generate answer using LLM
-        # Use dict format for messages instead of HumanMessage
-        messages = [{"role": "user", "content": formatted_prompt}]
-
-        # Try different LLM interfaces
-        if hasattr(self.llm, "abatch"):
-            # CustomLLMWrapper interface
-            responses = await self.llm.abatch([formatted_prompt])
-            llm_response = responses[0] if responses else None
-        elif hasattr(self.llm, "ainvoke"):
-            llm_response = await self.llm.ainvoke(messages)
-        elif hasattr(self.llm, "batch"):
-            # CustomLLMWrapper sync interface
-            responses = self.llm.batch([formatted_prompt])
-            llm_response = responses[0] if responses else None
-        elif hasattr(self.llm, "invoke"):
-            loop = asyncio.get_event_loop()
-            llm_response = await loop.run_in_executor(None, self.llm.invoke, messages)
-        else:
-            raise ValueError("LLM must have ainvoke, invoke, abatch, or batch method")
-
-        # 6. Extract text from LLM response
-        answer = self._extract_llm_response(llm_response)
-
-        # 7. Return dict with context, question, answer
-        return {"context": docs, "question": question, "answer": answer}
-
-    def invoke(self, question: Union[str, dict]) -> dict:
-        """Sync invocation of the RAG pipeline"""
-        return asyncio.run(self.ainvoke(question))
-
-
-class LangChainRAGPipeline:
-    """
-    Builds a RAG pipeline using langchain LCEL components
-
-    Args:
-        retriever_runnable: Base retriever component
-        prompt_template: Template for generating responses
-        llm: Language model for generating responses
-        reranker (bool): Whether to use reranking (default: False)
-        reranker_config (RerankerConfig): Configuration for the reranker, including:
-            - model: Model to use for reranking
-            - filtering_threshold: Minimum score to keep a document
-            - num_docs_to_keep: Maximum number of documents to keep
-            - max_concurrent_requests: Maximum concurrent API requests
-            - max_retries: Number of retry attempts for failed requests
-            - retry_delay: Delay between retries
-            - early_stop (bool): Whether to enable early stopping
-            - early_stop_threshold: Confidence threshold for early stopping
-        vector_store_config (VectorStoreConfig): Vector store configuration
-    """
-
-    def __init__(
-        self,
-        retriever_runnable,
-        prompt_template,
-        llm,
-        reranker: bool = DEFAULT_RERANKER_FLAG,
-        reranker_config: Optional[RerankerConfig] = None,
-        vector_store_config: Optional[VectorStoreConfig] = None,
-    ):
-        self.retriever_runnable = retriever_runnable
-        self.prompt_template = prompt_template
-        self.llm = llm
-        if reranker:
-            if reranker_config is None:
-                reranker_config = RerankerConfig()
-            # Convert config to dict and initialize reranker
-            reranker_kwargs = reranker_config.model_dump(exclude_none=True)
-            self.reranker = LLMReranker(**reranker_kwargs)
-        else:
-            self.reranker = None
-        self.vector_store_config = vector_store_config
-
-    def with_returned_sources(self) -> SimpleRAGPipeline:
-        """
-        Builds a RAG pipeline with returned sources
-        :return: SimpleRAGPipeline instance
-        """
-        # Ensure all the required components are not None
-        if self.prompt_template is None:
-            raise ValueError("One of the required components (prompt_template) is None")
-        if self.llm is None:
-            raise ValueError("One of the required components (llm) is None")
-
-        # Return SimpleRAGPipeline instance that handles all the pipeline logic
-        return SimpleRAGPipeline(
-            retriever_runnable=self.retriever_runnable,
-            prompt_template=self.prompt_template,
-            llm=self.llm,
-            reranker=self.reranker,
-        )
-
-    async def ainvoke(self, input_dict: dict) -> dict:
-        """Async invocation of the RAG pipeline."""
-        chain = self.with_returned_sources()
-        return await chain.ainvoke(input_dict)
-
-    def invoke(self, input_dict: dict) -> dict:
-        """Sync invocation of the RAG pipeline."""
-        import asyncio
-
-        return asyncio.run(self.ainvoke(input_dict))
-
-    @classmethod
-    def _apply_search_kwargs(
-        cls, retriever: Any, search_kwargs: Optional[SearchKwargs] = None, search_type: Optional[SearchType] = None
-    ) -> Any:
-        """Apply search kwargs and search type to the retriever if they exist"""
-        if hasattr(retriever, "search_kwargs") and search_kwargs:
-            # Convert search kwargs to dict, excluding None values
-            kwargs_dict = search_kwargs.model_dump(exclude_none=True)
-
-            # Only include relevant parameters based on search type
-            if search_type == SearchType.SIMILARITY:
-                # Remove MMR and similarity threshold specific params
-                kwargs_dict.pop("fetch_k", None)
-                kwargs_dict.pop("lambda_mult", None)
-                kwargs_dict.pop("score_threshold", None)
-            elif search_type == SearchType.MMR:
-                # Remove similarity threshold specific params
-                kwargs_dict.pop("score_threshold", None)
-            elif search_type == SearchType.SIMILARITY_SCORE_THRESHOLD:
-                # Remove MMR specific params
-                kwargs_dict.pop("fetch_k", None)
-                kwargs_dict.pop("lambda_mult", None)
-
-            retriever.search_kwargs.update(kwargs_dict)
-
-            # Set search type if supported by the retriever
-            if hasattr(retriever, "search_type") and search_type:
-                retriever.search_type = search_type.value
-
-        return retriever
-
-    @classmethod
-    def from_retriever(cls, config: RAGPipelineModel):
-        """
-        Builds a RAG pipeline with returned sources using a simple vector store retriever
-        :param config: RAGPipelineModel
-        :return:
-        """
-        vector_store_operator = VectorStoreOperator(
-            vector_store=config.vector_store,
-            documents=config.documents,
-            embedding_model=config.embedding_model,
-            vector_store_config=config.vector_store_config,
-        )
-        retriever = vector_store_operator.vector_store.as_retriever()
-        retriever = cls._apply_search_kwargs(retriever, config.search_kwargs, config.search_type)
-
-        return cls(
-            retriever,
-            config.rag_prompt_template,
-            config.llm,
-            vector_store_config=config.vector_store_config,
-            reranker=config.reranker,
-            reranker_config=config.reranker_config,
-        )
-
-    @classmethod
-    def from_auto_retriever(cls, config: RAGPipelineModel):
-        if not config.retriever_prompt_template:
-            config.retriever_prompt_template = DEFAULT_AUTO_META_PROMPT_TEMPLATE
-
-        retriever = AutoRetriever(config=config).as_runnable()
-        retriever = cls._apply_search_kwargs(retriever, config.search_kwargs, config.search_type)
-        return cls(
-            retriever,
-            config.rag_prompt_template,
-            config.llm,
-            reranker_config=config.reranker_config,
-            reranker=config.reranker,
-            vector_store_config=config.vector_store_config,
-            summarization_config=config.summarization_config,
-        )
-
-    @classmethod
-    def from_multi_vector_retriever(cls, config: RAGPipelineModel):
-        retriever = MultiVectorRetriever(config=config).as_runnable()
-        retriever = cls._apply_search_kwargs(retriever, config.search_kwargs, config.search_type)
-        return cls(
-            retriever,
-            config.rag_prompt_template,
-            config.llm,
-            reranker_config=config.reranker_config,
-            reranker=config.reranker,
-            vector_store_config=config.vector_store_config,
-            summarization_config=config.summarization_config,
-        )
-
-    @classmethod
-    def from_sql_retriever(cls, config: RAGPipelineModel):
-        retriever_config = config.sql_retriever_config
-        if retriever_config is None:
-            raise ValueError('Must provide "sql_retriever_config" for RAG pipeline config')
-        vector_store_config = config.vector_store_config
-        knowledge_base_table = vector_store_config.kb_table if vector_store_config is not None else None
-        if knowledge_base_table is None:
-            raise ValueError('Must provide valid "vector_store_config" for RAG pipeline config')
-        embedding_args = knowledge_base_table._kb.embedding_model.learn_args.get("using", {})
-        embeddings = construct_embedding_model_from_args(embedding_args)
-        sql_llm = create_chat_model(
-            {
-                "model_name": retriever_config.llm_config.model_name,
-                "provider": retriever_config.llm_config.provider,
-                **retriever_config.llm_config.params,
-            }
-        )
-        vector_store_operator = VectorStoreOperator(
-            vector_store=config.vector_store,
-            documents=config.documents,
-            embedding_model=config.embedding_model,
-            vector_store_config=config.vector_store_config,
-        )
-        vector_store_retriever = vector_store_operator.vector_store.as_retriever()
-        vector_store_retriever = cls._apply_search_kwargs(
-            vector_store_retriever, config.search_kwargs, config.search_type
-        )
-        distance_function = DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE
-        if config.vector_store_config.is_sparse and config.vector_store_config.vector_size is not None:
-            # Use negative dot product for sparse retrieval.
-            distance_function = DistanceFunction.NEGATIVE_DOT_PRODUCT
-        retriever = SQLRetriever(
-            fallback_retriever=vector_store_retriever,
-            vector_store_handler=knowledge_base_table.get_vector_db(),
-            min_k=retriever_config.min_k,
-            max_filters=retriever_config.max_filters,
-            filter_threshold=retriever_config.filter_threshold,
-            database_schema=retriever_config.database_schema,
-            embeddings_model=embeddings,
-            search_kwargs=config.search_kwargs,
-            rewrite_prompt_template=retriever_config.rewrite_prompt_template,
-            table_prompt_template=retriever_config.table_prompt_template,
-            column_prompt_template=retriever_config.column_prompt_template,
-            value_prompt_template=retriever_config.value_prompt_template,
-            boolean_system_prompt=retriever_config.boolean_system_prompt,
-            generative_system_prompt=retriever_config.generative_system_prompt,
-            num_retries=retriever_config.num_retries,
-            embeddings_table=knowledge_base_table._kb.vector_database_table,
-            source_table=retriever_config.source_table,
-            source_id_column=retriever_config.source_id_column,
-            distance_function=distance_function,
-            llm=sql_llm,
-        )
-        return cls(
-            retriever,
-            config.rag_prompt_template,
-            config.llm,
-            reranker_config=config.reranker_config,
-            reranker=config.reranker,
-            vector_store_config=config.vector_store_config,
-            summarization_config=config.summarization_config,
-        )
diff --git a/mindsdb/integrations/utilities/rag/rag_pipeline_builder.py b/mindsdb/integrations/utilities/rag/rag_pipeline_builder.py
deleted file mode 100644
index f9709989069..00000000000
--- a/mindsdb/integrations/utilities/rag/rag_pipeline_builder.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import pandas as pd
-from typing import Any
-from mindsdb.integrations.utilities.rag.storage.in_memory_byte_store import InMemoryByteStore
-from mindsdb.integrations.utilities.rag.pipelines.rag import LangChainRAGPipeline
-from mindsdb.integrations.utilities.rag.settings import RetrieverType, RAGPipelineModel
-from mindsdb.integrations.utilities.rag.utils import documents_to_df
-from mindsdb.integrations.utilities.rag.retrievers.multi_hop_retriever import MultiHopRetriever
-from mindsdb.integrations.utilities.rag.splitters.custom_splitters import RecursiveCharacterTextSplitter
-from mindsdb.utilities.log import getLogger
-
-logger = getLogger(__name__)
-
-_retriever_strategies = {
-    RetrieverType.VECTOR_STORE: lambda config: _create_pipeline_from_vector_store(config),
-    RetrieverType.AUTO: lambda config: _create_pipeline_from_auto_retriever(config),
-    RetrieverType.MULTI: lambda config: _create_pipeline_from_multi_retriever(config),
-    RetrieverType.SQL: lambda config: _create_pipeline_from_sql_retriever(config),
-    RetrieverType.MULTI_HOP: lambda config: _create_pipeline_from_multi_hop_retriever(config),
-}
-
-
-def _create_pipeline_from_vector_store(config: RAGPipelineModel) -> LangChainRAGPipeline:
-    return LangChainRAGPipeline.from_retriever(config=config)
-
-
-def _create_pipeline_from_auto_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
-    return LangChainRAGPipeline.from_auto_retriever(config=config)
-
-
-def _create_pipeline_from_multi_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
-    if config.text_splitter is None:
-        config.text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=config.chunk_size, chunk_overlap=config.chunk_overlap
-        )
-    if config.parent_store is None:
-        config.parent_store = InMemoryByteStore()
-
-    return LangChainRAGPipeline.from_multi_vector_retriever(config=config)
-
-
-def _create_pipeline_from_sql_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
-    return LangChainRAGPipeline.from_sql_retriever(config=config)
-
-
-def _create_pipeline_from_multi_hop_retriever(config: RAGPipelineModel) -> LangChainRAGPipeline:
-    retriever = MultiHopRetriever.from_config(config)
-    return LangChainRAGPipeline(
-        retriever_runnable=retriever,
-        prompt_template=config.rag_prompt_template,
-        llm=config.llm,
-        reranker_config=config.reranker_config,
-        reranker=config.reranker,
-        vector_store_config=config.vector_store_config,
-    )
-
-
-def _process_documents_to_df(config: RAGPipelineModel) -> pd.DataFrame:
-    return documents_to_df(
-        config.content_column_name, config.documents, embedding_model=config.embedding_model, with_embeddings=True
-    )
-
-
-def get_pipeline_from_retriever(config: RAGPipelineModel) -> Any:
-    retriever_strategy = _retriever_strategies.get(config.retriever_type)
-    if retriever_strategy:
-        return retriever_strategy(config).with_returned_sources()
-    else:
-        raise ValueError(
-            f"Invalid retriever type, must be one of: {list(_retriever_strategies.keys())}. Got {config.retriever_type}"
-        )
-
-
-class RAG:
-    def __init__(self, config: RAGPipelineModel):
-        self.pipeline = get_pipeline_from_retriever(config)
-
-    def __call__(self, question: str) -> dict:
-        logger.info(f"Processing question using rag pipeline: {question}")
-        result = self.pipeline.invoke(question)
-
-        returned_sources = [docs.page_content for docs in result["context"]]
-        logger.info(f"retrieved context used to answer question: {returned_sources}")
-
-        return result
diff --git a/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py b/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py
index 2542967672f..3fc7951530d 100644
--- a/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py
+++ b/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py
@@ -1,13 +1,12 @@
 from __future__ import annotations
 
 import re
+import os
 import json
+import math
 import asyncio
 import logging
-import math
-import os
 import random
-from abc import ABC
 from typing import Any, List, Optional, Tuple
 
 from openai import AsyncOpenAI, AsyncAzureOpenAI
@@ -35,7 +34,11 @@
     DEFAULT_VALID_CLASS_TOKENS,
     RerankerMode,
 )
-from mindsdb.integrations.libs.base import BaseMLEngine
+
+from mindsdb.interfaces.knowledge_base.providers.bedrock import AsyncBedrockClient
+from mindsdb.interfaces.knowledge_base.providers.gemini import GeminiClient
+from mindsdb.interfaces.knowledge_base.providers.snowflake import SnowflakeClient
+
 
 log = logging.getLogger(__name__)
 
@@ -50,7 +53,7 @@ def get_event_loop():
     return loop
 
 
-class BaseLLMReranker(BaseModel, ABC):
+class BaseLLMReranker(BaseModel):
     filtering_threshold: float = 0.0  # Default threshold for filtering
     provider: str = "openai"
     model: str = DEFAULT_RERANKING_MODEL  # Model to use for reranking
@@ -59,10 +62,10 @@ class BaseLLMReranker(BaseModel, ABC):
     base_url: Optional[str] = None
     api_version: Optional[str] = None
     num_docs_to_keep: Optional[int] = None  # How many of the top documents to keep after reranking & compressing.
-    method: str = "multi-class"  # Scoring method: 'multi-class' or 'binary'
+    method: str = "no-logprobs"  # Scoring method: 'multi-class' or 'no-logprobs'
     mode: RerankerMode = RerankerMode.POINTWISE
     _api_key_var: str = "OPENAI_API_KEY"
-    client: Optional[AsyncOpenAI | BaseMLEngine] = None
+    client: Optional[AsyncOpenAI | AsyncBedrockClient | GeminiClient | SnowflakeClient] = None
     _semaphore: Optional[asyncio.Semaphore] = None
     max_concurrent_requests: int = 20
     max_retries: int = 4
@@ -102,6 +105,9 @@ def _get_semaphore(self):
 
     def _init_client(self):
         if self.client is None:
+            if self.provider == "google":
+                self.provider = "gemini"
+
             if self.provider == "azure_openai":
                 azure_api_key = self.api_key or os.getenv("AZURE_OPENAI_API_KEY")
                 azure_api_endpoint = self.base_url or os.environ.get("AZURE_OPENAI_ENDPOINT")
@@ -113,11 +119,21 @@ def _init_client(self):
                     timeout=self.request_timeout,
                     max_retries=2,
                 )
+                self.method = "multi-class"
+            elif self.provider == "bedrock":
+                kwargs = self.model_extra.copy()
+                self.client = AsyncBedrockClient(**kwargs)
+            elif self.provider == "gemini":
+                self.client = GeminiClient(api_key=self.api_key)
+            elif self.provider == "snowflake":
+                kwargs = self.model_extra.copy()
+                self.client = SnowflakeClient(api_key=self.api_key, **kwargs)
             elif self.provider in ("openai", "ollama"):
                 if self.provider == "ollama":
-                    self.method = "no-logprobs"
                     if self.api_key is None:
                         self.api_key = "n/a"
+                else:
+                    self.method = "multi-class"
 
                 api_key_var: str = "OPENAI_API_KEY"
                 openai_api_key = self.api_key or os.getenv(api_key_var)
@@ -157,24 +173,15 @@ def _init_client(self):
                 self.method = "no-logprobs"
 
             else:
-                # try to use litellm
-                from mindsdb.api.executor.controllers.session_controller import SessionController
-
-                session = SessionController()
-                module = session.integration_controller.get_handler_module("litellm")
+                raise NotImplementedError(f'Provider "{self.provider}" is not supported')
 
-                if module is None or module.Handler is None:
-                    raise ValueError(f'Unable to use "{self.provider}" provider. Litellm handler is not installed')
-
-                self.client = module.Handler
-                self.method = "no-logprobs"
-
-    async def _call_llm(self, messages):
+    async def _call_llm(self, messages) -> str:
         if self.provider in ("azure_openai", "openai", "ollama"):
-            return await self.client.chat.completions.create(
+            response = await self.client.chat.completions.create(
                 model=self.model,
                 messages=messages,
             )
+            return response.choices[0].message.content
         elif self.provider in ("google", "google_genai"):
             # Convert OpenAI message format to Google Gen AI prompt format
             prompt_parts = []
@@ -228,12 +235,7 @@ def __init__(self, text):
 
                 return CompletionResponse(response.text)
         else:
-            kwargs = self.model_extra.copy()
-
-            if self.api_key is not None:
-                kwargs["api_key"] = self.api_key
-
-            return await self.client.acompletion(self.provider, model=self.model, messages=messages, args=kwargs)
+            return await self.client.acompletion(model_name=self.model, messages=messages)
 
     async def _rank(self, query_document_pairs: List[Tuple[str, str]], rerank_callback=None) -> List[Tuple[str, float]]:
         ranked_results = []
@@ -314,7 +316,7 @@ async def search_relevancy(self, query: str, document: str) -> Any:
             temperature=self.temperature,
             n=1,
             logprobs=True,
-            max_tokens=1,
+            max_completion_tokens=1,
         )
 
         # Extract response and logprobs
@@ -344,12 +346,10 @@ async def search_relevancy_no_logprob(self, query: str, document: str) -> Any:
             f"Search query: {query}"
         )
 
-        response = await self._call_llm(
+        answer = await self._call_llm(
             messages=[{"role": "system", "content": prompt}, {"role": "user", "content": document}],
         )
 
-        answer = response.choices[0].message.content
-
         try:
             value = re.findall(r"[\d]+", answer)[0]
             score = float(value) / 100
@@ -462,7 +462,7 @@ async def search_relevancy_score(self, query: str, document: str) -> Any:
             n=self.n,
             logprobs=self.logprobs,
             top_logprobs=self.top_logprobs,
-            max_tokens=self.max_tokens,
+            max_completion_tokens=self.max_tokens,
         )
 
         # Extract response and logprobs
@@ -610,8 +610,8 @@ async def _rank_single_batch(
 
         for attempt in range(self.max_retries):
             try:
-                response = await self._call_llm(messages)
-                content = response.choices[0].message.content
+                content = await self._call_llm(messages)
+
                 scores = self._extract_scores(content, len(documents))
                 return list(zip(documents, scores))
             except Exception as exc:
diff --git a/mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py b/mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py
deleted file mode 100644
index 9fdc083ccbf..00000000000
--- a/mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py
+++ /dev/null
@@ -1,148 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import logging
-from typing import Any, Dict, Optional, Sequence
-
-from mindsdb.integrations.utilities.rag.rerankers.base_reranker import BaseLLMReranker
-
-log = logging.getLogger(__name__)
-
-
-def _dispatch_custom_event(event_name: str, data: dict):
-    """Simple event dispatcher replacement for langchain's dispatch_custom_event.
-
-    This is a no-op implementation. If custom event handling is needed,
-    it can be extended to dispatch events to registered handlers.
-    """
-    # No-op for now - can be extended if needed
-    pass
-
-
-class LLMReranker(BaseLLMReranker):
-    remove_irrelevant: bool = True  # New flag to control removal of irrelevant documents
-
-    def _dispatch_rerank_event(self, data):
-        """Dispatch rerank event using custom event dispatcher"""
-        _dispatch_custom_event("rerank", data)
-
-    async def acompress_documents(
-        self,
-        documents: Sequence[Any],
-        query: str,
-        callbacks: Optional[Any] = None,
-    ) -> Sequence[Any]:
-        """
-        Async compress documents using reranking with proper error handling.
-
-        Args:
-            documents: Sequence of document objects with page_content and metadata attributes
-            query: Query string for reranking
-            callbacks: Optional callbacks object with on_retriever_start, on_retriever_end,
-                     on_text, and on_retriever_error methods
-
-        Returns:
-            Sequence of filtered and reranked documents
-        """
-        if callbacks and hasattr(callbacks, "on_retriever_start"):
-            try:
-                await callbacks.on_retriever_start({"query": query}, "Reranking documents")
-            except Exception as e:
-                log.warning(f"Error in callback on_retriever_start: {e}")
-
-        log.info(f"Async compressing documents. Initial count: {len(documents)}")
-        if not documents:
-            if callbacks and hasattr(callbacks, "on_retriever_end"):
-                try:
-                    await callbacks.on_retriever_end({"documents": []})
-                except Exception as e:
-                    log.warning(f"Error in callback on_retriever_end: {e}")
-            return []
-
-        # Stream reranking update.
-        _dispatch_custom_event("rerank_begin", {"num_documents": len(documents)})
-
-        try:
-            # Prepare query-document pairs
-            # Use duck typing to access page_content attribute
-            query_document_pairs = [(query, doc.page_content) for doc in documents]
-
-            if callbacks and hasattr(callbacks, "on_text"):
-                try:
-                    await callbacks.on_text("Starting document reranking...")
-                except Exception as e:
-                    log.warning(f"Error in callback on_text: {e}")
-
-            # Get ranked results
-            ranked_results = await self._rank(query_document_pairs, rerank_callback=self._dispatch_rerank_event)
-
-            # Sort by score in descending order
-            ranked_results.sort(key=lambda x: x[1], reverse=True)
-
-            # Filter based on threshold and num_docs_to_keep
-            filtered_docs = []
-            for doc, score in ranked_results:
-                if score >= self.filtering_threshold:
-                    matching_doc = next(d for d in documents if d.page_content == doc)
-                    # Use duck typing to access and update metadata
-                    metadata = getattr(matching_doc, "metadata", None) or {}
-                    matching_doc.metadata = {**metadata, "relevance_score": score}
-                    filtered_docs.append(matching_doc)
-
-                    if callbacks and hasattr(callbacks, "on_text"):
-                        try:
-                            await callbacks.on_text(f"Document scored {score:.2f}")
-                        except Exception as e:
-                            log.warning(f"Error in callback on_text: {e}")
-
-                    if self.num_docs_to_keep and len(filtered_docs) >= self.num_docs_to_keep:
-                        break
-
-            log.info(f"Async compression complete. Final count: {len(filtered_docs)}")
-
-            if callbacks and hasattr(callbacks, "on_retriever_end"):
-                try:
-                    await callbacks.on_retriever_end({"documents": filtered_docs})
-                except Exception as e:
-                    log.warning(f"Error in callback on_retriever_end: {e}")
-
-            return filtered_docs
-
-        except Exception as e:
-            error_msg = "Error during async document compression:"
-            log.exception(error_msg)
-            if callbacks and hasattr(callbacks, "on_retriever_error"):
-                try:
-                    await callbacks.on_retriever_error(f"{error_msg} {e}")
-                except Exception as callback_error:
-                    log.warning(f"Error in callback on_retriever_error: {callback_error}")
-            return documents  # Return original documents on error
-
-    def compress_documents(
-        self,
-        documents: Sequence[Any],
-        query: str,
-        callbacks: Optional[Any] = None,
-    ) -> Sequence[Any]:
-        """
-        Sync wrapper for async compression.
-
-        Args:
-            documents: Sequence of document objects with page_content and metadata attributes
-            query: Query string for reranking
-            callbacks: Optional callbacks object
-
-        Returns:
-            Sequence of filtered and reranked documents
-        """
-        return asyncio.run(self.acompress_documents(documents, query, callbacks))
-
-    @property
-    def _identifying_params(self) -> Dict[str, Any]:
-        """Get the identifying parameters."""
-        return {
-            "model": self.model,
-            "temperature": self.temperature,
-            "remove_irrelevant": self.remove_irrelevant,
-            "method": self.method,
-        }
diff --git a/mindsdb/integrations/utilities/rag/retrievers/__init__.py b/mindsdb/integrations/utilities/rag/retrievers/__init__.py
deleted file mode 100644
index 94e359da03a..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from mindsdb.integrations.utilities.rag.retrievers.multi_hop_retriever import MultiHopRetriever
-
-__all__ = ['MultiHopRetriever']
\ No newline at end of file
diff --git a/mindsdb/integrations/utilities/rag/retrievers/auto_retriever.py b/mindsdb/integrations/utilities/rag/retrievers/auto_retriever.py
deleted file mode 100644
index ba260693c92..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/auto_retriever.py
+++ /dev/null
@@ -1,228 +0,0 @@
-from typing import List, Any
-import json
-import asyncio
-
-import pandas as pd
-
-from mindsdb.integrations.utilities.rag.retrievers.base import BaseRetriever, RunnableRetriever
-from mindsdb.integrations.utilities.rag.utils import documents_to_df
-from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
-from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class AutoRetriever(BaseRetriever):
-    """
-    AutoRetrieval is a class that uses LLM to extract metadata from documents and query vectorstore using self-query retrievers.
-    """
-
-    def __init__(self, config: RAGPipelineModel):
-        """
-
-        :param config: RAGPipelineModel
-
-
-        """
-
-        self.documents = config.documents
-        self.content_column_name = config.content_column_name
-        self.vectorstore = config.vector_store
-        self.filter_columns = config.auto_retriever_filter_columns
-        self.document_description = config.dataset_description
-        self.llm = config.llm
-        self.embedding_model = config.embedding_model
-        self.prompt_template = config.retriever_prompt_template
-        self.cardinality_threshold = config.cardinality_threshold
-
-    def _get_low_cardinality_columns(self, data: pd.DataFrame):
-        """
-        Given a dataframe, return a list of columns with low cardinality if datatype is not bool.
-        :return:
-        """
-        low_cardinality_columns = []
-        columns = data.columns if self.filter_columns is None else self.filter_columns
-        for column in columns:
-            if data[column].dtype != "bool":
-                if data[column].nunique() < self.cardinality_threshold:
-                    low_cardinality_columns.append(column)
-        return low_cardinality_columns
-
-    def get_metadata_field_info(self):
-        """
-        Given a list of Document, use llm to extract metadata from it.
-        :return:
-        """
-
-        def _alter_description(data: pd.DataFrame, low_cardinality_columns: list, result: List[dict]):
-            """
-            For low cardinality columns, alter the description to include the sorted valid values.
-            :param data: pd.DataFrame
-            :param low_cardinality_columns: list
-            :param result: List[dict]
-            """
-            for column_name in low_cardinality_columns:
-                valid_values = sorted(data[column_name].unique())
-                for entry in result:
-                    if entry["name"] == column_name:
-                        entry["description"] += f". Valid values: {valid_values}"
-
-        data = documents_to_df(self.content_column_name, self.documents)
-
-        prompt = self.prompt_template.format(dataframe=data.head().to_json(), description=self.document_description)
-        # Call LLM and extract response
-        llm_response = self.llm.invoke(prompt)
-        # Extract content from LLM response
-        if hasattr(llm_response, "content"):
-            response_text = llm_response.content
-        elif isinstance(llm_response, str):
-            response_text = llm_response
-        else:
-            response_text = str(llm_response)
-
-        result: List[dict] = json.loads(response_text)
-
-        _alter_description(data, self._get_low_cardinality_columns(data), result)
-
-        return result
-
-    def get_vectorstore(self):
-        """
-
-        :return:
-        """
-        return VectorStoreOperator(
-            vector_store=self.vectorstore, documents=self.documents, embedding_model=self.embedding_model
-        ).vector_store
-
-    def as_runnable(self) -> RunnableRetriever:
-        """
-        Return a custom self-query retriever
-        :return: CustomSelfQueryRetriever instance
-        """
-        vectorstore = self.get_vectorstore()
-        metadata_field_info = self.get_metadata_field_info()
-
-        return CustomSelfQueryRetriever(
-            llm=self.llm,
-            vectorstore=vectorstore,
-            document_contents=self.document_description,
-            metadata_field_info=metadata_field_info,
-        )
-
-
-class CustomSelfQueryRetriever:
-    """
-    Custom implementation of SelfQueryRetriever to replace langchain's SelfQueryRetriever.
-    Uses LLM to generate metadata filters and queries vectorstore with those filters.
-    """
-
-    def __init__(self, llm: Any, vectorstore: Any, document_contents: str, metadata_field_info: List[dict]):
-        """
-        Initialize CustomSelfQueryRetriever
-
-        Args:
-            llm: LLM instance with invoke method
-            vectorstore: Vector store with similarity_search_with_score method
-            document_contents: Description of document contents
-            metadata_field_info: List of metadata field information dicts
-        """
-        self.llm = llm
-        self.vectorstore = vectorstore
-        self.document_contents = document_contents
-        self.metadata_field_info = metadata_field_info
-
-    def _generate_metadata_filters(self, query: str) -> dict:
-        """
-        Use LLM to generate metadata filters from query
-
-        Args:
-            query: User query string
-
-        Returns:
-            Dictionary of metadata filters
-        """
-        # Create prompt for LLM to generate metadata filters
-        metadata_info_str = json.dumps(self.metadata_field_info, indent=2)
-        prompt = f"""Given the following query and metadata field information, generate a structured query with metadata filters.
-
-Query: {query}
-
-Document contents description: {self.document_contents}
-
-Available metadata fields:
-{metadata_info_str}
-
-Generate a JSON object with the query string and any applicable metadata filters. 
-Format: {{"query": "extracted query", "filters": {{"field_name": "value"}}}}
-"""
-
-        try:
-            llm_response = self.llm.invoke(prompt)
-            # Extract content from LLM response
-            if hasattr(llm_response, "content"):
-                response_text = llm_response.content
-            elif isinstance(llm_response, str):
-                response_text = llm_response
-            else:
-                response_text = str(llm_response)
-
-            # Parse JSON response
-            parsed = json.loads(response_text)
-            return parsed.get("filters", {})
-        except Exception as e:
-            logger.warning(f"Error generating metadata filters: {e}")
-            return {}
-
-    def _query_vectorstore(self, query: str, filters: dict) -> List[Any]:
-        """
-        Query vectorstore with query and metadata filters
-
-        Args:
-            query: Query string
-            filters: Metadata filters dictionary
-
-        Returns:
-            List of documents
-        """
-        # Use vectorstore's similarity_search method
-        # If vectorstore supports metadata filtering, apply filters
-        if hasattr(self.vectorstore, "similarity_search"):
-            # Try to pass filters if supported
-            if filters:
-                try:
-                    # Some vectorstores support filter parameter
-                    if hasattr(self.vectorstore, "similarity_search_with_score"):
-                        docs_with_scores = self.vectorstore.similarity_search_with_score(query, k=4, filter=filters)
-                        return [doc for doc, _ in docs_with_scores]
-                    else:
-                        return self.vectorstore.similarity_search(query, k=4, filter=filters)
-                except TypeError:
-                    # If filter not supported, just do regular search
-                    return self.vectorstore.similarity_search(query, k=4)
-            else:
-                return self.vectorstore.similarity_search(query, k=4)
-        else:
-            raise ValueError("Vectorstore must have similarity_search method")
-
-    def invoke(self, query: str) -> List[Any]:
-        """Sync invocation - retrieve documents for a query"""
-        # Generate metadata filters
-        filters = self._generate_metadata_filters(query)
-
-        # Extract query string (LLM might have rewritten it)
-        # For now, use original query
-        # In a full implementation, we'd extract the rewritten query from LLM response
-
-        # Query vectorstore
-        return self._query_vectorstore(query, filters)
-
-    async def ainvoke(self, query: str) -> List[Any]:
-        """Async invocation - retrieve documents for a query"""
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(None, self.invoke, query)
-
-    def get_relevant_documents(self, query: str) -> List[Any]:
-        """Get relevant documents (sync)"""
-        return self.invoke(query)
diff --git a/mindsdb/integrations/utilities/rag/retrievers/base.py b/mindsdb/integrations/utilities/rag/retrievers/base.py
deleted file mode 100644
index 1c136436aec..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/base.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Protocol, List, Any
-
-
-class RunnableRetriever(Protocol):
-    """Protocol for retriever runnable objects that can be invoked to retrieve documents"""
-
-    def invoke(self, query: str) -> List[Any]:
-        """Sync invocation - retrieve documents for a query"""
-        ...
-
-    async def ainvoke(self, query: str) -> List[Any]:
-        """Async invocation - retrieve documents for a query"""
-        ...
-
-    def get_relevant_documents(self, query: str) -> List[Any]:
-        """Get relevant documents (sync) - alternative interface"""
-        ...
-
-
-class BaseRetriever(ABC):
-    """Represents a base retriever for a RAG pipeline"""
-
-    @abstractmethod
-    def as_runnable(self) -> RunnableRetriever:
-        """
-        Return a runnable retriever object that can be invoked.
-
-        Returns:
-            RunnableRetriever: An object that implements invoke(), ainvoke(), or get_relevant_documents()
-        """
-        pass
diff --git a/mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py b/mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py
deleted file mode 100644
index d5bca056e0a..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py
+++ /dev/null
@@ -1,130 +0,0 @@
-from typing import List, Optional, Any
-
-import json
-from pydantic import Field, PrivateAttr
-
-from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel, DEFAULT_QUESTION_REFORMULATION_TEMPLATE
-from mindsdb.integrations.utilities.rag.retrievers.retriever_factory import create_retriever
-from mindsdb.integrations.utilities.rag.retrievers.base import BaseRetriever, RunnableRetriever
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class MultiHopRetriever(BaseRetriever):
-    """A retriever that implements multi-hop question reformulation strategy.
-
-    This retriever takes a base retriever and uses an LLM to generate follow-up
-    questions based on the initial results. It then retrieves documents for each
-    follow-up question and combines all results.
-    """
-
-    base_retriever: Any = Field(
-        description="Base retriever to use for document lookup (must have get_relevant_documents or invoke method)"
-    )
-    llm: Any = Field(description="LLM to use for generating follow-up questions (must have invoke method)")
-    max_hops: int = Field(default=3, description="Maximum number of follow-up questions to generate")
-    reformulation_template: str = Field(
-        default=DEFAULT_QUESTION_REFORMULATION_TEMPLATE, description="Template for reformulating questions"
-    )
-
-    _asked_questions: set = PrivateAttr(default_factory=set)
-
-    @classmethod
-    def from_config(cls, config: RAGPipelineModel) -> "MultiHopRetriever":
-        """Create a MultiHopRetriever from a RAGPipelineModel config."""
-        if config.multi_hop_config is None:
-            raise ValueError("multi_hop_config must be set for MultiHopRetriever")
-
-        # Create base retriever based on type
-        base_retriever = create_retriever(config, config.multi_hop_config.base_retriever_type)
-
-        return cls(
-            base_retriever=base_retriever,
-            llm=config.llm,
-            max_hops=config.multi_hop_config.max_hops,
-            reformulation_template=config.multi_hop_config.reformulation_template,
-        )
-
-    def _get_relevant_documents(self, query: str, *, run_manager: Optional[Any] = None) -> List[Any]:
-        """
-        Get relevant documents using multi-hop retrieval.
-
-        Args:
-            query: Query string
-            run_manager: Optional callback manager (not used, kept for compatibility)
-
-        Returns:
-            List of documents with page_content and metadata attributes
-        """
-        if query in self._asked_questions:
-            return []
-
-        self._asked_questions.add(query)
-
-        # Get initial documents using duck typing
-        docs = self._retrieve_from_base_retriever(query)
-        if not docs or len(self._asked_questions) >= self.max_hops:
-            return docs
-
-        # Generate follow-up questions
-        context = "\n".join(doc.page_content if hasattr(doc, "page_content") else str(doc) for doc in docs)
-        prompt = self.reformulation_template.format(question=query, context=context)
-
-        try:
-            # Call LLM - handle both string and message formats
-            llm_response = self.llm.invoke(prompt)
-            # Extract content from LLM response
-            if hasattr(llm_response, "content"):
-                response_text = llm_response.content
-            elif isinstance(llm_response, str):
-                response_text = llm_response
-            else:
-                response_text = str(llm_response)
-
-            follow_up_questions = json.loads(response_text)
-            if not isinstance(follow_up_questions, list):
-                return docs
-        except (json.JSONDecodeError, TypeError, Exception) as e:
-            logger.warning(f"Error parsing follow-up questions: {e}")
-            return docs
-
-        # Get documents for follow-up questions
-        for question in follow_up_questions:
-            if isinstance(question, str):
-                follow_up_docs = self._get_relevant_documents(question)
-                docs.extend(follow_up_docs)
-
-        return docs
-
-    def _retrieve_from_base_retriever(self, query: str) -> List[Any]:
-        """Retrieve documents from base retriever using duck typing"""
-        if hasattr(self.base_retriever, "_get_relevant_documents"):
-            return self.base_retriever._get_relevant_documents(query)
-        elif hasattr(self.base_retriever, "get_relevant_documents"):
-            return self.base_retriever.get_relevant_documents(query)
-        elif hasattr(self.base_retriever, "invoke"):
-            return self.base_retriever.invoke(query)
-        else:
-            raise ValueError(
-                "Base retriever must have _get_relevant_documents, get_relevant_documents, or invoke method"
-            )
-
-    def invoke(self, query: str) -> List[Any]:
-        """Sync invocation - retrieve documents for a query"""
-        return self._get_relevant_documents(query)
-
-    async def ainvoke(self, query: str) -> List[Any]:
-        """Async invocation - retrieve documents for a query"""
-        import asyncio
-
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(None, self._get_relevant_documents, query)
-
-    def get_relevant_documents(self, query: str) -> List[Any]:
-        """Get relevant documents (sync)"""
-        return self._get_relevant_documents(query)
-
-    def as_runnable(self) -> RunnableRetriever:
-        """Return self as a runnable retriever"""
-        return self
diff --git a/mindsdb/integrations/utilities/rag/retrievers/multi_vector_retriever.py b/mindsdb/integrations/utilities/rag/retrievers/multi_vector_retriever.py
deleted file mode 100644
index c7d2c5918bd..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/multi_vector_retriever.py
+++ /dev/null
@@ -1,193 +0,0 @@
-from typing import List, Tuple, Any
-import uuid
-import asyncio
-
-from mindsdb.integrations.utilities.rag.retrievers.base import BaseRetriever, RunnableRetriever
-from mindsdb.integrations.utilities.rag.settings import DEFAULT_LLM_MODEL, MultiVectorRetrieverMode, RAGPipelineModel
-from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
-from mindsdb.integrations.utilities.rag.retrievers.safe_output_parser import SafeOutputParser
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class MultiVectorRetriever(BaseRetriever):
-    """
-    MultiVectorRetriever stores multiple vectors per document.
-    """
-
-    def __init__(self, config: RAGPipelineModel):
-        self.vectorstore = config.vector_store
-        self.parent_store = config.parent_store
-        self.id_key = config.id_key
-        self.documents = config.documents
-        self.text_splitter = config.text_splitter
-        self.embedding_model = config.embedding_model
-        self.max_concurrency = config.max_concurrency
-        self.mode = config.multi_retriever_mode
-
-    def _generate_id_and_split_document(self, doc: Any) -> Tuple[str, List[Any]]:
-        """
-        Generate a unique id for the document and split it into sub-documents.
-        :param doc: Document with page_content and metadata
-        :return: Tuple of (doc_id, list of sub_documents)
-        """
-        doc_id = str(uuid.uuid4())
-        sub_docs = self.text_splitter.split_documents([doc])
-        for sub_doc in sub_docs:
-            # Use duck typing to access metadata
-            if not hasattr(sub_doc, "metadata"):
-                sub_doc.metadata = {}
-            sub_doc.metadata[self.id_key] = doc_id
-        return doc_id, sub_docs
-
-    def _split_documents(self) -> Tuple[List[Any], List[str]]:
-        """
-        Split the documents into sub-documents and generate unique ids for each document.
-        :return: Tuple of (list of split_docs, list of doc_ids)
-        """
-        split_info = list(map(self._generate_id_and_split_document, self.documents))
-        doc_ids, split_docs_lists = zip(*split_info)
-        split_docs = [doc for sublist in split_docs_lists for doc in sublist]
-        return split_docs, list(doc_ids)
-
-    def _create_retriever_and_vs_operator(
-        self, docs: List[Any]
-    ) -> Tuple["CustomMultiVectorRetriever", VectorStoreOperator]:
-        vstore_operator = VectorStoreOperator(
-            vector_store=self.vectorstore,
-            documents=docs,
-            embedding_model=self.embedding_model,
-        )
-        retriever = CustomMultiVectorRetriever(
-            vectorstore=vstore_operator.vector_store, byte_store=self.parent_store, id_key=self.id_key
-        )
-        return retriever, vstore_operator
-
-    def _get_document_summaries(self, llm: Any) -> List[str]:
-        """
-        Get document summaries using LLM
-
-        Args:
-            llm: LLM instance with invoke method
-
-        Returns:
-            List of summary strings
-        """
-        summaries = []
-        prompt_template = "Summarize the following document:\n\n{doc}"
-
-        for doc in self.documents:
-            # Extract page_content using duck typing
-            page_content = doc.page_content if hasattr(doc, "page_content") else str(doc)
-            prompt = prompt_template.format(doc=page_content)
-
-            try:
-                # Call LLM
-                llm_response = llm.invoke(prompt)
-                # Extract content from LLM response
-                if hasattr(llm_response, "content"):
-                    summary = llm_response.content
-                elif isinstance(llm_response, str):
-                    summary = llm_response
-                else:
-                    summary = str(llm_response)
-
-                # Use SafeOutputParser to clean the output (extract actual text from parse result)
-                parser = SafeOutputParser()
-                parsed_result = parser.parse(summary)
-                summary = parser.extract_output(parsed_result)
-                summaries.append(summary)
-            except Exception as e:
-                logger.warning(f"Error generating summary for document: {e}")
-                # Fallback to empty summary or first part of content
-                summaries.append(page_content[:200] if len(page_content) > 200 else page_content)
-
-        return summaries
-
-    def as_runnable(self) -> RunnableRetriever:
-        # Get LLM from config - need to check how it's passed
-        # For now, assume we need to get it from somewhere
-        # This might need to be passed in config
-        llm = getattr(self, "llm", None)
-        if llm is None:
-            # Try to create a default LLM - this might need adjustment
-            from mindsdb.interfaces.knowledge_base.llm_wrapper import create_chat_model
-
-            llm = create_chat_model({"model_name": DEFAULT_LLM_MODEL, "provider": "openai"})
-
-        if self.mode in {MultiVectorRetrieverMode.SPLIT, MultiVectorRetrieverMode.BOTH}:
-            split_docs, doc_ids = self._split_documents()
-            retriever, vstore_operator = self._create_retriever_and_vs_operator(split_docs)
-            summaries = self._get_document_summaries(llm)
-            summary_docs = [
-                SimpleDocument(page_content=s, metadata={self.id_key: doc_ids[i]}) for i, s in enumerate(summaries)
-            ]
-            vstore_operator.add_documents(summary_docs)
-            retriever.docstore.mset(list(zip(doc_ids, self.documents)))
-            return retriever
-
-        elif self.mode == MultiVectorRetrieverMode.SUMMARIZE:
-            summaries = self._get_document_summaries(llm)
-            doc_ids = [str(uuid.uuid4()) for _ in self.documents]
-            summary_docs = [
-                SimpleDocument(page_content=s, metadata={self.id_key: doc_ids[i]}) for i, s in enumerate(summaries)
-            ]
-            retriever, vstore_operator = self._create_retriever_and_vs_operator(summary_docs)
-            retriever.docstore.mset(list(zip(doc_ids, self.documents)))
-            return retriever
-
-        else:
-            raise ValueError(f"Invalid mode: {self.mode}")
-
-
-class CustomMultiVectorRetriever:
-    """
-    Custom implementation of MultiVectorRetriever to replace langchain's MultiVectorRetriever.
-    Stores parent documents in docstore and sub-documents/summaries in vectorstore.
-    """
-
-    def __init__(self, vectorstore: Any, byte_store: Any, id_key: str = "doc_id"):
-        """
-        Initialize CustomMultiVectorRetriever
-
-        Args:
-            vectorstore: Vector store for storing sub-documents/summaries
-            byte_store: Store for parent documents (must have mset and mget methods)
-            id_key: Key used to link sub-documents to parent documents
-        """
-        self.vectorstore = vectorstore
-        self.docstore = byte_store
-        self.id_key = id_key
-
-    def invoke(self, query: str) -> List[Any]:
-        """Sync invocation - retrieve documents for a query"""
-        # Get sub-documents from vectorstore
-        sub_docs = self.vectorstore.similarity_search(query, k=4)
-
-        # Get parent document IDs from sub-documents
-        parent_ids = []
-        for doc in sub_docs:
-            metadata = getattr(doc, "metadata", {})
-            if self.id_key in metadata:
-                parent_ids.append(metadata[self.id_key])
-
-        # Get parent documents from docstore
-        parent_docs = []
-        if parent_ids and hasattr(self.docstore, "mget"):
-            parent_docs = self.docstore.mget(parent_ids)
-        elif parent_ids and hasattr(self.docstore, "get"):
-            parent_docs = [self.docstore.get(pid) for pid in parent_ids if self.docstore.get(pid) is not None]
-
-        # Return parent documents (or sub-docs if no parent store)
-        return parent_docs if parent_docs else sub_docs
-
-    async def ainvoke(self, query: str) -> List[Any]:
-        """Async invocation - retrieve documents for a query"""
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(None, self.invoke, query)
-
-    def get_relevant_documents(self, query: str) -> List[Any]:
-        """Get relevant documents (sync)"""
-        return self.invoke(query)
diff --git a/mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py b/mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py
deleted file mode 100644
index 15fec83adcf..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""Factory functions for creating retrievers."""
-
-from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel, RetrieverType
-from mindsdb.integrations.utilities.rag.vector_store import VectorStoreOperator
-from mindsdb.integrations.utilities.rag.retrievers.auto_retriever import AutoRetriever
-from mindsdb.integrations.utilities.rag.retrievers.sql_retriever import SQLRetriever
-
-
-def create_vector_store_retriever(config: RAGPipelineModel):
-    """Create a vector store retriever."""
-    if getattr(config.vector_store, '_mock_return_value', None) is not None:
-        # If vector_store is mocked, return a simple mock retriever for testing
-        from unittest.mock import MagicMock
-        mock_retriever = MagicMock()
-        mock_retriever._get_relevant_documents.return_value = [
-            {"page_content": "The Wright brothers invented the airplane."}
-        ]
-        return mock_retriever
-
-    vector_store_operator = VectorStoreOperator(
-        vector_store=config.vector_store,
-        documents=config.documents,
-        embedding_model=config.embedding_model,
-        vector_store_config=config.vector_store_config
-    )
-    return vector_store_operator.vector_store.as_retriever()
-
-
-def create_auto_retriever(config: RAGPipelineModel):
-    """Create an auto retriever."""
-    return AutoRetriever(
-        vector_store=config.vector_store,
-        documents=config.documents,
-        embedding_model=config.embedding_model
-    )
-
-
-def create_sql_retriever(config: RAGPipelineModel):
-    """Create a SQL retriever."""
-    return SQLRetriever(
-        sql_source=config.sql_source,
-        llm=config.llm
-    )
-
-
-def create_retriever(config: RAGPipelineModel, retriever_type: RetrieverType = None):
-    """Create a retriever based on type."""
-    retriever_type = retriever_type or config.retriever_type
-
-    if retriever_type == RetrieverType.VECTOR_STORE:
-        return create_vector_store_retriever(config)
-    elif retriever_type == RetrieverType.AUTO:
-        return create_auto_retriever(config)
-    elif retriever_type == RetrieverType.SQL:
-        return create_sql_retriever(config)
-    else:
-        raise ValueError(f"Unsupported retriever type: {retriever_type}")
diff --git a/mindsdb/integrations/utilities/rag/retrievers/safe_output_parser.py b/mindsdb/integrations/utilities/rag/retrievers/safe_output_parser.py
deleted file mode 100644
index 0460714eb12..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/safe_output_parser.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import re
-from typing import Union
-from dataclasses import dataclass
-
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-# Default format instructions for conversational agent
-# This is a simplified version - can be customized if needed
-FORMAT_INSTRUCTIONS = """Use the following format:
-
-Question: the input question you must answer
-Thought: you should think about what to do
-Action: the action to take, should be one of the available tools
-Action Input: the input to the action
-Observation: the result of the action
-... (this Thought/Action/Action Input/Observation can repeat N times)
-Thought: I now know the final answer
-Final Answer: the final answer to the original input question"""
-
-
-@dataclass
-class AgentAction:
-    """Custom AgentAction class to replace langchain AgentAction"""
-
-    tool: str
-    tool_input: str
-    log: str
-
-
-@dataclass
-class AgentFinish:
-    """Custom AgentFinish class to replace langchain AgentFinish"""
-
-    return_values: dict
-    log: str
-
-
-class SafeOutputParser:
-    """Output parser for the conversational agent that does not throw OutputParserException."""
-
-    def __init__(self, ai_prefix: str = "AI", format_instructions: str = FORMAT_INSTRUCTIONS):
-        self.ai_prefix = ai_prefix
-        self.format_instructions = format_instructions
-
-    def get_format_instructions(self) -> str:
-        """Returns formatting instructions for the given output parser."""
-        return self.format_instructions
-
-    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
-        """Parses outputted text from an LLM.
-
-        Args:
-            text (str): Outputted text to parse.
-
-        Returns:
-            Union[AgentAction, AgentFinish]: Parsed agent action or finish result
-        """
-        regex = r"Action: (.*?)[\n]*Action Input:([\s\S]*)"
-        match = re.search(regex, text, re.DOTALL)
-        if match is not None:
-            action = match.group(1)
-            action_input = match.group(2)
-            return AgentAction(action.strip(), action_input.strip(" ").strip('"'), text)
-        output = text
-        if f"{self.ai_prefix}:" in text:
-            output = text.split(f"{self.ai_prefix}:")[-1].strip()
-        return AgentFinish({"output": output}, text)
-
-    def extract_output(self, result: Union[AgentAction, AgentFinish, str]) -> str:
-        """Extract the actual output text from a parse result.
-
-        Args:
-            result: Result from parse() method or a string
-
-        Returns:
-            str: The actual output text
-        """
-        if isinstance(result, str):
-            return result
-        elif isinstance(result, AgentFinish):
-            return result.return_values.get("output", result.log)
-        elif isinstance(result, AgentAction):
-            # For AgentAction, return the log or tool_input
-            return result.tool_input if result.tool_input else result.log
-        else:
-            return str(result)
-
-    @property
-    def _type(self) -> str:
-        return "conversational"
diff --git a/mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py b/mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py
deleted file mode 100644
index 34d7a1e0b89..00000000000
--- a/mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py
+++ /dev/null
@@ -1,949 +0,0 @@
-import re
-import math
-import logging
-import collections
-import json
-from typing import List, Any, Optional, Dict, Tuple, Union, Callable
-
-from pydantic import BaseModel, Field
-
-from mindsdb.integrations.utilities.rag.retrievers.base import BaseRetriever, RunnableRetriever
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
-from mindsdb.integrations.libs.response import HandlerResponse
-from mindsdb.integrations.libs.vectordatabase_handler import (
-    DistanceFunction,
-    VectorStoreHandler,
-)
-from mindsdb.integrations.utilities.rag.settings import (
-    DatabaseSchema,
-    TableSchema,
-    ColumnSchema,
-    ValueSchema,
-    SearchKwargs,
-)
-from mindsdb.utilities import log
-
-import numpy as np
-
-logger = log.getLogger(__name__)
-
-
-class MetadataFilter(BaseModel):
-    """Represents an LLM generated metadata filter to apply to a PostgreSQL query."""
-
-    attribute: str = Field(description="Database column to apply filter to")
-    comparator: str = Field(description="PostgreSQL comparator to use to filter database column")
-    value: Any = Field(description="Value to use to filter database column")
-
-
-class AblativeMetadataFilter(MetadataFilter):
-    """Adds additional fields to support ablation."""
-
-    schema_table: str = Field(description="schema name of the table for this filter")
-    schema_column: str = Field(description="schema name of the column for this filter")
-    schema_value: str = Field(description="schema name of the value for this filter")
-
-
-class MetadataFilters(BaseModel):
-    """List of LLM generated metadata filters to apply to a PostgreSQL query."""
-
-    filters: List[MetadataFilter] = Field(description="List of PostgreSQL metadata filters to apply for user query")
-
-
-class SQLRetriever(BaseRetriever):
-    """Retriever that uses a LLM to generate pgvector queries to do similarity search with metadata filters.
-
-    How it works:
-
-    1. Use a LLM to rewrite the user input to something more suitable for retrieval. For example:
-    "Show me documents containing how to finetune a LLM please" --> "how to finetune a LLM"
-
-    2. Use a LLM to generate structured metadata filters based on the user input. Provided
-    metadata schemas & examples are used as additional context.
-
-    3. Generate a prepared PostgreSQL query from the structured metadata filters.
-
-    4. Actually execute the query against our vector database to retrieve documents & return them.
-    """
-
-    fallback_retriever: Any  # Must have get_relevant_documents or invoke method
-    vector_store_handler: VectorStoreHandler
-    # search parameters
-    max_filters: int
-    filter_threshold: float
-    min_k: int
-
-    # Schema description
-    database_schema: Optional[DatabaseSchema] = None
-
-    # Embeddings
-    embeddings_model: Any  # Must have embed_query method
-    search_kwargs: SearchKwargs
-
-    # prompt templates
-    rewrite_prompt_template: str
-
-    # schema templates
-    table_prompt_template: str
-    column_prompt_template: str
-    value_prompt_template: str
-
-    # formatting templates
-    boolean_system_prompt: str
-    generative_system_prompt: str
-
-    # SQL search config
-    num_retries: int
-    embeddings_table: str
-    source_table: str
-    source_id_column: str
-    distance_function: DistanceFunction
-
-    # Re-rank and metadata generation model.
-    llm: Any  # Must have invoke method
-
-    def _sort_schema_by_priority_key(
-        self,
-        schema_dict_item: Tuple[str, Union[TableSchema, ColumnSchema, ValueSchema]],
-    ):
-        return schema_dict_item[1].priority
-
-    def _sort_schema_by_relevance_key(
-        self,
-        schema_dict_item: Tuple[str, Union[TableSchema, ColumnSchema, ValueSchema]],
-    ):
-        if schema_dict_item[1].relevance is not None:
-            return schema_dict_item[1].relevance
-        else:
-            return 0
-
-    def _sort_schema_by_key(
-        self,
-        schema: Union[DatabaseSchema, TableSchema, ColumnSchema],
-        key: Callable,
-        update: Dict[str, Any] = None,
-    ) -> Union[DatabaseSchema, TableSchema, ColumnSchema]:
-        """Takes a schema and converts its dict into an OrderedDict"""
-        if isinstance(schema, DatabaseSchema):
-            collection_key = "tables"
-        elif isinstance(schema, TableSchema):
-            collection_key = "columns"
-        elif isinstance(schema, ColumnSchema):
-            collection_key = "values"
-        else:
-            raise Exception("schema must be either a DatabaseSchema, TableSchema, or ColumnSchema.")
-
-        if update is not None:
-            ordered = collections.OrderedDict(sorted(update.items(), key=key, reverse=True))
-        else:
-            ordered = collections.OrderedDict(sorted(getattr(schema, collection_key).items(), key=key, reverse=True))
-        schema = schema.model_copy(update={collection_key: ordered})
-
-        return schema
-
-    def _sort_database_schema_by_key(self, database_schema: DatabaseSchema, key: Callable) -> DatabaseSchema:
-        """Re-build schema with OrderedDicts"""
-        tables = {}
-        # build new tables dict
-        for table_key, table_schema in database_schema.tables.items():
-            columns = {}
-            # build new column dict
-            for column_key, column_schema in table_schema.columns.items():
-                # sort values directly and update column schema
-                columns[column_key] = self._sort_schema_by_key(schema=column_schema, key=key)
-            # update table schema and sort
-            tables[table_key] = self._sort_schema_by_key(schema=table_schema, key=key, update=columns)
-        # update table schema and sort
-        database_schema = self._sort_schema_by_key(schema=database_schema, key=key, update=tables)
-
-        return database_schema
-
-    def _prepare_value_prompt(
-        self,
-        value_schema: ValueSchema,
-        column_schema: ColumnSchema,
-        table_schema: TableSchema,
-        boolean_system_prompt: bool = True,
-        format_instructions: Optional[str] = None,
-    ) -> str:
-        if boolean_system_prompt is True:
-            system_prompt = self.boolean_system_prompt
-        else:
-            system_prompt = self.generative_system_prompt
-
-        prepared_column_prompt = self._prepare_column_prompt(column_schema=column_schema, table_schema=table_schema)
-        # Extract column schema string from prepared prompt (it's now a string)
-        column_schema_str = (
-            prepared_column_prompt.split("Query:")[0] if "Query:" in prepared_column_prompt else prepared_column_prompt
-        )
-
-        value_str = ""
-        header_str = ""
-        if type(value_schema.value) in [str, int, float, bool]:
-            header_str = f"This schema describes a single value in the {column_schema.column} column."
-
-            value_str = f"""
- -**Value**: {value_schema.value}
-"""
-
-        elif type(value_schema.value) is dict:
-            header_str = f"This schema describes enumerated values in the {column_schema.column} column."
-
-            value_str = """
-## **Enumerated Values**
-
-The values in the column are an enumeration of named values. These are listed below with format **[Column Value]**: [named value].
-"""
-            for value, value_name in value_schema.value.items():
-                value_str += f"""
-- **{value}:** {value_name}"""
-
-        elif type(value_schema.value) is list:
-            header_str = f"This schema describes some of the values in the {column_schema.column} column."
-
-            value_str = """
-## **Sample Values**
-
-There are too many values in this column to list exhaustively. Below is a sampling of values found in the column:
-"""
-            for value in value_schema.value:
-                value_str += f"""
-- {value}"""
-
-        if getattr(value_schema, "comparator", None) is not None:
-            comparator_str = """
-
-## **Comparators**
-
-Below is a list of comparison operators for constructing filters for this value schema:
-"""
-            if type(value_schema.comparator) is str:
-                comparator_str += f"""- {value_schema.comparator}
-"""
-            else:
-                for comp in value_schema.comparator:
-                    comparator_str += f"""- {comp}
-"""
-        else:
-            comparator_str = ""
-
-        if getattr(value_schema, "example_questions", None) is not None:
-            example_str = """## **Example Questions**
-"""
-            for i, example in enumerate(value_schema.example_questions):
-                example_str += f"""{i}. **Query:** {example.input} **Answer:** {example.output}
-"""
-        else:
-            example_str = ""
-
-        # Format prompt as string instead of ChatPromptTemplate
-        format_instructions_str = format_instructions or ""
-        prompt = f"""{system_prompt}
-
-{self.value_prompt_template}
-
-Format Instructions:
-{format_instructions_str}
-
-Header:
-{header_str}
-
-Column Schema:
-{column_schema_str}
-
-Value:
-{value_str}
-
-Comparator:
-{comparator_str}
-
-Type: {value_schema.type}
-Description: {value_schema.description}
-Usage: {value_schema.usage}
-
-Examples:
-{example_str}
-
-Query: {{query}}"""
-        return prompt
-
-    def _prepare_column_prompt(
-        self,
-        column_schema: ColumnSchema,
-        table_schema: TableSchema,
-        boolean_system_prompt: bool = True,
-    ) -> str:
-        if boolean_system_prompt is True:
-            system_prompt = self.boolean_system_prompt
-        else:
-            system_prompt = self.generative_system_prompt
-
-        prepared_table_prompt = self._prepare_table_prompt(
-            table_schema=table_schema, boolean_system_prompt=boolean_system_prompt
-        )
-        # Extract table schema string from prepared prompt (it's now a string)
-        table_schema_str = (
-            prepared_table_prompt.split("Query:")[0] if "Query:" in prepared_table_prompt else prepared_table_prompt
-        )
-
-        header_str = f"This schema describes a column in the {table_schema.table} table."
-
-        value_str = """
-## **Content**
-
-Below is a description of the contents in this column in list format:
-"""
-        for value_schema in column_schema.values.values():
-            value_str += f"""
-- {value_schema.description}
-"""
-        value_str += """
-**Important:** The above descriptions are not the actual values stored in this column. See the Value schema for actual values.
-"""
-
-        if getattr(column_schema, "examples", None) is not None:
-            example_str = """## **Example Questions**
-"""
-            for example in column_schema.examples:
-                example_str += f"""- {example}
-"""
-        else:
-            example_str = ""
-
-        # Format prompt as string instead of ChatPromptTemplate
-        prompt = f"""{system_prompt}
-
-{self.column_prompt_template}
-
-Header:
-{header_str}
-
-Table Schema:
-{table_schema_str}
-
-Column: {column_schema.column}
-Type: {column_schema.type}
-Description: {column_schema.description}
-Usage: {column_schema.usage}
-
-Values:
-{value_str}
-
-Examples:
-{example_str}
-
-Query: {{query}}"""
-        return prompt
-
-    def _prepare_table_prompt(self, table_schema: TableSchema, boolean_system_prompt: bool = True) -> str:
-        if boolean_system_prompt is True:
-            system_prompt = self.boolean_system_prompt
-        else:
-            system_prompt = self.generative_system_prompt
-
-        header_str = "This schema describes a table in the database."
-
-        columns_str = ""
-        for column_key, column_schema in table_schema.columns.items():
-            columns_str += f"""
-- **{column_schema.column}:** {column_schema.description}
-"""
-
-        if getattr(table_schema, "examples", None) is not None:
-            example_str = """## **Example Questions**
-"""
-            for example in table_schema.examples:
-                example_str += f"""- {example}
-"""
-        else:
-            example_str = ""
-
-        # Format prompt as string instead of ChatPromptTemplate
-        prompt = f"""{system_prompt}
-
-{self.table_prompt_template}
-
-Header:
-{header_str}
-
-Table: {table_schema.table}
-Description: {table_schema.description}
-Usage: {table_schema.usage}
-
-Columns:
-{columns_str}
-
-Examples:
-{example_str}
-
-Query: {{query}}"""
-        return prompt
-
-    def _rank_schema(self, prompt: str, query: str) -> float:
-        """
-        Rank schema by calling LLM with prompt and query.
-
-        Args:
-            prompt: Prompt template string with {query} placeholder
-            query: Query string
-
-        Returns:
-            Relevance score between 0 and 1
-        """
-        # Format prompt with query
-        formatted_prompt = prompt.format(query=query)
-
-        try:
-            # Call LLM - try to get logprobs if supported
-            if hasattr(self.llm, "bind") and hasattr(self.llm.bind(logprobs=True), "invoke"):
-                llm_with_logprobs = self.llm.bind(logprobs=True)
-                output = llm_with_logprobs.invoke(formatted_prompt)
-            else:
-                # Fallback to regular invoke
-                output = self.llm.invoke(formatted_prompt)
-
-            # Try to extract logprobs from response
-            score = None
-            if hasattr(output, "response_metadata") and "logprobs" in output.response_metadata:
-                logprobs = output.response_metadata["logprobs"]
-                if "content" in logprobs:
-                    for content in logprobs["content"]:
-                        token = content.get("token", "").lower().strip()
-                        logprob = content.get("logprob", 0.0)
-                        if token == "yes":
-                            score = (1 + math.exp(logprob)) / 2
-                            break
-                        elif token == "no":
-                            score = (1 - math.exp(logprob)) / 2
-                            break
-
-            # If no logprobs, try to parse yes/no from content
-            if score is None:
-                content_text = ""
-                if hasattr(output, "content"):
-                    content_text = output.content.lower().strip()
-                elif isinstance(output, str):
-                    content_text = output.lower().strip()
-                else:
-                    content_text = str(output).lower().strip()
-
-                if "yes" in content_text:
-                    score = 0.75  # Default positive score
-                elif "no" in content_text:
-                    score = 0.25  # Default negative score
-                else:
-                    score = 0.5  # Neutral score
-
-            if score is None:
-                score = 0.0
-
-        except Exception as e:
-            logger.warning(f"Error ranking schema: {e}")
-            score = 0.0
-
-        return score
-
-    def _breadth_first_search(self, query: str, greedy: bool = False) -> Tuple:
-        """Search breadth wise through Tables, then Columns, then Values.Uses a greedy strategy to maximize quota if greedy=True, otherwise a dynamic strategy."""
-
-        # sort based on priority
-        ordered_database_schema = self._sort_database_schema_by_key(
-            database_schema=self.database_schema, key=self._sort_schema_by_priority_key
-        )
-
-        #  Rank Tables ########################################################
-        greedy_count = 0
-        tables = {}
-        # rank tables by relevance
-        for table_key, table_schema in ordered_database_schema.tables.items():
-            prompt: str = self._prepare_table_prompt(table_schema=table_schema, boolean_system_prompt=True)
-            table_schema.relevance = self._rank_schema(prompt=prompt, query=query)
-
-            # only keep greedy tables
-            tables[table_key] = table_schema
-
-            if greedy:
-                if table_schema.relevance >= ordered_database_schema.filter_threshold:
-                    greedy_count += 1
-                if greedy_count >= ordered_database_schema.max_filters:
-                    break
-
-        #  sort tables
-        ordered_database_schema = self._sort_schema_by_key(
-            schema=ordered_database_schema,
-            key=self._sort_schema_by_relevance_key,
-            update=tables,
-        )
-
-        #  Rank Columns #######################################################
-        #  iterate through tables to rank columns
-        tables = {}
-        table_count = 0  # take only the top n number of tables specified by the databases max filters
-        for table_key, table_schema in ordered_database_schema.tables.items():
-            # only drop into tables above the filter threshold
-            if table_schema.relevance >= ordered_database_schema.filter_threshold:
-                greedy_count = 0
-                # rank columns by relevance
-                columns = {}
-                for column_key, column_schema in table_schema.columns.items():
-                    prompt: str = self._prepare_column_prompt(
-                        column_schema=column_schema,
-                        table_schema=table_schema,
-                        boolean_system_prompt=True,
-                    )
-                    column_schema.relevance = self._rank_schema(prompt=prompt, query=query)
-
-                    columns[column_key] = column_schema
-
-                    if greedy:
-                        if column_schema.relevance >= table_schema.filter_threshold:
-                            greedy_count += 1
-                        if greedy_count >= table_schema.max_filters:
-                            break
-
-                # sort columns and keep only columns that made the cut.
-                tables[table_key] = self._sort_schema_by_key(
-                    table_schema, key=self._sort_schema_by_relevance_key, update=columns
-                )
-
-                table_count += 1
-                if table_count >= ordered_database_schema.max_filters:
-                    break
-
-        # sort tables and keep only tables that made the cut.
-        ordered_database_schema = self._sort_schema_by_key(
-            ordered_database_schema,
-            key=self._sort_schema_by_relevance_key,
-            update=tables,
-        )
-
-        #  Rank Values ########################################################
-        #  iterate through tables to rank values
-        tables = {}
-        for table_key, table_schema in ordered_database_schema.tables.items():
-            columns = {}
-            column_count = 0
-            # iterate through columns to rank values
-            for column_key, column_schema in table_schema.columns.items():
-                if column_schema.relevance >= table_schema.filter_threshold:
-                    greedy_count = 0
-                    values = {}
-                    #  rank values by relevance
-                    for value_key, value_schema in column_schema.values.items():
-                        prompt: str = self._prepare_value_prompt(
-                            value_schema=value_schema,
-                            column_schema=column_schema,
-                            table_schema=table_schema,
-                            boolean_system_prompt=True,
-                        )
-                        value_schema.relevance = self._rank_schema(prompt=prompt, query=query)
-
-                        values[value_key] = value_schema
-
-                        if greedy:
-                            if value_schema.relevance >= column_schema.filter_threshold:
-                                greedy_count += 1
-                            if greedy_count >= column_schema.max_filters:
-                                break
-
-                    # sort values and keep only values that make the cut
-                    columns[column_key] = self._sort_schema_by_key(
-                        column_schema,
-                        key=self._sort_schema_by_relevance_key,
-                        update=values,
-                    )
-
-                    column_count += 1
-                    if column_count >= table_schema.max_filters:
-                        break
-
-            # sort columns and keep only columns that made the cut
-            tables[table_key] = self._sort_schema_by_key(
-                table_schema, key=self._sort_schema_by_relevance_key, update=columns
-            )
-
-        # sort tables and keep only tables that made the cut.
-        ordered_database_schema = self._sort_schema_by_key(
-            ordered_database_schema,
-            key=self._sort_schema_by_relevance_key,
-            update=tables,
-        )
-
-        #  discard low ranked values ###################################################################################
-        tables = {}
-        for table_key, table_schema in ordered_database_schema.tables.items():
-            columns = {}
-            # iterate through columns to rank values
-            for column_key, column_schema in table_schema.columns.items():
-                value_count = 0
-                values = {}
-                #  rank values by relevance
-                for value_key, value_schema in column_schema.values.items():
-                    if value_schema.relevance >= column_schema.filter_threshold:
-                        values[value_key] = value_schema
-
-                        value_count += 1
-                        if value_count >= column_schema.max_filters:
-                            break
-
-                # sort values and keep only values that make the cut
-                columns[column_key] = self._sort_schema_by_key(
-                    column_schema,
-                    key=self._sort_schema_by_relevance_key,
-                    update=values,
-                )
-
-            # sort columns and keep only columns that made the cut
-            tables[table_key] = self._sort_schema_by_key(
-                table_schema, key=self._sort_schema_by_relevance_key, update=columns
-            )
-
-        # sort tables and keep only tables that made the cut.
-        ordered_database_schema = self._sort_schema_by_key(
-            ordered_database_schema,
-            key=self._sort_schema_by_relevance_key,
-            update=tables,
-        )
-
-        ranked_database_schema = ordered_database_schema
-
-        #  Build Ablation #####################################################
-
-        ablation_value_dict = {}
-        # assemble a relevance dictionary
-        for table_key, table_schema in ordered_database_schema.tables.items():
-            for column_key, column_schema in table_schema.columns.items():
-                for value_key, value_schema in column_schema.values.items():
-                    ablation_value_dict[(table_key, column_key, value_key)] = value_schema.relevance
-
-        ablation_value_dict = collections.OrderedDict(sorted(ablation_value_dict.items(), key=lambda x: x[1]))
-
-        relevance_scores = list(ablation_value_dict.values())
-        if len(relevance_scores) > 0:
-            ablation_quantiles = np.quantile(relevance_scores, np.linspace(0, 1, self.num_retries + 2)[1:-1])
-        else:
-            ablation_quantiles = None
-
-        return ranked_database_schema, ablation_value_dict, ablation_quantiles
-
-    def _dynamic_ablation(
-        self,
-        metadata_filters: List[AblativeMetadataFilter],
-        ablation_value_dict,
-        ablation_quantiles,
-        retry: int,
-    ):
-        """Ablate metadata filters in aggregate by quantiles until the required minimum number of documents are returned."""
-
-        ablated_dict = {}
-        for key, value in ablation_value_dict.items():
-            if value >= ablation_quantiles[retry]:
-                ablated_dict[key] = value
-
-        #  discard low ranked filters ##################################################################################
-        ablated_filters = []
-        for filter in metadata_filters:
-            for key in ablated_dict.keys():
-                if filter.schema_table in key and filter.schema_column in key and filter.schema_value in key:
-                    ablated_filters.append(filter)
-
-        return ablated_filters
-
-    def depth_first_search(self, greedy=True):
-        """Search depth wise through Tables, then Columns, then Values. Uses a greedy strategy to maximize quota if greedy=True, otherwise a dynamic strategy."""
-        pass
-
-    def depth_first_ablation(self):
-        """Ablate metadata filters in reverse depth first search until the required minimum number of documents are returned."""
-        pass
-
-    def _prepare_retrieval_query(self, query: str) -> str:
-        """Rewrite query to be suitable for retrieval using LLM"""
-        # Format prompt with query
-        formatted_prompt = self.rewrite_prompt_template.format(input=query)
-
-        # Call LLM
-        llm_response = self.llm.invoke(formatted_prompt)
-
-        # Extract content from LLM response
-        if hasattr(llm_response, "content"):
-            return llm_response.content
-        elif isinstance(llm_response, str):
-            return llm_response
-        else:
-            return str(llm_response)
-
-    def _prepare_pgvector_query(
-        self,
-        ranked_database_schema: DatabaseSchema,
-        metadata_filters: List[AblativeMetadataFilter],
-        retry: int = 0,
-    ) -> str:
-        # Base select JOINed with document source table.
-        base_query = f"""SELECT * FROM {self.embeddings_table} AS e INNER JOIN {self.source_table} AS s ON (e.metadata->>'original_row_id')::int = s."{self.source_id_column}" """
-
-        # return an empty string if schema has not been ranked
-        if not ranked_database_schema:
-            return ""
-
-        # Add Table JOIN statements
-        join_clauses = set()
-        for metadata_filter in metadata_filters:
-            join_clause = ranked_database_schema.tables[metadata_filter.schema_table].join
-            if join_clause in join_clauses:
-                continue
-            else:
-                join_clauses.add(join_clause)
-                base_query += join_clause + " "
-
-        # Add WHERE conditions from metadata filters
-        if metadata_filters:
-            base_query += "WHERE "
-        for i, filter in enumerate(metadata_filters):
-            value = filter.value
-            if isinstance(value, str):
-                value = f"'{value}'"
-            base_query += f'"{filter.attribute}" {filter.comparator} {value}'
-            if i < len(metadata_filters) - 1:
-                base_query += " AND "
-
-        base_query += (
-            f" ORDER BY e.embeddings {self.distance_function.value[0]} '{{embeddings}}' LIMIT {self.search_kwargs.k};"
-        )
-        return base_query
-
-    def _generate_filter(self, prompt: str, query: str) -> MetadataFilter:
-        """Generate metadata filter using LLM"""
-        # Format prompt with query
-        formatted_prompt = prompt.format(query=query)
-
-        # Call LLM
-        llm_response = self.llm.invoke(formatted_prompt)
-
-        # Extract content from LLM response
-        if hasattr(llm_response, "content"):
-            response_text = llm_response.content
-        elif isinstance(llm_response, str):
-            response_text = llm_response
-        else:
-            response_text = str(llm_response)
-
-        # Parse JSON response to get MetadataFilter
-        try:
-            parsed = json.loads(response_text)
-            # If it's a dict, try to create MetadataFilter
-            if isinstance(parsed, dict):
-                return MetadataFilter(**parsed)
-            else:
-                # If it's already a MetadataFilter-like object
-                return parsed
-        except (json.JSONDecodeError, TypeError, Exception) as e:
-            logger.warning(f"Error parsing filter response: {e}")
-            # Return empty filter on error
-            return MetadataFilter(attribute="", comparator="=", value="")
-
-    def _generate_metadata_filters(
-        self, query: str, ranked_database_schema
-    ) -> Union[List[AblativeMetadataFilter], HandlerResponse]:
-        """Generate metadata filters using LLM"""
-
-        metadata_filter_list = []
-        #  iterate through tables to rank values
-        for table_key, table_schema in ranked_database_schema.tables.items():
-            # iterate through columns to rank values
-            for column_key, column_schema in table_schema.columns.items():
-                if column_schema.relevance >= table_schema.filter_threshold:
-                    #  generate filters
-                    for value_key, value_schema in column_schema.values.items():
-                        # must use generation if field is a dictionary of tuples or a list
-                        if type(value_schema.value) in [list, dict]:
-                            try:
-                                # Create format instructions for JSON output
-                                format_instructions = """Return a JSON object with the following structure:
-{
-  "attribute": "column_name",
-  "comparator": "comparison_operator",
-  "value": "filter_value"
-}"""
-
-                                metadata_prompt: str = self._prepare_value_prompt(
-                                    format_instructions=format_instructions,
-                                    value_schema=value_schema,
-                                    column_schema=column_schema,
-                                    table_schema=table_schema,
-                                    boolean_system_prompt=False,
-                                )
-
-                                # Call LLM directly
-                                formatted_prompt = metadata_prompt.format(query=query)
-                                llm_response = self.llm.invoke(formatted_prompt)
-
-                                # Extract content from LLM response
-                                if hasattr(llm_response, "content"):
-                                    metadata_filter_output = llm_response.content
-                                elif isinstance(llm_response, str):
-                                    metadata_filter_output = llm_response
-                                else:
-                                    metadata_filter_output = str(llm_response)
-
-                                # If the LLM outputs raw JSON, use it as-is.
-                                # If the LLM outputs anything including a json markdown section, use the last one.
-                                json_markdown_output = re.findall(r"```json.*?```", metadata_filter_output, re.DOTALL)
-                                if json_markdown_output:
-                                    metadata_filter_output = json_markdown_output[-1]
-                                    # Clean the json tags.
-                                    metadata_filter_output = metadata_filter_output[7:]
-                                    metadata_filter_output = metadata_filter_output[:-3]
-
-                                # Parse JSON directly instead of using PydanticOutputParser
-                                parsed = json.loads(metadata_filter_output.strip())
-                                model_dump = {
-                                    "attribute": parsed.get("attribute", ""),
-                                    "comparator": parsed.get("comparator", "="),
-                                    "value": parsed.get("value", ""),
-                                    "schema_table": table_key,
-                                    "schema_column": column_key,
-                                    "schema_value": value_key,
-                                }
-                                metadata_filter = AblativeMetadataFilter(**model_dump)
-                            except (json.JSONDecodeError, TypeError, Exception) as e:
-                                logger.warning(
-                                    f"LLM failed to generate structured metadata filters: {e}",
-                                    exc_info=logger.isEnabledFor(logging.DEBUG),
-                                )
-                                return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
-                        else:
-                            metadata_filter = AblativeMetadataFilter(
-                                attribute=column_schema.column,
-                                comparator=value_schema.comparator,
-                                value=value_schema.value,
-                                schema_table=table_key,
-                                schema_column=column_key,
-                                schema_value=value_key,
-                            )
-                        metadata_filter_list.append(metadata_filter)
-
-        return metadata_filter_list
-
-    def _prepare_and_execute_query(
-        self,
-        ranked_database_schema: DatabaseSchema,
-        metadata_filters: List[AblativeMetadataFilter],
-        embeddings_str: str,
-    ) -> HandlerResponse:
-        try:
-            checked_sql_query = self._prepare_pgvector_query(ranked_database_schema, metadata_filters)
-            checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=embeddings_str)
-            return self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
-        except Exception as e:
-            logger.warning(
-                f"Failed to prepare and execute SQL query from structured metadata: {e}",
-                exc_info=logger.isEnabledFor(logging.DEBUG),
-            )
-            return HandlerResponse(RESPONSE_TYPE.ERROR, error_message=str(e))
-
-    def _get_relevant_documents(self, query: str, *, run_manager: Optional[Any] = None) -> List[Any]:
-        # Rewrite query to be suitable for retrieval.
-        retrieval_query = self._prepare_retrieval_query(query)
-
-        # Embed the rewritten retrieval query & include it in the similarity search pgvector query.
-        embedded_query = self.embeddings_model.embed_query(retrieval_query)
-
-        # Search for relevant filters
-        ranked_database_schema, ablation_value_dict, ablation_quantiles = self._breadth_first_search(query=query)
-
-        # Generate metadata filters
-        metadata_filters = self._generate_metadata_filters(query=query, ranked_database_schema=ranked_database_schema)
-
-        if type(metadata_filters) is list:
-            # Initial Execution of the similarity search with metadata filters.
-            document_response = self._prepare_and_execute_query(
-                ranked_database_schema=ranked_database_schema,
-                metadata_filters=metadata_filters,
-                embeddings_str=str(embedded_query),
-            )
-            num_retries = 0
-            while num_retries < self.num_retries:
-                if (
-                    document_response.resp_type != RESPONSE_TYPE.ERROR
-                    and len(document_response.data_frame) >= self.min_k
-                ):
-                    # Successfully retrieved k documents to send to re-ranker.
-                    break
-                elif document_response.resp_type == RESPONSE_TYPE.ERROR:
-                    # LLMs won't always generate structured metadata so we should have a fallback after retrying.
-                    logger.info(f"SQL Retriever query failed with error {document_response.error_message}")
-                else:
-                    logger.info(
-                        f"SQL Retriever did not retrieve {self.min_k} documents: {len(document_response.data_frame)} documents retrieved."
-                    )
-
-                ablated_metadata_filters = self._dynamic_ablation(
-                    metadata_filters=metadata_filters,
-                    ablation_value_dict=ablation_value_dict,
-                    ablation_quantiles=ablation_quantiles,
-                    retry=num_retries,
-                )
-
-                document_response = self._prepare_and_execute_query(
-                    ranked_database_schema=ranked_database_schema,
-                    metadata_filters=ablated_metadata_filters,
-                    embeddings_str=str(embedded_query),
-                )
-
-                num_retries += 1
-
-            retrieved_documents = []
-            if document_response.resp_type != RESPONSE_TYPE.ERROR:
-                document_df = document_response.data_frame
-                for _, document_row in document_df.iterrows():
-                    retrieved_documents.append(
-                        SimpleDocument(
-                            page_content=document_row.get("content", ""),
-                            metadata=document_row.get("metadata", {}),
-                        )
-                    )
-            if retrieved_documents:
-                return retrieved_documents
-
-            # If the SQL query constructed did not return any documents, fallback.
-            logger.info("No documents returned from SQL retriever, using fallback retriever.")
-            return self._retrieve_from_fallback_retriever(retrieval_query)
-        else:
-            # If no metadata fields could be generated fallback.
-            logger.info("No metadata fields were successfully generated, using fallback retriever.")
-            return self._retrieve_from_fallback_retriever(retrieval_query)
-
-    def _retrieve_from_fallback_retriever(self, query: str) -> List[Any]:
-        """Retrieve documents from fallback retriever using duck typing"""
-        if hasattr(self.fallback_retriever, "_get_relevant_documents"):
-            return self.fallback_retriever._get_relevant_documents(query)
-        elif hasattr(self.fallback_retriever, "get_relevant_documents"):
-            return self.fallback_retriever.get_relevant_documents(query)
-        elif hasattr(self.fallback_retriever, "invoke"):
-            return self.fallback_retriever.invoke(query)
-        else:
-            raise ValueError(
-                "Fallback retriever must have _get_relevant_documents, get_relevant_documents, or invoke method"
-            )
-
-    def invoke(self, query: str) -> List[Any]:
-        """Sync invocation - retrieve documents for a query"""
-        return self._get_relevant_documents(query)
-
-    async def ainvoke(self, query: str) -> List[Any]:
-        """Async invocation - retrieve documents for a query"""
-        import asyncio
-
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(None, self._get_relevant_documents, query)
-
-    def get_relevant_documents(self, query: str) -> List[Any]:
-        """Get relevant documents (sync)"""
-        return self._get_relevant_documents(query)
-
-    def as_runnable(self) -> RunnableRetriever:
-        """Return self as a runnable retriever"""
-        return self
diff --git a/mindsdb/integrations/utilities/rag/settings.py b/mindsdb/integrations/utilities/rag/settings.py
index 56a8306295f..c4eb9a6a162 100644
--- a/mindsdb/integrations/utilities/rag/settings.py
+++ b/mindsdb/integrations/utilities/rag/settings.py
@@ -1,366 +1,20 @@
 from enum import Enum
-from typing import List, Union, Any, Optional, Dict, OrderedDict
+from typing import List, Any, Optional, Dict
 
-from pydantic import BaseModel, Field, field_validator, ConfigDict
-from mindsdb.integrations.utilities.rag.splitters.custom_splitters import RecursiveCharacterTextSplitter as TextSplitter
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
+from pydantic import BaseModel, Field, ConfigDict
 
-DEFAULT_COLLECTION_NAME = "default_collection"
 
-# Multi retriever specific
-DEFAULT_ID_KEY = "doc_id"
-DEFAULT_MAX_CONCURRENCY = 5
-DEFAULT_K = 20
-
-DEFAULT_CARDINALITY_THRESHOLD = 40
-DEFAULT_MAX_SUMMARIZATION_TOKENS = 4000
 DEFAULT_CHUNK_SIZE = 1000
 DEFAULT_CHUNK_OVERLAP = 200
-DEFAULT_POOL_RECYCLE = 3600
 DEFAULT_LLM_MODEL = "gpt-4o"
+DEFAULT_LLM_ENDPOINT = "https://api.openai.com/v1"
 DEFAULT_LLM_MODEL_PROVIDER = "openai"
-DEFAULT_CONTENT_COLUMN_NAME = "body"
-DEFAULT_DATASET_DESCRIPTION = "email inbox"
-DEFAULT_TEST_TABLE_NAME = "test_email"
-DEFAULT_RERANKER_FLAG = False
 DEFAULT_RERANKING_MODEL = "gpt-4o"
-DEFAULT_LLM_ENDPOINT = "https://api.openai.com/v1"
 DEFAULT_RERANKER_N = 1
 DEFAULT_RERANKER_LOGPROBS = True
 DEFAULT_RERANKER_TOP_LOGPROBS = 4
 DEFAULT_RERANKER_MAX_TOKENS = 100
 DEFAULT_VALID_CLASS_TOKENS = ["1", "2", "3", "4"]
-DEFAULT_AUTO_META_PROMPT_TEMPLATE = """
-Below is a json representation of a table with information about {description}.
-Return a JSON list with an entry for each column. Each entry should have
-{{"name": "column name", "description": "column description", "type": "column data type"}}
-\n\n{dataframe}\n\nJSON:\n
-"""
-DEFAULT_RAG_PROMPT_TEMPLATE = """You are an assistant for
-question-answering tasks. Use the following pieces of retrieved context
-to answer the question. If you don't know the answer, just say that you
-don't know. Use two sentences maximum and keep the answer concise.
-Question: {question}
-Context: {context}
-Answer:"""
-
-DEFAULT_QA_GENERATION_PROMPT_TEMPLATE = """You are an assistant for
-generating sample questions and answers from the given document and metadata. Given
-a document and its metadata as context, generate a question and answer from that document and its metadata.
-
-The document will be a string. The metadata will be a JSON string. You need
-to parse the JSON to understand it.
-
-Generate a question that requires BOTH the document and metadata to answer, if possible.
-Otherwise, generate a question that requires ONLY the document to answer.
-
-Return a JSON dictionary with the question and answer like this:
-{{ "question": , "answer":  }}
-
-Make sure the JSON string is valid before returning it. You must return the question and answer
-in the specified JSON format no matter what.
-
-Document: {document}
-Metadata: {metadata}
-Answer:"""
-
-DEFAULT_MAP_PROMPT_TEMPLATE = """The following is a set of documents
-{docs}
-Based on this list of docs, please summarize based on the user input.
-
-User input: {input}
-
-Helpful Answer:"""
-
-DEFAULT_REDUCE_PROMPT_TEMPLATE = """The following is set of summaries:
-{docs}
-Take these and distill it into a final, consolidated summary related to the user input.
-
-User input: {input}
-
-Helpful Answer:"""
-
-DEFAULT_SEMANTIC_PROMPT_TEMPLATE = """Provide a better search query for web search engine to answer the given question.
-
-<< EXAMPLES >>
-1. Input: "Show me documents containing how to finetune a LLM please"
-Output: "how to finetune a LLM"
-
-Output only a single better search query and nothing else like in the example.
-
-Here is the user input: {input}
-"""
-
-DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE = """Construct a list of PostgreSQL metadata filters to filter documents in the database based on the user input.
-
-<< INSTRUCTIONS >>
-{format_instructions}
-
-RETURN ONLY THE FINAL JSON. DO NOT EXPLAIN, JUST RETURN THE FINAL JSON.
-
-<< TABLES YOU HAVE ACCESS TO >>
-
-{schema}
-
-<< EXAMPLES >>
-
-{examples}
-
-Here is the user input:
-{input}
-"""
-
-DEFAULT_BOOLEAN_PROMPT_TEMPLATE = """**Task:** Determine Schema Relevance for Database Search Queries
-
-As an expert in constructing database search queries, you are provided with database schemas detailing tables, columns, and values. Your task is to assess whether these elements can be used to effectively search the database in relation to a given user query.
-
-**Instructions:**
-
-- **Evaluate the Schema**:
-  - Analyze the tables, columns, and values described.
-  - Consider their potential usefulness in retrieving information pertinent to the user query.
-
-- **Decision Criteria**:
-  - Determine if any part of the schema could assist in forming a relevant search query for the information requested.
-
-- **Response**:
-  - Reply with a single word: 'yes' if the schema components are useful, otherwise 'no'.
-
-**Note:** Provide your answer based solely on the relevance of the described schema to the user query."""
-
-DEFAULT_GENERATIVE_SYSTEM_PROMPT = """You are an expert database analyst that can assist in building SQL queries by providing structured output. Follow these format instructions precisely to generate a metadata filter given the provided schema description.
-
-## Format instructions:
-{format_instructions}
- """
-
-DEFAULT_VALUE_PROMPT_TEMPLATE = """
-{column_schema}
-
-# **Value Schema**
-{header}
-
-- The type of the value: {type}
-
-## **Description**
-{description}
-
-{value}{comparator}
-
-## **Usage**
-{usage}
-
-{examples}
-
-## **Query**
-{query}
-
-"""
-
-DEFAULT_COLUMN_PROMPT_TEMPLATE = """
-{table_schema}
-
-# **Column Schema**
-{header}
-
-- The column name in the database table: {column}
-- The type of the values in this column: {type}
-
-## **Description**
-{description}
-
-## **Usage**
-{usage}
-
-{examples}
-
-## **Query**
-{query}
-"""
-
-DEFAULT_TABLE_PROMPT_TEMPLATE = """# **Table Schema**
-{header}
-
-- The name of this table in the database: {table}
-
-## **Description**
-{description}
-
-## **Usage**
-{usage}
-
-## **Column Descriptions**
-Below are descriptions of each column in this table:
-
-{columns}
-
-{examples}
-
-## **Query**
-{query}
-"""
-
-DEFAULT_SQL_PROMPT_TEMPLATE = """
-Construct a valid {dialect} SQL query to select documents relevant to the user input.
-Source documents are found in the {source_table} table. You may need to join with other tables to get additional document metadata.
-
-The JSON col "metadata" in the {embeddings_table} has a string field called "original_row_id". This "original_row_id" string field in the
-"metadata" col is the document ID associated with a row in the {embeddings_table} table.
-You MUST always join with the {embeddings_table} table containing vector embeddings for the documents. For example, for a table named sd with an id column "Id":
-JOIN {embeddings_table} v ON (v."metadata"->>'original_row_id')::int = sd."Id"
-
-You MUST always order the embeddings by the {distance_function} comparator with '{{embeddings}}'.
-You MUST always limit by {k} returned documents.
-For example:
-ORDER BY v.embeddings {distance_function} '{{embeddings}}' LIMIT {k};
-
-
-<< TABLES YOU HAVE ACCESS TO >>
-1. {embeddings_table} - Contains document chunks, vector embeddings, and metadata for documents.
-You MUST always include the metadata column in your SELECT statement.
-You MUST always join with the {embeddings_table} table containing vector embeddings for the documents.
-You MUST always order by the provided embeddings vector using the {distance_function} comparator.
-You MUST always limit by {k} returned documents.
-
-Columns:
-```json
-{{
-    "id": {{
-        "type": "string",
-        "description": "Unique ID for this document chunk"
-    }},
-    "content": {{
-        "type": "string",
-        "description": "A document chunk (subset of the original document)"
-    }},
-    "embeddings": {{
-        "type": "vector",
-        "description": "Vector embeddings for the document chunk. ALWAYS order by the provided embeddings vector using the {distance_function} comparator."
-    }},
-    "metadata": {{
-        "type": "jsonb",
-        "description": "Metadata for the document chunk. Always select metadata and always join with the {source_table} table on the string metadata field 'original_row_id'"
-    }}
-}}
-
-{schema}
-
-<< EXAMPLES >>
-
-{examples}
-
-Output the {dialect} SQL query that is ready to be executed only WITHOUT ANY DELIMITERS. Make sure to properly quote identifiers.
-
-Here is the user input:
-{input}
-"""
-
-DEFAULT_QUESTION_REFORMULATION_TEMPLATE = """Given the original question and the retrieved context,
-analyze what additional information is needed for a complete, accurate answer.
-
-Original Question: {question}
-
-Retrieved Context:
-{context}
-
-Analysis Instructions:
-1. Evaluate Context Coverage:
-   - Identify key entities and concepts from the question
-   - Check for temporal information (dates, periods, sequences)
-   - Verify causal relationships are explained
-   - Confirm presence of requested quantitative data
-   - Assess if geographic or spatial context is sufficient
-
-2. Quality Assessment:
-   If the retrieved context is:
-   - Irrelevant or tangential
-   - Too general or vague
-   - Potentially contradictory
-   - Missing key perspectives
-   - Lacking proper evidence
-   Generate questions to address these specific gaps.
-
-3. Follow-up Question Requirements:
-   - Questions must directly contribute to answering the original query
-   - Break complex relationships into simpler, sequential steps
-   - Maintain specificity rather than broad inquiries
-   - Avoid questions answerable from existing context
-   - Ensure questions build on each other logically
-   - Limit questions to 150 characters each
-   - Each question must be self-contained
-   - Questions must end with a question mark
-
-4. Response Format:
-   - Return a JSON array of strings
-   - Use square brackets and double quotes
-   - Questions must be unique (no duplicates)
-   - If context is sufficient, return empty array []
-   - Maximum 3 follow-up questions
-   - Minimum length per question: 30 characters
-   - No null values or empty strings
-
-Example:
-Original: "How did the development of antibiotics affect military casualties in WWII?"
-
-Invalid responses:
-{'questions': ['What are antibiotics?']}  // Wrong format
-['What is WWII?']  // Too basic
-['How did it impact things?']  // Too vague
-['', 'Question 2']  // Contains empty string
-['Same question?', 'Same question?']  // Duplicate
-
-Valid response:
-["What were military casualty rates from infections before widespread antibiotic use in 1942?",
- "How did penicillin availability change throughout different stages of WWII?",
- "What were the primary battlefield infections treated with antibiotics during WWII?"]
-
-or [] if context fully answers the original question.
-
-Your task: Based on the analysis of the original question and context,
-output ONLY a JSON array of follow-up questions needed to provide a complete answer.
-If no additional information is needed, output an empty array [].
-
-Follow-up Questions:"""
-
-DEFAULT_QUERY_RETRY_PROMPT_TEMPLATE = """
-{query}
-
-The {dialect} query above failed with the error message: {error}.
-
-<< TABLES YOU HAVE ACCESS TO >>
-1. {embeddings_table} - Contains document chunks, vector embeddings, and metadata for documents.
-
-Columns:
-```json
-{{
-    "id": {{
-        "type": "string",
-        "description": "Unique ID for this document chunk"
-    }},
-    "content": {{
-        "type": "string",
-        "description": "A document chunk (subset of the original document)"
-    }},
-    "embeddings": {{
-        "type": "vector",
-        "description": "Vector embeddings for the document chunk."
-    }},
-    "metadata": {{
-        "type": "jsonb",
-        "description": "Metadata for the document chunk."
-    }}
-}}
-
-{schema}
-
-Rewrite the query so it works.
-
-Output the final SQL query only.
-
-SQL Query:
-"""
-
-DEFAULT_NUM_QUERY_RETRIES = 2
 
 
 class LLMConfig(BaseModel):
@@ -373,313 +27,6 @@ class LLMConfig(BaseModel):
     model_config = ConfigDict(protected_namespaces=())
 
 
-class MultiVectorRetrieverMode(Enum):
-    """
-    Enum for MultiVectorRetriever types.
-    """
-
-    SPLIT = "split"
-    SUMMARIZE = "summarize"
-    BOTH = "both"
-
-
-class VectorStoreType(Enum):
-    CHROMA = "chromadb"
-    PGVECTOR = "pgvector"
-
-
-class VectorStoreConfig(BaseModel):
-    vector_store_type: VectorStoreType = VectorStoreType.CHROMA
-    persist_directory: str = None
-    collection_name: str = DEFAULT_COLLECTION_NAME
-    connection_string: str = None
-    kb_table: Any = None
-    is_sparse: bool = False
-    vector_size: Optional[int] = None
-
-    class Config:
-        arbitrary_types_allowed = True
-        extra = "forbid"
-
-
-class RetrieverType(str, Enum):
-    """Retriever type for RAG pipeline"""
-
-    VECTOR_STORE = "vector_store"
-    AUTO = "auto"
-    MULTI = "multi"
-    SQL = "sql"
-    MULTI_HOP = "multi_hop"
-
-
-class SearchType(Enum):
-    """
-    Enum for vector store search types.
-    """
-
-    SIMILARITY = "similarity"
-    MMR = "mmr"
-    SIMILARITY_SCORE_THRESHOLD = "similarity_score_threshold"
-
-
-class SearchKwargs(BaseModel):
-    k: int = Field(default=DEFAULT_K, description="Amount of documents to return", ge=1)
-    filter: Optional[Dict[str, Any]] = Field(default=None, description="Filter by document metadata")
-    # For similarity_score_threshold search type
-    score_threshold: Optional[float] = Field(
-        default=None,
-        description="Minimum relevance threshold for similarity_score_threshold search",
-        ge=0.0,
-        le=1.0,
-    )
-    # For MMR search type
-    fetch_k: Optional[int] = Field(default=None, description="Amount of documents to pass to MMR algorithm", ge=1)
-    lambda_mult: Optional[float] = Field(
-        default=None,
-        description="Diversity of results returned by MMR (1=min diversity, 0=max)",
-        ge=0.0,
-        le=1.0,
-    )
-
-    def model_dump(self, *args, **kwargs):
-        # Override model_dump to exclude None values by default
-        kwargs["exclude_none"] = True
-        return super().model_dump(*args, **kwargs)
-
-
-class LLMExample(BaseModel):
-    input: str = Field(description="User input for the example")
-    output: str = Field(description="What the LLM should generate for this example's input")
-
-
-class ValueSchema(BaseModel):
-    value: Union[
-        Union[str, int, float],
-        Dict[Union[str, int, float], str],
-        List[Union[str, int, float]],
-    ] = Field(
-        description="One of the following. The value as it exists in the table column. A dict of {table_value: descriptive value, ...}, where table_value is the value in the table. A list of sample values taken from the column."
-    )
-    comparator: Optional[Union[str, List[str]]] = Field(
-        description="The posgtres sql operators used to compare two values. For example: `>`, `<`, `=`, or `%`.",
-        default="=",
-    )
-    type: str = Field(
-        description="A valid postgres type for this value. One of: int, string, float, or bool. When numbers appear they should be of type int or float."
-    )
-    description: str = Field(description="Description of what the value represents.")
-    usage: str = Field(description="How and when to use this value for search.")
-    example_questions: Optional[List[LLMExample]] = Field(
-        default=None, description="Example questions where this value is set."
-    )
-    filter_threshold: Optional[float] = Field(
-        default=0.0,
-        description="Minimum relevance threshold to include metadata filters from this column.",
-        exclude=True,
-    )
-    priority: Optional[int] = Field(
-        default=0,
-        description="Priority level for this column, lower numbers will be processed first.",
-    )
-    relevance: Optional[float] = Field(
-        default=None,
-        description="Relevance computed during search. Should not be set by the end user.",
-        exclude=True,
-    )
-
-
-class MetadataConfig(BaseModel):
-    """Class to configure metadata for retrieval. Only supports very basic document name lookup at the moment."""
-
-    table: str = Field(description="Source table for metadata.")
-    max_document_context: int = Field(
-        # To work well with models with context window of 32768.
-        default=16384,
-        description="Truncate a document before using as context with an LLM if it exceeds this amount of tokens",
-    )
-    embeddings_table: str = Field(default="embeddings", description="Source table for embeddings")
-    id_column: str = Field(default="Id", description="Name of ID column in metadata table")
-    name_column: str = Field(default="Title", description="Name of column containing name or title of document")
-    name_column_index: Optional[str] = Field(default=None, description="Name of GIN index to use when looking up name.")
-    content_column: str = Field(
-        default="content", description="Name of column in embeddings table containing chunk content"
-    )
-    embeddings_metadata_column: str = Field(
-        default="metadata", description="Name of column in embeddings table containing chunk metadata"
-    )
-    doc_id_key: str = Field(
-        default="original_row_id", description="Metadata field that links an embedded chunk back to source document ID"
-    )
-
-
-class ColumnSchema(BaseModel):
-    column: str = Field(description="Name of the column in the database")
-    type: str = Field(description="Type of the column (e.g. int, string, datetime)")
-    description: str = Field(description="Description of what the column represents")
-    usage: str = Field(description="How and when to use this Table for search.")
-    values: Optional[
-        Union[
-            OrderedDict[Union[str, int, float], ValueSchema],
-            Dict[Union[str, int, float], ValueSchema],
-        ]
-    ] = Field(
-        default=None,
-        description="One of the following. A dict or ordered dict of {schema_value: ValueSchema, ...}, where schema value is the name given for this value description in the schema.",
-    )
-    example_questions: Optional[List[LLMExample]] = Field(
-        default=None, description="Example questions where this table is useful."
-    )
-    max_filters: Optional[int] = Field(default=1, description="Maximum number of filters to generate for this column.")
-    filter_threshold: Optional[float] = Field(
-        default=0.0,
-        description="Minimum relevance threshold to include metadata filters from this column.",
-    )
-    priority: Optional[int] = Field(
-        default=1,
-        description="Priority level for this column, lower numbers will be processed first.",
-    )
-    relevance: Optional[float] = Field(
-        default=None,
-        description="Relevance computed during search. Should not be set by the end user.",
-    )
-
-
-class TableSchema(BaseModel):
-    table: str = Field(description="Name of table in the database")
-    description: str = Field(description="Description of what the table represents")
-    usage: str = Field(description="How and when to use this Table for search.")
-    columns: Optional[Union[OrderedDict[str, ColumnSchema], Dict[str, ColumnSchema]]] = Field(
-        description="Dict or Ordered Dict of {column_name: ColumnSchemas} describing the metadata columns available for the table"
-    )
-    example_questions: Optional[List[LLMExample]] = Field(
-        default=None, description="Example questions where this table is useful."
-    )
-    join: str = Field(
-        description="SQL join string to join this table with source documents table",
-        default="",
-    )
-    max_filters: Optional[int] = Field(default=1, description="Maximum number of filters to generate for this table.")
-    filter_threshold: Optional[float] = Field(
-        default=0.0,
-        description="Minimum relevance required to use this table to generate filters.",
-    )
-    priority: Optional[int] = Field(
-        default=1,
-        description="Priority level for this table, lower numbers will be processed first.",
-    )
-    relevance: Optional[float] = Field(
-        default=None,
-        description="Relevance computed during search. Should not be set by the end user.",
-    )
-
-
-class DatabaseSchema(BaseModel):
-    database: str = Field(description="Name of database in the Database")
-    description: str = Field(description="Description of what the Database represents")
-    usage: str = Field(description="How and when to use this Database for search.")
-    tables: Union[OrderedDict[str, TableSchema], Dict[str, TableSchema]] = Field(
-        description="Dict of {column_name: ColumnSchemas} describing the metadata columns available for the table"
-    )
-    example_questions: Optional[List[LLMExample]] = Field(
-        default=None, description="Example questions where this Database is useful."
-    )
-    max_filters: Optional[int] = Field(
-        default=1,
-        description="Maximum number of filters to generate for this Database.",
-    )
-    filter_threshold: Optional[float] = Field(
-        default=0.0,
-        description="Minimum relevance required to use this Database to generate filters.",
-    )
-    priority: Optional[int] = Field(
-        default=0,
-        description="Priority level for this Database, lower numbers will be processed first.",
-    )
-    relevance: Optional[float] = Field(
-        default=None,
-        description="Relevance computed during search. Should not be set by the end user.",
-    )
-
-
-class SQLRetrieverConfig(BaseModel):
-    llm_config: LLMConfig = Field(
-        default_factory=LLMConfig,
-        description="LLM configuration to use for generating the final SQL query for retrieval",
-    )
-    metadata_filters_prompt_template: str = Field(
-        default=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE,
-        description="Prompt template to generate PostgreSQL metadata filters. Has 'format_instructions', 'schema', 'examples', and 'input' input variables",
-    )
-    num_retries: int = Field(
-        default=DEFAULT_NUM_QUERY_RETRIES,
-        description="How many times for an LLM to try rewriting a failed SQL query before using the fallback retriever.",
-    )
-    rewrite_prompt_template: str = Field(
-        default=DEFAULT_SEMANTIC_PROMPT_TEMPLATE,
-        description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
-    )
-    table_prompt_template: str = Field(
-        default=DEFAULT_TABLE_PROMPT_TEMPLATE,
-        description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
-    )
-    column_prompt_template: str = Field(
-        default=DEFAULT_COLUMN_PROMPT_TEMPLATE,
-        description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
-    )
-    value_prompt_template: str = Field(
-        default=DEFAULT_VALUE_PROMPT_TEMPLATE,
-        description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
-    )
-    boolean_system_prompt: str = Field(
-        default=DEFAULT_BOOLEAN_PROMPT_TEMPLATE,
-        description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
-    )
-    generative_system_prompt: str = Field(
-        default=DEFAULT_GENERATIVE_SYSTEM_PROMPT,
-        description="Prompt template to rewrite user input to be better suited for retrieval. Has 'input' input variable.",
-    )
-    source_table: str = Field(
-        description="Name of the source table containing the original documents that were embedded"
-    )
-    source_id_column: str = Field(description="Name of the column containing the UUID.", default="Id")
-    max_filters: Optional[int] = Field(description="Maximum number of filters to generate for sql queries.", default=10)
-    filter_threshold: Optional[float] = Field(
-        description="Minimum relevance required to use this Database to generate filters.",
-        default=0.0,
-    )
-    min_k: Optional[int] = Field(
-        description="Minimum number of documents accepted from a generated sql query.",
-        default=10,
-    )
-    database_schema: Optional[DatabaseSchema] = Field(
-        default=None,
-        description="DatabaseSchema describing the database.",
-    )
-    examples: Optional[List[LLMExample]] = Field(
-        default=None,
-        description="Optional examples of final generated pgvector queries based on user input.",
-    )
-
-
-class SummarizationConfig(BaseModel):
-    llm_config: LLMConfig = Field(
-        default_factory=LLMConfig,
-        description="LLM configuration to use for summarization",
-    )
-    map_prompt_template: str = Field(
-        default=DEFAULT_MAP_PROMPT_TEMPLATE,
-        description="Prompt for an LLM to summarize a single document",
-    )
-    reduce_prompt_template: str = Field(
-        default=DEFAULT_REDUCE_PROMPT_TEMPLATE,
-        description="Prompt for an LLM to summarize a set of summaries of documents into one",
-    )
-    max_summarization_tokens: int = Field(
-        default=DEFAULT_MAX_SUMMARIZATION_TOKENS,
-        description="Max number of tokens for summarized documents",
-    )
-
-
 class RerankerMode(str, Enum):
     POINTWISE = "pointwise"
     LISTWISE = "listwise"
@@ -696,7 +43,7 @@ def _missing_(cls, value):
 
 class RerankerConfig(BaseModel):
     model: str = DEFAULT_RERANKING_MODEL
-    base_url: str = DEFAULT_LLM_ENDPOINT
+    base_url: Optional[str] = None
     filtering_threshold: float = 0.5
     num_docs_to_keep: Optional[int] = None
     mode: RerankerMode = Field(
@@ -714,144 +61,3 @@ class RerankerConfig(BaseModel):
     top_logprobs: int = DEFAULT_RERANKER_TOP_LOGPROBS  # Number of top log probabilities to include
     max_tokens: int = DEFAULT_RERANKER_MAX_TOKENS  # Maximum tokens to generate
     valid_class_tokens: List[str] = DEFAULT_VALID_CLASS_TOKENS  # Valid class tokens to look for in the response
-
-
-class MultiHopRetrieverConfig(BaseModel):
-    """Configuration for multi-hop retrieval"""
-
-    base_retriever_type: RetrieverType = Field(
-        default=RetrieverType.VECTOR_STORE,
-        description="Type of base retriever to use for multi-hop retrieval",
-    )
-    max_hops: int = Field(default=3, description="Maximum number of follow-up questions to generate", ge=1)
-    reformulation_template: str = Field(
-        default=DEFAULT_QUESTION_REFORMULATION_TEMPLATE,
-        description="Template for reformulating questions",
-    )
-    llm_config: LLMConfig = Field(
-        default_factory=LLMConfig,
-        description="LLM configuration to use for generating follow-up questions",
-    )
-
-
-class RAGPipelineModel(BaseModel):
-    documents: Optional[List[Any]] = Field(default=None, description="List of documents")
-
-    vector_store_config: VectorStoreConfig = Field(
-        default_factory=VectorStoreConfig, description="Vector store configuration"
-    )
-
-    llm: Optional[Any] = Field(default=None, description="Language model")
-    llm_model_name: str = Field(default=DEFAULT_LLM_MODEL, description="Language model name")
-    llm_provider: Optional[str] = Field(default=None, description="Language model provider")
-    vector_store: Optional[VectorStore] = Field(
-        default=None,
-        description="Vector store",
-    )
-    db_connection_string: Optional[str] = Field(default=None, description="Database connection string")
-    metadata_config: Optional[MetadataConfig] = Field(
-        default=None, description="Configuration for metadata to be used for retrieval"
-    )
-    table_name: str = Field(default=DEFAULT_TEST_TABLE_NAME, description="Table name")
-    embedding_model: Optional[Any] = Field(default=None, description="Embedding model")
-    rag_prompt_template: str = Field(default=DEFAULT_RAG_PROMPT_TEMPLATE, description="RAG prompt template")
-    retriever_prompt_template: Optional[Union[str, dict]] = Field(default=None, description="Retriever prompt template")
-    retriever_type: RetrieverType = Field(default=RetrieverType.VECTOR_STORE, description="Retriever type")
-    search_type: SearchType = Field(default=SearchType.SIMILARITY, description="Type of search to perform")
-    search_kwargs: SearchKwargs = Field(
-        default_factory=SearchKwargs,
-        description="Search configuration for the retriever",
-    )
-    summarization_config: Optional[SummarizationConfig] = Field(
-        default=None,
-        description="Configuration for summarizing retrieved documents as context",
-    )
-    # SQL retriever specific.
-    sql_retriever_config: Optional[SQLRetrieverConfig] = Field(
-        default=None,
-        description="Configuration for retrieving documents by generating SQL to filter by metadata & order by distance function",
-    )
-
-    # Multi retriever specific
-    multi_retriever_mode: MultiVectorRetrieverMode = Field(
-        default=MultiVectorRetrieverMode.BOTH, description="Multi retriever mode"
-    )
-    max_concurrency: int = Field(default=DEFAULT_MAX_CONCURRENCY, description="Maximum concurrency")
-    id_key: int = Field(default=DEFAULT_ID_KEY, description="ID key")
-    parent_store: Optional[Any] = Field(default=None, description="Parent store")
-    text_splitter: Optional[TextSplitter] = Field(default=None, description="Text splitter")
-    chunk_size: int = Field(default=DEFAULT_CHUNK_SIZE, description="Chunk size")
-    chunk_overlap: int = Field(default=DEFAULT_CHUNK_OVERLAP, description="Chunk overlap")
-
-    # Auto retriever specific
-    auto_retriever_filter_columns: Optional[List[str]] = Field(default=None, description="Filter columns")
-    cardinality_threshold: int = Field(default=DEFAULT_CARDINALITY_THRESHOLD, description="Cardinality threshold")
-    content_column_name: str = Field(
-        default=DEFAULT_CONTENT_COLUMN_NAME,
-        description="Content column name (the column we will get embeddings)",
-    )
-    dataset_description: str = Field(default=DEFAULT_DATASET_DESCRIPTION, description="Description of the dataset")
-    reranker: bool = Field(default=DEFAULT_RERANKER_FLAG, description="Whether to use reranker")
-    reranker_config: RerankerConfig = Field(default_factory=RerankerConfig, description="Reranker configuration")
-
-    multi_hop_config: Optional[MultiHopRetrieverConfig] = Field(
-        default=None,
-        description="Configuration for multi-hop retrieval. Required when retriever_type is MULTI_HOP.",
-    )
-
-    @field_validator("multi_hop_config")
-    @classmethod
-    def validate_multi_hop_config(cls, v: Optional[MultiHopRetrieverConfig], info):
-        """Validate that multi_hop_config is set when using multi-hop retrieval."""
-        values = info.data
-        if values.get("retriever_type") == RetrieverType.MULTI_HOP and v is None:
-            raise ValueError("multi_hop_config must be set when using multi-hop retrieval")
-        return v
-
-    class Config:
-        arbitrary_types_allowed = True
-        extra = "forbid"
-
-        json_schema_extra = {
-            "example": {
-                "retriever_type": RetrieverType.VECTOR_STORE.value,
-                "multi_retriever_mode": MultiVectorRetrieverMode.BOTH.value,
-                # add more examples here
-            }
-        }
-
-    @classmethod
-    def get_field_names(cls):
-        return list(cls.model_fields.keys())
-
-    @field_validator("search_kwargs")
-    @classmethod
-    def validate_search_kwargs(cls, v: SearchKwargs, info) -> SearchKwargs:
-        search_type = info.data.get("search_type", SearchType.SIMILARITY)
-
-        # Validate MMR-specific parameters
-        if search_type == SearchType.MMR:
-            if v.fetch_k is not None and v.fetch_k <= v.k:
-                raise ValueError("fetch_k must be greater than k")
-            if v.lambda_mult is not None and (v.lambda_mult < 0 or v.lambda_mult > 1):
-                raise ValueError("lambda_mult must be between 0 and 1")
-            if v.fetch_k is None and v.lambda_mult is not None:
-                raise ValueError("fetch_k is required when using lambda_mult with MMR search type")
-            if v.lambda_mult is None and v.fetch_k is not None:
-                raise ValueError("lambda_mult is required when using fetch_k with MMR search type")
-        elif search_type != SearchType.MMR:
-            if v.fetch_k is not None:
-                raise ValueError("fetch_k is only valid for MMR search type")
-            if v.lambda_mult is not None:
-                raise ValueError("lambda_mult is only valid for MMR search type")
-
-        # Validate similarity_score_threshold parameters
-        if search_type == SearchType.SIMILARITY_SCORE_THRESHOLD:
-            if v.score_threshold is not None and (v.score_threshold < 0 or v.score_threshold > 1):
-                raise ValueError("score_threshold must be between 0 and 1")
-            if v.score_threshold is None:
-                raise ValueError("score_threshold is required for similarity_score_threshold search type")
-        elif search_type != SearchType.SIMILARITY_SCORE_THRESHOLD and v.score_threshold is not None:
-            raise ValueError("score_threshold is only valid for similarity_score_threshold search type")
-
-        return v
diff --git a/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py b/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py
index 0d932f40f23..525bcafc99b 100644
--- a/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py
+++ b/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py
@@ -44,7 +44,7 @@ def split_text(self, text: str) -> List[str]:
         Split text into chunks
 
         Args:
-            text: Text to split
+            text (str): Text to split
 
         Returns:
             List of text chunks
diff --git a/mindsdb/integrations/utilities/rag/storage/__init__.py b/mindsdb/integrations/utilities/rag/storage/__init__.py
deleted file mode 100644
index 0a80c3fc8cb..00000000000
--- a/mindsdb/integrations/utilities/rag/storage/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Storage utilities for RAG pipeline"""
diff --git a/mindsdb/integrations/utilities/rag/storage/in_memory_byte_store.py b/mindsdb/integrations/utilities/rag/storage/in_memory_byte_store.py
deleted file mode 100644
index 825fd2f1a47..00000000000
--- a/mindsdb/integrations/utilities/rag/storage/in_memory_byte_store.py
+++ /dev/null
@@ -1,96 +0,0 @@
-"""Custom in-memory byte store implementation to replace langchain's InMemoryByteStore"""
-
-from typing import List, Tuple, Any, Dict
-
-
-class InMemoryByteStore:
-    """
-    Custom implementation of InMemoryByteStore to replace langchain's version.
-    Stores key-value pairs in memory using a dictionary.
-    """
-
-    def __init__(self):
-        """Initialize the in-memory store with an empty dictionary"""
-        self._store: Dict[str, Any] = {}
-
-    def mset(self, key_value_pairs: List[Tuple[str, Any]]) -> None:
-        """
-        Store multiple key-value pairs
-
-        Args:
-            key_value_pairs: List of (key, value) tuples to store
-        """
-        for key, value in key_value_pairs:
-            self._store[str(key)] = value
-
-    def mget(self, keys: List[str]) -> List[Any]:
-        """
-        Retrieve multiple values by keys
-
-        Args:
-            keys: List of keys to retrieve
-
-        Returns:
-            List of values corresponding to keys (None for missing keys)
-        """
-        return [self._store.get(str(key)) for key in keys]
-
-    def get(self, key: str, default: Any = None) -> Any:
-        """
-        Retrieve a single value by key
-
-        Args:
-            key: Key to retrieve
-            default: Default value to return if key is not found
-
-        Returns:
-            Value associated with key, or default if not found
-        """
-        return self._store.get(str(key), default)
-
-    def set(self, key: str, value: Any) -> None:
-        """
-        Store a single key-value pair
-
-        Args:
-            key: Key to store
-            value: Value to store
-        """
-        self._store[str(key)] = value
-
-    def delete(self, key: str) -> bool:
-        """
-        Delete a key-value pair
-
-        Args:
-            key: Key to delete
-
-        Returns:
-            True if key was found and deleted, False otherwise
-        """
-        key_str = str(key)
-        if key_str in self._store:
-            del self._store[key_str]
-            return True
-        return False
-
-    def clear(self) -> None:
-        """Clear all stored key-value pairs"""
-        self._store.clear()
-
-    def keys(self) -> List[str]:
-        """
-        Get all keys in the store
-
-        Returns:
-            List of all keys
-        """
-        return list(self._store.keys())
-
-    def __contains__(self, key: str) -> bool:
-        """Check if a key exists in the store"""
-        return str(key) in self._store
-
-    def __len__(self) -> int:
-        """Get the number of key-value pairs in the store"""
-        return len(self._store)
diff --git a/mindsdb/integrations/utilities/rag/utils.py b/mindsdb/integrations/utilities/rag/utils.py
deleted file mode 100644
index 7461eed1a05..00000000000
--- a/mindsdb/integrations/utilities/rag/utils.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from typing import List, Any
-
-import pandas as pd
-from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument
-
-
-def df_to_documents(df: pd.DataFrame, content_column_name: str) -> List[SimpleDocument]:
-    """
-    Given a dataframe, convert it to a list of documents.
-
-    :param df: pd.DataFrame
-    :param content_column_name: str
-
-    :return: List[SimpleDocument]
-    """
-    documents = []
-    for _, row in df.iterrows():
-        metadata = row.to_dict()
-        page_content = metadata.pop(content_column_name)
-        documents.append(SimpleDocument(page_content=page_content, metadata=metadata))
-    return documents
-
-
-def documents_to_df(
-    content_column_name: str, documents: List[Any], embedding_model: Any = None, with_embeddings: bool = False
-) -> pd.DataFrame:
-    """
-    Given a list of documents, convert it to a dataframe.
-
-    :param content_column_name: str
-    :param documents: List of document-like objects with page_content and metadata attributes
-    :param embedding_model: Embedding model with embed_documents method
-    :param with_embeddings: bool
-
-    :return: pd.DataFrame
-    """
-    df = pd.DataFrame([doc.metadata for doc in documents])
-
-    df[content_column_name] = [doc.page_content for doc in documents]
-
-    if "date" in df.columns:
-        df["date"] = pd.to_datetime(df["date"], errors="coerce")
-
-    # Reordering the columns to have the content column first.
-    df = df[[content_column_name] + [col for col in df.columns if col != content_column_name]]
-
-    if with_embeddings and embedding_model is not None:
-        df["embeddings"] = embedding_model.embed_documents(df[content_column_name].tolist())
-
-    return df
diff --git a/mindsdb/integrations/utilities/rag/vector_store.py b/mindsdb/integrations/utilities/rag/vector_store.py
deleted file mode 100644
index 69f860d29b1..00000000000
--- a/mindsdb/integrations/utilities/rag/vector_store.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import time
-from datetime import timedelta
-from typing import List, Any, Optional
-
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.base_vector_store import VectorStore
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.vector_store_loader import VectorStoreLoader
-from mindsdb.integrations.utilities.rag.settings import VectorStoreConfig, SearchKwargs
-
-# gpt-3.5-turbo
-_DEFAULT_TPM_LIMIT = 60000
-_DEFAULT_RATE_LIMIT_INTERVAL = timedelta(seconds=10)
-_INITIAL_TOKEN_USAGE = 0
-
-
-class VectorStoreOperator:
-    """
-    Encapsulates the logic for adding documents to a vector store with rate limiting.
-    """
-
-    def __init__(
-        self,
-        vector_store: VectorStore,
-        embedding_model: Any,
-        documents: Optional[List[Any]] = None,
-        vector_store_config: VectorStoreConfig = None,
-        token_per_minute_limit: int = _DEFAULT_TPM_LIMIT,
-        rate_limit_interval: timedelta = _DEFAULT_RATE_LIMIT_INTERVAL,
-        search_kwargs: SearchKwargs = None,
-    ):
-        self.documents = documents
-        self.embedding_model = embedding_model
-        self.token_per_minute_limit = token_per_minute_limit
-        self.rate_limit_interval = rate_limit_interval
-        self.current_token_usage = _INITIAL_TOKEN_USAGE
-        self._vector_store = None
-        self.vector_store_config = vector_store_config
-        self.search_kwargs = search_kwargs or SearchKwargs()
-
-        self.verify_vector_store(vector_store, documents)
-
-    def verify_vector_store(self, vector_store, documents):
-        if documents:
-            self._add_documents_to_store(documents, vector_store)
-        elif isinstance(vector_store, VectorStore):
-            # checking is it instance or subclass instance
-            self._vector_store = vector_store
-        elif issubclass(vector_store, VectorStore):
-            # if it is subclass instance, then create instance of it using vector_store_config
-            self._vector_store = load_vector_store(self.embedding_model, self.vector_store_config)
-
-    @property
-    def vector_store(self):
-        return self._vector_store
-
-    @staticmethod
-    def _calculate_token_usage(document):
-        return len(document.page_content)
-
-    def _rate_limit(self):
-        if self.current_token_usage >= self.token_per_minute_limit:
-            time.sleep(self.rate_limit_interval.total_seconds())
-            self.current_token_usage = _INITIAL_TOKEN_USAGE
-
-    def _update_token_usage(self, document: Any):
-        self._rate_limit()
-        self.current_token_usage += self._calculate_token_usage(document)
-
-    def _add_document(self, document: Any):
-        self._update_token_usage(document)
-        self.vector_store.add_documents([document])
-
-    def _add_documents_to_store(self, documents: List[Any], vector_store: VectorStore):
-        self._init_vector_store(documents, vector_store)
-        self.add_documents(documents)
-
-    def _init_vector_store(self, documents: List[Any], vector_store: VectorStore):
-        if len(documents) > 0:
-            self._vector_store = vector_store.from_documents(documents=[documents[0]], embedding=self.embedding_model)
-
-    def add_documents(self, documents: List[Any]):
-        for document in documents:
-            self._add_document(document)
-
-
-def load_vector_store(embedding_model: Any, config: VectorStoreConfig) -> VectorStore:
-    """
-    Loads the vector store based on the provided config and embeddings model
-    :param embedding_model:
-    :param config:
-    :return:
-    """
-    loader = VectorStoreLoader(embedding_model=embedding_model, config=config)
-    return loader.load()
diff --git a/mindsdb/interfaces/agents/agents_controller.py b/mindsdb/interfaces/agents/agents_controller.py
index 90e809ab568..504c2891af6 100644
--- a/mindsdb/interfaces/agents/agents_controller.py
+++ b/mindsdb/interfaces/agents/agents_controller.py
@@ -1,7 +1,9 @@
 import datetime
-from typing import Dict, Iterator, List, Union, Tuple, Optional, Any
+from typing import Dict, Iterator, List, Union, Tuple, Optional, Any, Text
 import copy
 
+from enum import Enum
+from pydantic import BaseModel
 from sqlalchemy.orm.attributes import flag_modified
 from sqlalchemy import null
 import pandas as pd
@@ -13,19 +15,58 @@
 from mindsdb.interfaces.model.functions import PredictorRecordNotFound
 from mindsdb.interfaces.model.model_controller import ModelController
 from mindsdb.utilities.config import config
+from mindsdb.utilities.utils import validate_pydantic_params
 from mindsdb.utilities import log
+from mindsdb.interfaces.agents.utils.sql_toolkit import MindsDBQuery
 
 from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
 
 from .utils.constants import ASSISTANT_COLUMN, SUPPORTED_PROVIDERS, PROVIDER_TO_MODELS
 from .utils.pydantic_ai_model_factory import get_llm_provider
-
+from .pydantic_ai_agent import check_agent_llm
 
 logger = log.getLogger(__name__)
 
 default_project = config.get("default_project")
 
 
+def check_agent_data(data):
+    tables = data.get("tables", [])
+    knowledge_bases = data.get("knowledge_bases", [])
+    if tables or knowledge_bases:
+        sql_toolkit = MindsDBQuery(tables=tables, knowledge_bases=knowledge_bases)
+
+        if tables and len(sql_toolkit.get_usable_table_names(lazy=False)) == 0:
+            raise ValueError(f"No tables found: {tables}")
+
+        if knowledge_bases and len(sql_toolkit.get_usable_knowledge_base_names(lazy=False)) == 0:
+            raise ValueError(f"No knowledge bases found: {knowledge_bases}")
+
+
+class AgentParamsData(BaseModel):
+    knowledge_bases: List[str] | None = None
+    tables: List[str] | None = None
+
+    class Config:
+        extra = "forbid"
+
+
+class AgentMode(Enum):
+    TEXT = "text"
+    SQL = "sql"
+
+
+class AgentParams(BaseModel):
+    prompt_template: str | None = None
+    model: Dict[Text, Any] | None = None
+    data: AgentParamsData | None = None
+    timeout: int | None = None
+    mode: AgentMode = AgentMode.TEXT
+
+    class Config:
+        extra = "forbid"
+
+
 class AgentsController:
     """Handles CRUD operations at the database level for Agents"""
 
@@ -149,8 +190,7 @@ def add_agent(
         self,
         name: str,
         project_name: str = None,
-        model_name: Union[str, dict] = None,
-        provider: str = None,
+        model: dict = None,
         params: Dict[str, Any] = None,
     ) -> db.Agents:
         """
@@ -159,25 +199,16 @@ def add_agent(
         Parameters:
             name (str): The name of the new agent
             project_name (str): The containing project
-            model_name (str | dict): The name of the existing ML model the agent will use
-            provider (str): The provider of the model
+            model: Dict, parameters for the model to use
+                - provider: The provider of the model (e.g., 'openai', 'google')
+                - Other model-specific parameters like 'api_key', 'model_name', etc.
+
             params (Dict[str, str]): Parameters to use when running the agent
                 data: Dict, data sources for an agent, keys:
                   - knowledge_bases: List of KBs to use
                   - tables: list of tables to use
-                model: Dict, parameters for the model to use
-                  - provider: The provider of the model (e.g., 'openai', 'google')
-                  - Other model-specific parameters like 'api_key', 'model_name', etc.
                 _api_key: API key for the provider (e.g., openai_api_key)
 
-                # Deprecated parameters:
-                database: The database to use (default is 'mindsdb')
-                knowledge_base_database: The database to use for knowledge base queries (default is 'mindsdb')
-                include_tables: List of tables to include
-                ignore_tables: List of tables to ignore
-                include_knowledge_bases: List of knowledge bases to include
-                ignore_knowledge_bases: List of knowledge bases to ignore
-
         Returns:
             agent (db.Agents): The created agent
 
@@ -195,61 +226,19 @@ def add_agent(
 
         # No need to copy params since we're not preserving the original reference
         params = params or {}
+        params["model"] = model
 
-        if isinstance(model_name, dict):
-            # move into params
-            params["model"] = model_name
-            model_name = None
+        # check agent params
+        validate_pydantic_params(params, AgentParams, "agent")
 
-        if model_name is not None:
-            _, provider = self.check_model_provider(model_name, provider)
+        # check llm works
+        llm_params = self.get_agent_llm_params(model)
+        check_agent_llm(llm_params)
 
-        if model_name is None:
-            logger.warning("'model_name' param is not provided. Using default global llm model at runtime.")
-
-        # If model_name is not provided, we use default global llm model at runtime
-        # Default parameters will be applied at runtime via get_agent_llm_params
-        # This allows global default updates to apply to all agents immediately
-
-        # Extract API key if provided in the format _api_key
-        if provider is not None:
-            provider_api_key_param = f"{provider.lower()}_api_key"
-            if provider_api_key_param in params:
-                # Keep the API key in params for the agent to use
-                # It will be picked up by get_api_key() in handler_utils.py
-                pass
-
-        # Handle generic api_key parameter if provided
-        if "api_key" in params:
-            # Keep the generic API key in params for the agent to use
-            # It will be picked up by get_api_key() in handler_utils.py
-            pass
-
-        depreciated_params = [
-            "database",
-            "knowledge_base_database",
-            "include_tables",
-            "ignore_tables",
-            "include_knowledge_bases",
-            "ignore_knowledge_bases",
-        ]
-        if any(param in params for param in depreciated_params):
-            raise ValueError(
-                f"Parameters {', '.join(depreciated_params)} are deprecated. "
-                "Use 'data' parameter with 'tables' and 'knowledge_bases' keys instead."
-            )
-
-        include_tables = None
-        include_knowledge_bases = None
-        if "data" in params:
-            include_knowledge_bases = params["data"].get("knowledge_bases")
-            include_tables = params["data"].get("tables")
-
-        # Convert string parameters to lists if needed
-        if isinstance(include_tables, str):
-            include_tables = [t.strip() for t in include_tables.split(",")]
-        if isinstance(include_knowledge_bases, str):
-            include_knowledge_bases = [kb.strip() for kb in include_knowledge_bases.split(",")]
+        # check data
+        data = params.get("data", {})
+        if data:
+            check_agent_data(data)
 
         agent = db.Agents(
             name=name,
@@ -257,8 +246,6 @@ def add_agent(
             company_id=ctx.company_id,
             user_id=ctx.user_id,
             user_class=ctx.user_class,
-            model_name=model_name,
-            provider=provider,
             params=params,
         )
 
@@ -272,9 +259,8 @@ def update_agent(
         agent_name: str,
         project_name: str = default_project,
         name: str = None,
-        model_name: Union[str, dict] = None,
-        provider: str = None,
-        params: Dict[str, str] = None,
+        model: dict = None,
+        params: Dict[str, Any] = None,
     ):
         """
         Updates an agent in the database.
@@ -283,8 +269,7 @@ def update_agent(
             agent_name (str): The name of the new agent, or existing agent to update
             project_name (str): The containing project
             name (str): The updated name of the agent
-            model_name (str | dict): The name of the existing ML model the agent will use
-            provider (str): The provider of the model
+            model dict: model parameters
             params: (Dict[str, str]): Parameters to use when running the agent
 
         Returns:
@@ -301,12 +286,7 @@ def update_agent(
         existing_params = existing_agent.params or {}
 
         is_demo = (existing_agent.params or {}).get("is_demo", False)
-        if is_demo and (
-            (name is not None and name != agent_name)
-            or (model_name is not None and existing_agent.model_name != model_name)
-            or (provider is not None and existing_agent.provider != provider)
-            or (isinstance(params, dict) and len(params) > 0 and "prompt_template" not in params)
-        ):
+        if is_demo:
             raise ValueError("It is forbidden to change properties of the demo object")
 
         if name is not None and name != agent_name:
@@ -316,27 +296,34 @@ def update_agent(
                 raise EntityExistsError(f"Agent with updated name already exists: {name}")
             existing_agent.name = name
 
-        if model_name or provider:
-            if isinstance(model_name, dict):
-                # move into params
-                existing_params["model"] = model_name
-                model_name = None
-
-            # check model and provider
-            _, provider = self.check_model_provider(model_name, provider)
-            # Update model and provider
-            existing_agent.model_name = model_name
-            existing_agent.provider = provider
-
-        if params is not None:
-            # Merge params on update
-            existing_params.update(params)
-            # Remove None values entirely.
-            params = {k: v for k, v in existing_params.items() if v is not None}
-            existing_agent.params = params
-            # Some versions of SQL Alchemy won't handle JSON updates correctly without this.
-            # See: https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.attributes.flag_modified
-            flag_modified(existing_agent, "params")
+        params = params or {}
+
+        if model:
+            params["model"] = model
+
+        if params:
+            validate_pydantic_params(params, AgentParams, "agent")
+        else:
+            # do nothing
+            return existing_agent
+
+        if model:
+            # check llm works
+            llm_params = self.get_agent_llm_params(model)
+            check_agent_llm(llm_params)
+
+        data = params.get("data", {})
+        if data:
+            check_agent_data(data)
+
+        # Merge params on update
+        existing_params.update(params)
+        # Remove None values entirely.
+        params = {k: v for k, v in existing_params.items() if v is not None}
+        existing_agent.params = params
+        # Some versions of SQL Alchemy won't handle JSON updates correctly without this.
+        # See: https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.attributes.flag_modified
+        flag_modified(existing_agent, "params")
         db.session.commit()
 
         return existing_agent
@@ -362,32 +349,12 @@ def delete_agent(self, agent_name: str, project_name: str = default_project):
         agent.deleted_at = datetime.datetime.now()
         db.session.commit()
 
-    def get_agent_llm_params(self, agent):
+    def get_agent_llm_params(self, model_params):
         """
         Get agent LLM parameters by combining default config with user provided parameters.
         Uses the same pattern as knowledge bases get_model_params function.
         """
 
-        agent_params = agent.params
-
-        # Get model params from agent params (same structure as knowledge bases)
-        if "model" in agent_params:
-            model_params = agent_params.get("model", {})
-            if not isinstance(model_params, dict):
-                raise ValueError("Model parameters must be passed as a JSON object")
-        else:
-            # params for LLM can be arbitrary (backward compatibility)
-            model_params = copy.deepcopy(agent_params)
-            model_params.pop("mode", None)
-            model_params.pop("prompt_template", None)
-
-            _, provider = self.check_model_provider(agent.model_name, agent.provider)
-
-            if agent.model_name is not None:
-                model_params["model_name"] = agent.model_name
-            if provider is not None:
-                model_params["provider"] = provider
-
         combined_model_params = copy.deepcopy(config.get("default_llm", {}))
 
         if model_params:
@@ -433,7 +400,7 @@ def get_completion(
         from .pydantic_ai_agent import PydanticAIAgent
 
         # Get agent parameters and combine with default LLM parameters at runtime
-        llm_params = self.get_agent_llm_params(agent)
+        llm_params = self.get_agent_llm_params(agent.params.get("model"))
 
         pydantic_agent = PydanticAIAgent(agent, llm_params=llm_params)
 
diff --git a/mindsdb/interfaces/agents/callback_handlers.py b/mindsdb/interfaces/agents/callback_handlers.py
deleted file mode 100644
index f4735b737c7..00000000000
--- a/mindsdb/interfaces/agents/callback_handlers.py
+++ /dev/null
@@ -1,177 +0,0 @@
-import io
-import logging
-import contextlib
-from typing import Any, Dict, List, Union, Callable
-
-from langchain_core.agents import AgentAction, AgentFinish
-from langchain_core.callbacks.base import BaseCallbackHandler
-from langchain_core.messages.base import BaseMessage
-from langchain_core.outputs import LLMResult
-from langchain_core.callbacks import StdOutCallbackHandler
-
-
-class ContextCaptureCallback(BaseCallbackHandler):
-    def __init__(self):
-        self.context = None
-
-    def on_retriever_end(self, documents: List[Any], *, run_id: str, parent_run_id: Union[str, None] = None, **kwargs: Any) -> Any:
-        self.context = [{
-            'page_content': doc.page_content,
-            'metadata': doc.metadata
-        } for doc in documents]
-
-    def get_contexts(self):
-        return self.context
-
-
-class VerboseLogCallbackHandler(StdOutCallbackHandler):
-    def __init__(self, logger: logging.Logger, verbose: bool):
-        self.logger = logger
-        self.verbose = verbose
-        super().__init__()
-
-    def __call(self, method: Callable, *args: List[Any], **kwargs: Any) -> Any:
-        if self.verbose is False:
-            return
-        f = io.StringIO()
-        with contextlib.redirect_stdout(f):
-            method(*args, **kwargs)
-        output = f.getvalue()
-        self.logger.info(output)
-
-    def on_chain_start(self, *args: List[Any], **kwargs: Any) -> None:
-        self.__call(super().on_chain_start, *args, **kwargs)
-
-    def on_chain_end(self, *args: List[Any], **kwargs: Any) -> None:
-        self.__call(super().on_chain_end, *args, **kwargs)
-
-    def on_agent_action(self, *args: List[Any], **kwargs: Any) -> None:
-        self.__call(super().on_agent_action, *args, **kwargs)
-
-    def on_tool_end(self, *args: List[Any], **kwargs: Any) -> None:
-        self.__call(super().on_tool_end, *args, **kwargs)
-
-    def on_text(self, *args: List[Any], **kwargs: Any) -> None:
-        self.__call(super().on_text, *args, **kwargs)
-
-    def on_agent_finish(self, *args: List[Any], **kwargs: Any) -> None:
-        self.__call(super().on_agent_finish, *args, **kwargs)
-
-
-class LogCallbackHandler(BaseCallbackHandler):
-    '''Langchain callback handler that logs agent and chain executions.'''
-
-    def __init__(self, logger: logging.Logger, verbose: bool = True):
-        logger.setLevel('DEBUG')
-        self.logger = logger
-        self._num_running_chains = 0
-        self.generated_sql = None
-        self.verbose_log_handler = VerboseLogCallbackHandler(logger, verbose)
-
-    def on_llm_start(
-        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
-    ) -> Any:
-        '''Run when LLM starts running.'''
-        self.logger.debug('LLM started with prompts:')
-        for prompt in prompts:
-            self.logger.debug(prompt[:50])
-        self.verbose_log_handler.on_llm_start(serialized, prompts, **kwargs)
-
-    def on_chat_model_start(
-            self,
-            serialized: Dict[str, Any],
-            messages: List[List[BaseMessage]], **kwargs: Any
-    ) -> Any:
-        '''Run when Chat Model starts running.'''
-        self.logger.debug('Chat model started with messages:')
-        for message_list in messages:
-            for message in message_list:
-                self.logger.debug(message.pretty_repr())
-
-    def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:
-        '''Run on new LLM token. Only available when streaming is enabled.'''
-        pass
-
-    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:
-        '''Run when LLM ends running.'''
-        self.logger.debug('LLM ended with response:')
-        self.logger.debug(str(response.llm_output))
-
-    def on_llm_error(
-        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
-    ) -> Any:
-        '''Run when LLM errors.'''
-        self.logger.debug(f'LLM encountered an error: {str(error)}')
-
-    def on_chain_start(
-        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
-    ) -> Any:
-        '''Run when chain starts running.'''
-        self._num_running_chains += 1
-        self.logger.info('Entering new LLM chain ({} total)'.format(
-            self._num_running_chains))
-        self.logger.debug('Inputs: {}'.format(inputs))
-
-        self.verbose_log_handler.on_chain_start(serialized=serialized, inputs=inputs, **kwargs)
-
-    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
-        '''Run when chain ends running.'''
-        self._num_running_chains -= 1
-        self.logger.info('Ended LLM chain ({} total)'.format(
-            self._num_running_chains))
-        self.logger.debug('Outputs: {}'.format(outputs))
-
-        self.verbose_log_handler.on_chain_end(outputs=outputs, **kwargs)
-
-    def on_chain_error(
-        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
-    ) -> Any:
-        '''Run when chain errors.'''
-        self._num_running_chains -= 1
-        self.logger.error(
-            'LLM chain encountered an error ({} running): {}'.format(
-                self._num_running_chains, error))
-
-    def on_tool_start(
-        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
-    ) -> Any:
-        '''Run when tool starts running.'''
-        pass
-
-    def on_tool_end(self, output: str, **kwargs: Any) -> Any:
-        '''Run when tool ends running.'''
-        self.verbose_log_handler.on_tool_end(output=output, **kwargs)
-
-    def on_tool_error(
-        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
-    ) -> Any:
-        '''Run when tool errors.'''
-        pass
-
-    def on_text(self, text: str, **kwargs: Any) -> Any:
-        '''Run on arbitrary text.'''
-        self.verbose_log_handler.on_text(text=text, **kwargs)
-
-    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
-        '''Run on agent action.'''
-        self.logger.debug(f'Running tool {action.tool} with input:')
-        self.logger.debug(action.tool_input)
-
-        stop_block = 'Observation: '
-        if stop_block in action.tool_input:
-            action.tool_input = action.tool_input[: action.tool_input.find(stop_block)]
-
-        if action.tool.startswith("sql_db_query"):
-            # Save the generated SQL query
-            self.generated_sql = action.tool_input
-
-        # fix for mistral
-        action.tool = action.tool.replace('\\', '')
-
-        self.verbose_log_handler.on_agent_action(action=action, **kwargs)
-
-    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
-        '''Run on agent end.'''
-        self.logger.debug('Agent finished with return values:')
-        self.logger.debug(str(finish.return_values))
-        self.verbose_log_handler.on_agent_finish(finish=finish, **kwargs)
diff --git a/mindsdb/interfaces/agents/event_dispatch_callback_handler.py b/mindsdb/interfaces/agents/event_dispatch_callback_handler.py
deleted file mode 100644
index 7446ba2adaa..00000000000
--- a/mindsdb/interfaces/agents/event_dispatch_callback_handler.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import queue
-from typing import Any, Dict, List, Optional, Sequence
-from uuid import UUID
-
-from langchain_core.callbacks import BaseCallbackHandler
-from langchain_core.documents import Document
-
-
-class EventDispatchCallbackHandler(BaseCallbackHandler):
-    '''Puts dispatched events onto an event queue to be processed as a streaming chunk'''
-    def __init__(self, queue: queue.Queue):
-        self.queue = queue
-
-    def on_custom_event(
-        self,
-        name: str,
-        data: Any,
-        *,
-        run_id: UUID,
-        tags: Optional[List[str]] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-        **kwargs
-    ):
-        self.queue.put({
-            'type': 'event',
-            'name': name,
-            'data': data
-        })
-
-    def on_retriever_end(
-        self,
-        documents: Sequence[Document],
-        *,
-        run_id: UUID,
-        parent_run_id: Optional[UUID] = None,
-        **kwargs: Any,
-    ) -> Any:
-        document_objects = []
-        for d in documents:
-            document_objects.append({
-                'content': d.page_content,
-                'metadata': d.metadata
-            })
-        self.queue.put({
-            'type': 'event',
-            'name': 'retriever_end',
-            'data': {
-                'documents': document_objects
-            }
-        })
diff --git a/mindsdb/interfaces/agents/langfuse_callback_handler.py b/mindsdb/interfaces/agents/langfuse_callback_handler.py
deleted file mode 100644
index 948eadda6b8..00000000000
--- a/mindsdb/interfaces/agents/langfuse_callback_handler.py
+++ /dev/null
@@ -1,308 +0,0 @@
-from typing import Any, Dict, Union, Optional, List
-from uuid import uuid4
-import datetime
-import json
-
-from langchain_core.callbacks.base import BaseCallbackHandler
-
-from mindsdb.utilities import log
-from mindsdb.interfaces.storage import db
-
-logger = log.getLogger(__name__)
-logger.setLevel('DEBUG')
-
-
-class LangfuseCallbackHandler(BaseCallbackHandler):
-    """Langchain callback handler that traces tool & chain executions using Langfuse."""
-
-    def __init__(self, langfuse, trace_id: Optional[str] = None, observation_id: Optional[str] = None):
-        self.langfuse = langfuse
-        self.chain_uuid_to_span = {}
-        self.action_uuid_to_span = {}
-        # if these are not available, we generate some UUIDs
-        self.trace_id = trace_id or uuid4().hex
-        self.observation_id = observation_id or uuid4().hex
-        # Track metrics about tools and chains
-        self.tool_metrics = {}
-        self.chain_metrics = {}
-        self.current_chain = None
-
-    def on_tool_start(
-            self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
-    ) -> Any:
-        """Run when tool starts running."""
-        parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex
-        action_span = self.action_uuid_to_span.get(parent_run_uuid)
-        if action_span is None:
-            return
-
-        tool_name = serialized.get("name", "tool")
-        start_time = datetime.datetime.now()
-
-        # Initialize or update tool metrics
-        if tool_name not in self.tool_metrics:
-            self.tool_metrics[tool_name] = {
-                'count': 0,
-                'total_time': 0,
-                'errors': 0,
-                'last_error': None,
-                'inputs': []
-            }
-
-        self.tool_metrics[tool_name]['count'] += 1
-        self.tool_metrics[tool_name]['inputs'].append(input_str)
-
-        metadata = {
-            'tool_name': tool_name,
-            'started': start_time.isoformat(),
-            'start_timestamp': start_time.timestamp(),
-            'input_length': len(input_str) if input_str else 0
-        }
-        action_span.update(metadata=metadata)
-
-    def on_tool_end(self, output: str, **kwargs: Any) -> Any:
-        """Run when tool ends running."""
-        parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex
-        action_span = self.action_uuid_to_span.get(parent_run_uuid)
-        if action_span is None:
-            return
-
-        end_time = datetime.datetime.now()
-        tool_name = action_span.metadata.get('tool_name', 'unknown')
-        start_timestamp = action_span.metadata.get('start_timestamp')
-
-        if start_timestamp:
-            duration = end_time.timestamp() - start_timestamp
-            if tool_name in self.tool_metrics:
-                self.tool_metrics[tool_name]['total_time'] += duration
-
-        metadata = {
-            'finished': end_time.isoformat(),
-            'duration_seconds': duration if start_timestamp else None,
-            'output_length': len(output) if output else 0
-        }
-
-        action_span.update(
-            output=output,  # tool output is action output (unless superseded by a global action output)
-            metadata=metadata
-        )
-
-    def on_tool_error(
-            self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
-    ) -> Any:
-        """Run when tool errors."""
-        parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex
-        action_span = self.action_uuid_to_span.get(parent_run_uuid)
-        if action_span is None:
-            return
-
-        try:
-            error_str = str(error)
-        except Exception:
-            error_str = "Couldn't get error string."
-
-        tool_name = action_span.metadata.get('tool_name', 'unknown')
-        if tool_name in self.tool_metrics:
-            self.tool_metrics[tool_name]['errors'] += 1
-            self.tool_metrics[tool_name]['last_error'] = error_str
-
-        metadata = {
-            'error_description': error_str,
-            'error_type': error.__class__.__name__,
-            'error_time': datetime.datetime.now().isoformat()
-        }
-        action_span.update(metadata=metadata)
-
-    def on_chain_start(
-            self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
-    ) -> Any:
-        """Run when chain starts running."""
-        if self.langfuse is None:
-            return
-
-        run_uuid = kwargs.get('run_id', uuid4()).hex
-
-        if serialized is None:
-            serialized = {}
-
-        chain_name = serialized.get("name", "chain")
-        start_time = datetime.datetime.now()
-
-        # Initialize or update chain metrics
-        if chain_name not in self.chain_metrics:
-            self.chain_metrics[chain_name] = {
-                'count': 0,
-                'total_time': 0,
-                'errors': 0,
-                'last_error': None
-            }
-
-        self.chain_metrics[chain_name]['count'] += 1
-        self.current_chain = chain_name
-
-        try:
-            chain_span = self.langfuse.span(
-                name=f'{chain_name}-{run_uuid}',
-                trace_id=self.trace_id,
-                parent_observation_id=self.observation_id,
-                input=json.dumps(inputs, indent=2)
-            )
-
-            metadata = {
-                'chain_name': chain_name,
-                'started': start_time.isoformat(),
-                'start_timestamp': start_time.timestamp(),
-                'input_keys': list(inputs.keys()) if isinstance(inputs, dict) else None,
-                'input_size': len(inputs) if isinstance(inputs, dict) else len(str(inputs))
-            }
-            chain_span.update(metadata=metadata)
-            self.chain_uuid_to_span[run_uuid] = chain_span
-        except Exception as e:
-            logger.warning(f"Error creating Langfuse span: {str(e)}")
-
-    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
-        """Run when chain ends running."""
-        if self.langfuse is None:
-            return
-
-        chain_uuid = kwargs.get('run_id', uuid4()).hex
-        if chain_uuid not in self.chain_uuid_to_span:
-            return
-        chain_span = self.chain_uuid_to_span.pop(chain_uuid)
-        if chain_span is None:
-            return
-
-        try:
-            end_time = datetime.datetime.now()
-            chain_name = chain_span.metadata.get('chain_name', 'unknown')
-            start_timestamp = chain_span.metadata.get('start_timestamp')
-
-            if start_timestamp and chain_name in self.chain_metrics:
-                duration = end_time.timestamp() - start_timestamp
-                self.chain_metrics[chain_name]['total_time'] += duration
-
-            metadata = {
-                'finished': end_time.isoformat(),
-                'duration_seconds': duration if start_timestamp else None,
-                'output_keys': list(outputs.keys()) if isinstance(outputs, dict) else None,
-                'output_size': len(outputs) if isinstance(outputs, dict) else len(str(outputs))
-            }
-            chain_span.update(output=json.dumps(outputs, indent=2), metadata=metadata)
-            chain_span.end()
-        except Exception as e:
-            logger.warning(f"Error updating Langfuse span: {str(e)}")
-
-    def on_chain_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
-        """Run when chain errors."""
-        chain_uuid = kwargs.get('run_id', uuid4()).hex
-        if chain_uuid not in self.chain_uuid_to_span:
-            return
-        chain_span = self.chain_uuid_to_span.get(chain_uuid)
-        if chain_span is None:
-            return
-
-        try:
-            error_str = str(error)
-        except Exception:
-            error_str = "Couldn't get error string."
-
-        chain_name = chain_span.metadata.get('chain_name', 'unknown')
-        if chain_name in self.chain_metrics:
-            self.chain_metrics[chain_name]['errors'] += 1
-            self.chain_metrics[chain_name]['last_error'] = error_str
-
-        metadata = {
-            'error_description': error_str,
-            'error_type': error.__class__.__name__,
-            'error_time': datetime.datetime.now().isoformat()
-        }
-        chain_span.update(metadata=metadata)
-
-    def on_agent_action(self, action, **kwargs: Any) -> Any:
-        """Run on agent action."""
-        if self.langfuse is None:
-            return
-
-        run_uuid = kwargs.get('run_id', uuid4()).hex
-        try:
-            action_span = self.langfuse.span(
-                name=f'{getattr(action, "type", "action")}-{getattr(action, "tool", "")}-{run_uuid}',
-                trace_id=self.trace_id,
-                parent_observation_id=self.observation_id,
-                input=str(action)
-            )
-            self.action_uuid_to_span[run_uuid] = action_span
-        except Exception as e:
-            logger.warning(f"Error creating Langfuse span for agent action: {str(e)}")
-
-    def on_agent_finish(self, finish, **kwargs: Any) -> Any:
-        """Run on agent end."""
-        if self.langfuse is None:
-            return
-
-        run_uuid = kwargs.get('run_id', uuid4()).hex
-        if run_uuid not in self.action_uuid_to_span:
-            return
-        action_span = self.action_uuid_to_span.pop(run_uuid)
-        if action_span is None:
-            return
-
-        try:
-            if finish is not None:
-                action_span.update(output=finish)  # supersedes tool output
-            action_span.end()
-        except Exception as e:
-            logger.warning(f"Error updating Langfuse span: {str(e)}")
-
-    def auth_check(self):
-        if self.langfuse is not None:
-            return self.langfuse.auth_check()
-        return False
-
-    def get_metrics(self) -> Dict[str, Any]:
-        """Get collected metrics about tools and chains.
-
-        Returns:
-            Dict containing:
-            - tool_metrics: Statistics about tool usage, errors, and timing
-            - chain_metrics: Statistics about chain execution, errors, and timing
-            For each tool/chain, includes:
-                - count: Number of times used
-                - total_time: Total execution time
-                - errors: Number of errors
-                - last_error: Most recent error message
-                - avg_duration: Average execution time
-        """
-        metrics = {
-            'tool_metrics': {},
-            'chain_metrics': {}
-        }
-
-        # Process tool metrics
-        for tool_name, data in self.tool_metrics.items():
-            metrics['tool_metrics'][tool_name] = {
-                'count': data['count'],
-                'total_time': data['total_time'],
-                'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
-                'errors': data['errors'],
-                'last_error': data['last_error'],
-                'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
-            }
-
-        # Process chain metrics
-        for chain_name, data in self.chain_metrics.items():
-            metrics['chain_metrics'][chain_name] = {
-                'count': data['count'],
-                'total_time': data['total_time'],
-                'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
-                'errors': data['errors'],
-                'last_error': data['last_error'],
-                'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
-            }
-
-        return metrics
-
-
-def get_skills(agent: db.Agents) -> List:
-    """ Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
-    return [rel.skill.type for rel in agent.skills_relationships]
diff --git a/mindsdb/interfaces/agents/modes/base.py b/mindsdb/interfaces/agents/modes/base.py
index 97376b2a2af..1ec13d9242c 100644
--- a/mindsdb/interfaces/agents/modes/base.py
+++ b/mindsdb/interfaces/agents/modes/base.py
@@ -8,6 +8,10 @@ class PlanResponse(BaseModel):
     estimated_steps: int = Field(..., description="Estimated number of steps needed to solve the question")
 
 
+class TestResponse(BaseModel):
+    text: str = Field(..., description="Text response to the user")
+
+
 class ResponseType:
     FINAL_QUERY = "final_query"  # this is the final query
     EXPLORATORY = "exploratory_query"  # this is a query to explore and collect info to solve the challenge (e.g., distinct values of a categorical column, schema inference, etc.)
diff --git a/mindsdb/interfaces/agents/provider_utils.py b/mindsdb/interfaces/agents/provider_utils.py
deleted file mode 100644
index 8447102fb1b..00000000000
--- a/mindsdb/interfaces/agents/provider_utils.py
+++ /dev/null
@@ -1,40 +0,0 @@
-"""Utilities for working with agent providers.
-
-These helpers are intentionally free of heavy optional dependencies so they can
-be imported in lightweight builds where LangChain is not installed.
-"""
-
-from typing import Dict
-
-from mindsdb.interfaces.agents.constants import (
-    ANTHROPIC_CHAT_MODELS,
-    GOOGLE_GEMINI_CHAT_MODELS,
-    NVIDIA_NIM_CHAT_MODELS,
-    OLLAMA_CHAT_MODELS,
-    OPEN_AI_CHAT_MODELS,
-    WRITER_CHAT_MODELS,
-)
-
-
-def get_llm_provider(args: Dict) -> str:
-    """Infer the LLM provider from the supplied arguments."""
-
-    # Prefer an explicitly provided provider.
-    if "provider" in args:
-        return args["provider"]
-
-    model_name = args.get("model_name")
-    if model_name in ANTHROPIC_CHAT_MODELS:
-        return "anthropic"
-    if model_name in OPEN_AI_CHAT_MODELS:
-        return "openai"
-    if model_name in OLLAMA_CHAT_MODELS:
-        return "ollama"
-    if model_name in NVIDIA_NIM_CHAT_MODELS:
-        return "nvidia_nim"
-    if model_name in GOOGLE_GEMINI_CHAT_MODELS:
-        return "google"
-    if model_name in WRITER_CHAT_MODELS:
-        return "writer"
-
-    raise ValueError("Invalid model name. Please define a supported llm provider")
diff --git a/mindsdb/interfaces/agents/pydantic_ai_agent.py b/mindsdb/interfaces/agents/pydantic_ai_agent.py
index f32fb3d8ed3..7a38c57b165 100644
--- a/mindsdb/interfaces/agents/pydantic_ai_agent.py
+++ b/mindsdb/interfaces/agents/pydantic_ai_agent.py
@@ -25,7 +25,7 @@
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities.langfuse import LangfuseClientWrapper
 from mindsdb.interfaces.agents.modes import sql as sql_mode, text_sql as text_sql_mode
-from mindsdb.interfaces.agents.modes.base import ResponseType, PlanResponse
+from mindsdb.interfaces.agents.modes.base import ResponseType, PlanResponse, TestResponse
 
 logger = log.getLogger(__name__)
 DEBUG_LOGGER = logger.debug
@@ -65,6 +65,12 @@ def wrapper(self, messages, *args, **kwargs):
     return decorator
 
 
+def check_agent_llm(llm_params):
+    model = get_model_instance_from_kwargs(llm_params)
+    agent = Agent(model, output_type=TestResponse)
+    agent.run_sync("Say 'hi'")
+
+
 class PydanticAIAgent:
     """Pydantic AI-based agent to replace LangchainAgent"""
 
@@ -87,10 +93,6 @@ def __init__(
         self.llm: Optional[object] = None
         self.embedding_model: Optional[object] = None
 
-        self.log_callback_handler: Optional[object] = None
-        self.langfuse_callback_handler: Optional[object] = None
-        self.mdb_langfuse_callback_handler: Optional[object] = None
-
         self.langfuse_client_wrapper = LangfuseClientWrapper()
         self.agent_mode = self.agent.params.get("mode", "text")
 
diff --git a/mindsdb/interfaces/agents/utils/pydantic_ai_model_factory.py b/mindsdb/interfaces/agents/utils/pydantic_ai_model_factory.py
index 8189542312e..aa72d5e2f99 100644
--- a/mindsdb/interfaces/agents/utils/pydantic_ai_model_factory.py
+++ b/mindsdb/interfaces/agents/utils/pydantic_ai_model_factory.py
@@ -52,7 +52,7 @@ def get_llm_provider(args: Dict) -> str:
         return "writer"
 
     # For vLLM, require explicit provider specification
-    raise ValueError("Invalid model name. Please define a supported llm provider")
+    raise ValueError(f"Invalid model name: {model_name}. Please define a supported llm provider")
 
 
 def get_embedding_model_provider(args: Dict) -> str:
diff --git a/mindsdb/interfaces/agents/utils/sql_toolkit.py b/mindsdb/interfaces/agents/utils/sql_toolkit.py
index 1468bc2fb75..9588ad6ecba 100644
--- a/mindsdb/interfaces/agents/utils/sql_toolkit.py
+++ b/mindsdb/interfaces/agents/utils/sql_toolkit.py
@@ -281,11 +281,11 @@ def _check_f(node, is_table=None, **kwargs):
 
         query_traversal(ast_query, _check_f)
 
-    def get_usable_table_names(self):
+    def get_usable_table_names(self, lazy=True):
         if not self.tables:
             # no tables allowed
             return []
-        if not self.tables.has_wildcard:
+        if not self.tables.has_wildcard and lazy:
             return self.tables.items
 
         result_tables = []
@@ -330,16 +330,19 @@ def get_usable_table_names(self):
 
         return result_tables
 
-    def get_usable_knowledge_base_names(self):
+    def get_usable_knowledge_base_names(self, lazy=True):
         if not self.knowledge_bases:
             # no tables allowed
             return []
-        if not self.knowledge_bases.has_wildcard:
+        if not self.knowledge_bases.has_wildcard and lazy:
             return self.knowledge_bases.items
 
         try:
             # Query to get all knowledge bases
-            ast_query = Show(category="Knowledge Bases")
+            ast_query = Select(
+                targets=[Identifier("PROJECT"), Identifier("NAME")],
+                from_table=Identifier(parts=["information_schema", "knowledge_bases"]),
+            )
             result = self.command_executor.execute_command(ast_query)
 
             kb_names = []
diff --git a/mindsdb/interfaces/database/database.py b/mindsdb/interfaces/database/database.py
index bbacc9c256a..3f0fb602ace 100644
--- a/mindsdb/interfaces/database/database.py
+++ b/mindsdb/interfaces/database/database.py
@@ -101,11 +101,15 @@ def get_dict(self, filter_type: Optional[str] = None, lowercase: bool = True):
 
     def get_integration(self, integration_id):
         # get integration by id
-
-        # TODO get directly from db?
-        for rec in self.get_list():
-            if rec["id"] == integration_id and rec["type"] == "data":
-                return {"name": rec["name"], "type": rec["type"], "engine": rec["engine"], "id": rec["id"]}
+        integration = self.integration_controller.get_by_id(integration_id)
+        if integration and integration.get("type", "data") == "data":
+            return {
+                "name": integration["name"],
+                "type": integration["type"],
+                "engine": integration["engine"],
+                "id": integration["id"],
+            }
+        return None
 
     def exists(self, db_name: str) -> bool:
         return db_name.lower() in self.get_dict()
diff --git a/mindsdb/interfaces/database/integrations.py b/mindsdb/interfaces/database/integrations.py
index 867686aad02..5605ee04af5 100644
--- a/mindsdb/interfaces/database/integrations.py
+++ b/mindsdb/interfaces/database/integrations.py
@@ -1,11 +1,13 @@
 import os
 import sys
+import types
 import base64
 import shutil
 import ast
 import time
 import tempfile
 import importlib
+import importlib.util as iutil
 import threading
 from pathlib import Path
 from copy import deepcopy
@@ -33,6 +35,13 @@
 from mindsdb.integrations.libs.base import BaseHandler
 import mindsdb.utilities.profiler as profiler
 
+from mindsdb.integrations.utilities.community_handler_fetcher import (
+    community_handlers_enabled,
+    fetch_handler,
+    get_community_handlers_storage_dir,
+    list_available_handlers,
+)
+
 logger = log.getLogger(__name__)
 
 
@@ -152,9 +161,19 @@ def _is_not_empty_str(s):
 
     def __init__(self):
         self._import_lock = threading.Lock()
+        self._community_handlers_dir = None
         self._load_handler_modules()
         self.handlers_cache = HandlersCache()
 
+    @property
+    def community_handlers_dir(self) -> Path:
+        """Returns (and creates) the community handlers directory under MINDSDB_STORAGE_DIR."""
+        if self._community_handlers_dir is None:
+            config = Config()
+            storage_root = Path(config["paths"]["root"])
+            self._community_handlers_dir = get_community_handlers_storage_dir(storage_root)
+        return self._community_handlers_dir
+
     def _add_integration_record(self, name, engine, connection_args):
         integration_record = db.Integration(
             name=name, engine=engine, data=connection_args or {}, company_id=ctx.company_id
@@ -698,45 +717,95 @@ def _get_handler_icon(self, handler_dir, icon_path):
             logger.error(f"Error reading icon for {handler_dir}, {e}!")
         return icon
 
+    def _register_handler_dir(self, handler_dir: Path, is_community: bool = False):
+        """
+        Parse a handler directory and register it in handlers_import_status.
+        If the handler can be imported, its module is added to handler_modules and import status is set to success.
+        If the handler cannot be imported, import status is set to failed with error message."""
+        handler_info = self._get_handler_info(handler_dir)
+        if "name" not in handler_info:
+            return
+        handler_name = handler_info["name"]
+        dependencies = self._read_dependencies(handler_dir)
+        handler_meta = {
+            "path": handler_dir,
+            "import": {
+                "success": None,
+                "error_message": None,
+                "folder": handler_dir.name,
+                "dependencies": dependencies,
+            },
+            "name": handler_name,
+            "permanent": handler_info.get("permanent", False),
+            "connection_args": handler_info.get("connection_args", None),
+            "class_type": handler_info.get("class_type", None),
+            "type": handler_info.get("type"),
+            "support_level": handler_info.get("support_level"),
+            "community": is_community,
+        }
+        if "icon_path" in handler_info:
+            icon = self._get_handler_icon(handler_dir, handler_info["icon_path"])
+            if icon:
+                handler_meta["icon"] = icon
+        self.handlers_import_status[handler_name] = handler_meta
+
     def _load_handler_modules(self):
-        mindsdb_path = Path(importlib.util.find_spec("mindsdb").origin).parent
+        mindsdb_path = Path(iutil.find_spec("mindsdb").origin).parent
         handlers_path = mindsdb_path.joinpath("integrations/handlers")
 
         # edge case: running from tests directory, find_spec finds the base folder instead of actual package
         if not os.path.isdir(handlers_path):
-            mindsdb_path = Path(importlib.util.find_spec("mindsdb").origin).parent.joinpath("mindsdb")
+            mindsdb_path = Path(iutil.find_spec("mindsdb").origin).parent.joinpath("mindsdb")
             handlers_path = mindsdb_path.joinpath("integrations/handlers")
 
         self.handler_modules = {}
         self.handlers_import_status = {}
+
+        # Built-in handlers always present in the mindsdb package
         for handler_dir in handlers_path.iterdir():
             if handler_dir.is_dir() is False or handler_dir.name.startswith("__"):
                 continue
+            # TODO: do we need this for always present built-in handlers?
+            # maybe we can just import them without scanning the directory and parsing the metadata?
+            self._register_handler_dir(handler_dir, is_community=False)
 
-            handler_info = self._get_handler_info(handler_dir)
-            if "name" not in handler_info:
-                continue
-            handler_name = handler_info["name"]
-            dependencies = self._read_dependencies(handler_dir)
-            handler_meta = {
-                "path": handler_dir,
-                "import": {
-                    "success": None,
-                    "error_message": None,
-                    "folder": handler_dir.name,
-                    "dependencies": dependencies,
-                },
-                "name": handler_name,
-                "permanent": handler_info.get("permanent", False),
-                "connection_args": handler_info.get("connection_args", None),
-                "class_type": handler_info.get("class_type", None),
-                "type": handler_info.get("type"),
-            }
-            if "icon_path" in handler_info:
-                icon = self._get_handler_icon(handler_dir, handler_info["icon_path"])
-                if icon:
-                    handler_meta["icon"] = icon
-            self.handlers_import_status[handler_name] = handler_meta
+        if not community_handlers_enabled():
+            logger.debug("Community handlers disabled (set MINDSDB_COMMUNITY_HANDLERS=true to enable)")
+            return
+
+        # Community handlers already fetched in a previous session (on disk)
+        try:
+            for handler_dir in self.community_handlers_dir.iterdir():
+                if handler_dir.is_dir() and not handler_dir.name.startswith(("__", ".")):
+                    self._register_handler_dir(handler_dir, is_community=True)
+        except Exception as e:
+            logger.warning("Could not scan community handlers dir: %s", e)
+
+        try:
+            for entry in list_available_handlers():
+                handler_name = entry.get("name")
+                if not handler_name or handler_name in self.handlers_import_status:
+                    continue
+                handler_type = HANDLER_TYPE.ML if entry.get("type") == "ml" else HANDLER_TYPE.DATA
+                self.handlers_import_status[handler_name] = {
+                    "path": None,
+                    "import": {
+                        "success": None,
+                        "error_message": None,
+                        "folder": entry.get("folder", f"{handler_name}_handler"),
+                        "dependencies": [],
+                    },
+                    "name": handler_name,
+                    "title": entry.get("title", handler_name),
+                    "description": entry.get("description", ""),
+                    "permanent": False,
+                    "connection_args": None,
+                    "class_type": None,
+                    "type": handler_type,
+                    "support_level": HANDLER_SUPPORT_LEVEL.COMMUNITY,
+                }
+        except Exception as e:
+            logger.warning("Could not load community handlers index: %s", e)
 
     def _get_connection_args(self, args_file: Path, param_name: str) -> dict:
         """
@@ -858,32 +927,115 @@ def _get_handler_info(self, handler_dir: Path) -> dict:
 
         return info
 
+    def _fetch_community_handler(self, handler_name: str, handler_folder) -> Optional[dict]:
+        """
+        Attempt to fetch a community handler from GitHub by its logical name.
+
+        Maps the logical handler name (e.g. "github") to the directory name,
+        downloads it from the external repository,
+        registers it in handlers_import_status, and returns the handler_meta.
+
+        Returns None if the handler does not exist in the community repo or
+        if a network/API error occurs (logged as a warning).
+        """
+
+        logger.debug(
+            "Handler '%s' not found locally, attempting on-demand fetch from community repo...",
+            handler_name,
+        )
+
+        try:
+            handler_dir = fetch_handler(handler_folder, self.community_handlers_dir)
+        except RuntimeError as e:
+            logger.warning("Failed to fetch community handler '%s': %s", handler_name, e)
+            return None
+
+        if handler_dir is None:
+            logger.debug("Handler '%s' does not exist in the community repo", handler_name)
+            return None
+
+        self._register_handler_dir(handler_dir, is_community=True)
+        handler_meta = self.handlers_import_status.get(handler_name)
+        if handler_meta is None:
+            logger.warning(
+                "Fetched community handler dir '%s' but could not determine handler name "
+                "(missing or malformed __init__.py)",
+                handler_folder,
+            )
+        return handler_meta
+
     def import_handler(self, handler_name: str, base_import: str = None):
         with self._import_lock:
+            time_before_import = time.perf_counter()
             handler_meta = self.handlers_import_status[handler_name]
             handler_dir = handler_meta["path"]
 
-            handler_folder_name = str(handler_dir.name)
-            if base_import is None:
-                base_import = "mindsdb.integrations.handlers."
+            # Community handlers live outside the mindsdb package tree.
+            # They need spec_from_file_location so relative imports inside the handler.
+            if handler_meta.get("community") and handler_dir is not None:
+                handler_folder_name = str(handler_dir.name)
+                try:
+                    parent_pkg = "mindsdb_community_handlers"
+                    if parent_pkg not in sys.modules:
+                        parent_mod = types.ModuleType(parent_pkg)
+                        parent_mod.__path__ = [str(self.community_handlers_dir)]
+                        parent_mod.__package__ = parent_pkg
+                        sys.modules[parent_pkg] = parent_mod
+
+                    module_name = f"{parent_pkg}.{handler_folder_name}"
+                    init_file = handler_dir / "__init__.py"
+                    # We need to use spec_from_file_location with submodule_search_locations
+                    # to make relative imports work inside the handler's __init__.py
+                    spec = iutil.spec_from_file_location(
+                        module_name,
+                        init_file,
+                        submodule_search_locations=[str(handler_dir)],
+                    )
+                    if spec is None or spec.loader is None:
+                        raise ImportError(f"Could not create module spec for community handler '{handler_name}'")
+                    handler_module = iutil.module_from_spec(spec)
+                    handler_module.__package__ = module_name
+                    # Insert the module so that imports can wokr inside the mindsdb handler
+                    sys.modules[module_name] = handler_module
+                    spec.loader.exec_module(handler_module)
+                    self.handler_modules[handler_name] = handler_module
+                    handler_meta = self._get_handler_meta(handler_name)
+                    logger.debug(f"Community handler '{handler_name}' imported successfully!")
+                except Exception as e:
+                    # Remove the module from sys.modules in case of import failure to avoid inconsistent state
+                    sys.modules.pop(module_name, None)
+                    handler_meta["import"]["success"] = False
+                    handler_meta["import"]["error_message"] = str(e)
+                    logger.debug(f"Failed to import community handler '{handler_name}': {e}")
+            else:
+                handler_folder_name = str(handler_dir.name) if handler_dir else ""
+                if base_import is None:
+                    base_import = "mindsdb.integrations.handlers."
 
-            try:
-                handler_module = importlib.import_module(f"{base_import}{handler_folder_name}")
-                self.handler_modules[handler_name] = handler_module
-                handler_meta = self._get_handler_meta(handler_name)
-            except Exception as e:
-                handler_meta["import"]["success"] = False
-                handler_meta["import"]["error_message"] = str(e)
+                try:
+                    handler_module = importlib.import_module(f"{base_import}{handler_folder_name}")
+                    self.handler_modules[handler_name] = handler_module
+                    handler_meta = self._get_handler_meta(handler_name)
+                    logger.debug(
+                        f"Handler '{handler_name}' imported successfully in {(time.perf_counter() - time_before_import):.3f} seconds"
+                    )
+                except Exception as e:
+                    handler_meta["import"]["success"] = False
+                    handler_meta["import"]["error_message"] = str(e)
+                    logger.debug(f"Failed to import handler '{handler_name}': {e}")
 
             self.handlers_import_status[handler_meta["name"]] = handler_meta
             return handler_meta
 
     def get_handlers_import_status(self):
-        # tries to import all not imported yet
-
         result = {}
         for handler_name in list(self.handlers_import_status.keys()):
-            handler_meta = self.get_handler_meta(handler_name)
+            handler_meta = self.handlers_import_status[handler_name]
+            if handler_meta.get("support_level") == "community" and handler_meta.get("path") is None:
+                result[handler_name] = handler_meta
+                continue
+            handler_folder = handler_meta.get("import", {}).get("folder")
+            handler_meta = self.get_handler_meta(handler_name, handler_folder)
             result[handler_name] = handler_meta
 
         return result
@@ -895,11 +1047,26 @@ def get_handler_metadata(self, handler_name):
         # returns metadata
         return self.handlers_import_status.get(handler_name)
 
-    def get_handler_meta(self, handler_name):
+    def get_handler_meta(self, handler_name, handler_folder=None):
         # returns metadata and tries to import it
         handler_meta = self.handlers_import_status.get(handler_name)
         if handler_meta is None:
-            return
+            return None
+        # Stub from the index: path=None means the handler hasn't been
+        # fetched yet β€” download it on demand from the community repo.
+        if handler_meta.get("support_level") == "community" and handler_meta["path"] is None:
+            # Derive folder from stub metadata if not explicitly provided.
+            if handler_folder is None:
+                handler_folder = handler_meta.get("import", {}).get("folder")
+            if handler_folder is None:
+                logger.warning(
+                    "Community handler '%s' has no folder in metadata, skipping fetch",
+                    handler_name,
+                )
+                return None
+            handler_meta = self._fetch_community_handler(handler_name, handler_folder)
+        if handler_meta is None:
+            return None
         if handler_meta["import"]["success"] is None:
             handler_meta = self.import_handler(handler_name)
         return handler_meta
diff --git a/mindsdb/interfaces/database/log.py b/mindsdb/interfaces/database/log.py
index bda24fa9f6b..4a3b9a15af9 100644
--- a/mindsdb/interfaces/database/log.py
+++ b/mindsdb/interfaces/database/log.py
@@ -9,13 +9,14 @@
 from mindsdb_sql_parser.utils import JoinType
 
 from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-from mindsdb.integrations.utilities.query_traversal import query_traversal
 from mindsdb.utilities.functions import resolve_table_identifier
-from mindsdb.api.executor.utilities.sql import get_query_tables
 from mindsdb.utilities.exception import EntityNotExistsError
-import mindsdb.interfaces.storage.db as db
 from mindsdb.utilities.context import context as ctx
-from mindsdb.api.executor.datahub.classes.response import DataHubResponse
+from mindsdb.utilities.types.column import Column
+from mindsdb.integrations.utilities.query_traversal import query_traversal
+from mindsdb.integrations.libs.response import TableResponse
+import mindsdb.interfaces.storage.db as db
+from mindsdb.api.executor.utilities.sql import get_query_tables
 from mindsdb.api.executor.datahub.classes.tables_row import (
     TABLES_ROW_TYPE,
     TablesRow,
@@ -228,7 +229,7 @@ def get_tables_rows(self) -> List[TablesRow]:
             for table_name in self._tables.keys()
         ]
 
-    def query(self, query: Select = None, native_query: str = None, session=None) -> DataHubResponse:
+    def query(self, query: Select = None, native_query: str = None, session=None) -> TableResponse:
         if native_query is not None:
             if query is not None:
                 raise Exception("'query' and 'native_query' arguments can not be used together")
@@ -290,6 +291,5 @@ def check_columns(node, is_table, **kwargs):
                     df[df_column_name] = df[df_column_name].astype(column_type)
         # endregion
 
-        columns_info = [{"name": k, "type": v} for k, v in df.dtypes.items()]
-
-        return DataHubResponse(data_frame=df, columns=columns_info)
+        columns = [Column(name=k, dtype=v) for k, v in df.dtypes.items()]
+        return TableResponse(data=df, columns=columns, affected_rows=0)
diff --git a/mindsdb/interfaces/database/projects.py b/mindsdb/interfaces/database/projects.py
index 28b38e5b847..144d633b497 100644
--- a/mindsdb/interfaces/database/projects.py
+++ b/mindsdb/interfaces/database/projects.py
@@ -8,7 +8,7 @@
 import numpy as np
 
 from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier, BinaryOperation, Function
+from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier, BinaryOperation, Function, Join
 from mindsdb_sql_parser import parse_sql
 
 from mindsdb.interfaces.storage import db
@@ -125,7 +125,17 @@ def create_view(self, name: str, query: str, session):
 
             query_context_controller.set_context(query_context_controller.IGNORE_CONTEXT)
             try:
-                SQLQuery(ast_query, session=session, database=self.name)
+                resp = SQLQuery(ast_query, session=session, database=self.name)
+                columns = [col.name for col in resp.fetched_data.columns]
+                seen, duplicates = set(), set()
+                for col in columns:
+                    if col in seen:
+                        duplicates.add(col)
+                    else:
+                        seen.add(col)
+                if len(duplicates) > 0:
+                    raise ValueError(f"Found duplicated columns in the view: {', '.join(duplicates)}")
+
             finally:
                 query_context_controller.release_context(query_context_controller.IGNORE_CONTEXT)
 
@@ -234,29 +244,35 @@ def get_conditions_to_move(node):
                 #     column is not in black list AND (query has star(*) OR column in white list)
 
                 has_star = False
-                white_list, black_list = [], []
+                white_list, black_list = {}, []
                 for target in view_query.targets:
                     if isinstance(target, Star):
                         has_star = True
                     if isinstance(target, Identifier):
                         name = target.parts[-1].lower()
                         if target.alias is None or target.alias.parts[-1].lower() == name:
-                            white_list.append(name)
+                            white_list[name] = target
                     elif target.alias is not None:
                         black_list.append(target.alias.parts[-1].lower())
 
+                is_join = isinstance(view_query.from_table, Join)
                 view_where = view_query.where
                 for condition in conditions:
                     arg1, arg2 = condition.args
 
                     if isinstance(arg1, Identifier):
                         name = arg1.parts[-1].lower()
-                        if name in black_list or not (has_star or name in white_list):
+                        # don't move condition for join with Star
+                        if name in black_list or not (has_star and not is_join):
                             continue
+                        elif name in white_list:
+                            arg1 = white_list[name]
                     if isinstance(arg2, Identifier):
                         name = arg2.parts[-1].lower()
-                        if name in black_list or not (has_star or name in white_list):
+                        if name in black_list or not (has_star and not is_join):
                             continue
+                        elif name in white_list:
+                            arg2 = white_list[name]
 
                     # condition can be moved into view
                     condition2 = BinaryOperation(condition.op, [arg1, arg2])
@@ -312,7 +328,13 @@ def get_conditions_to_move(node):
 
         # combine outer query with view's query
         view_query.parentheses = True
+
+        # keep alias (column of the query might relate to it)
+        alias = query.from_table.alias if query.from_table.alias is not None else query.from_table
+        view_query.alias = Identifier(parts=[alias.parts[-1]])
+
         query.from_table = view_query
+
         return query
 
     def query_view(self, query: Select, session) -> pd.DataFrame:
diff --git a/mindsdb/interfaces/file/file_controller.py b/mindsdb/interfaces/file/file_controller.py
index 5dfa7c05360..cb1308a952f 100644
--- a/mindsdb/interfaces/file/file_controller.py
+++ b/mindsdb/interfaces/file/file_controller.py
@@ -169,6 +169,7 @@ def get_file_pages(self, source_path: str):
         """
         file_reader = FileReader(path=source_path)
         tables = file_reader.get_contents()
+        file_reader.close()
 
         pages_files = {}
         pages_index = {}
diff --git a/mindsdb/interfaces/functions/controller.py b/mindsdb/interfaces/functions/controller.py
index 6503e6af402..7f63fa8b2de 100644
--- a/mindsdb/interfaces/functions/controller.py
+++ b/mindsdb/interfaces/functions/controller.py
@@ -4,7 +4,6 @@
 from duckdb.typing import BIGINT, DOUBLE, VARCHAR, BLOB, BOOLEAN
 
 from mindsdb.interfaces.storage.model_fs import HandlerStorage
-from mindsdb.integrations.libs.llm.utils import get_llm_config
 from mindsdb.utilities.config import config
 
 
@@ -140,10 +139,7 @@ def llm_call_function(self, node):
         try:
             from mindsdb.interfaces.knowledge_base.llm_client import LLMClient
 
-            llm_config = get_llm_config(chat_model_params["provider"], chat_model_params)
-            chat_model_params = llm_config.model_dump(by_alias=True)
-            chat_model_params = {k: v for k, v in chat_model_params.items() if v is not None}
-
+            chat_model_params.pop("api_keys", None)
             llm = LLMClient(chat_model_params, session=self.session)
         except Exception as e:
             raise RuntimeError(f"Unable to use LLM function, check ENV variables: {e}") from e
diff --git a/mindsdb/interfaces/jobs/jobs_controller.py b/mindsdb/interfaces/jobs/jobs_controller.py
index 31382daedf1..5c85372ffb1 100644
--- a/mindsdb/interfaces/jobs/jobs_controller.py
+++ b/mindsdb/interfaces/jobs/jobs_controller.py
@@ -16,6 +16,7 @@
 from mindsdb.interfaces.database.projects import ProjectController
 from mindsdb.interfaces.query_context.context_controller import query_context_controller
 from mindsdb.interfaces.database.log import LogDBController
+from mindsdb.integrations.libs.response import TableResponse
 
 from mindsdb.utilities import log
 
@@ -346,9 +347,9 @@ def get_history(self, name: str, project_name: str) -> List[dict]:
                 ],
             ),
         )
-        response = logs_db_controller.query(query)
+        response: TableResponse = logs_db_controller.query(query)
 
-        names = [i["name"] for i in response.columns]
+        names = [i.name for i in response.columns]
         return response.data_frame[names].to_dict(orient="records")
 
 
diff --git a/mindsdb/interfaces/knowledge_base/controller.py b/mindsdb/interfaces/knowledge_base/controller.py
index 4e551024225..d158917f72c 100644
--- a/mindsdb/interfaces/knowledge_base/controller.py
+++ b/mindsdb/interfaces/knowledge_base/controller.py
@@ -1,4 +1,3 @@
-import os
 import copy
 from typing import Dict, List, Optional, Any, Text, Tuple, Union
 import json
@@ -6,7 +5,7 @@
 
 import pandas as pd
 import numpy as np
-from pydantic import BaseModel, ValidationError
+from pydantic import BaseModel
 from sqlalchemy.orm.attributes import flag_modified
 
 from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
@@ -17,45 +16,36 @@
 
 import mindsdb.interfaces.storage.db as db
 from mindsdb.integrations.libs.vectordatabase_handler import (
-    DistanceFunction,
     TableField,
     VectorStoreHandler,
 )
 from mindsdb.integrations.utilities.handler_utils import get_api_key
 from mindsdb.integrations.utilities.handlers.auth_utilities.snowflake import get_validated_jwt
 
-from mindsdb.integrations.utilities.rag.settings import RerankerMode
-
-from mindsdb.interfaces.agents.utils.constants import DEFAULT_EMBEDDINGS_MODEL_PROVIDER, MAX_INSERT_BATCH_SIZE
+from mindsdb.interfaces.agents.utils.constants import MAX_INSERT_BATCH_SIZE
 from mindsdb.interfaces.database.projects import ProjectController
 from mindsdb.interfaces.knowledge_base.preprocessing.models import PreprocessingConfig, Document
 from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import PreprocessorFactory
 from mindsdb.interfaces.knowledge_base.evaluate import EvaluateBase
 from mindsdb.interfaces.knowledge_base.executor import KnowledgeBaseQueryExecutor
+from mindsdb.interfaces.knowledge_base.default_storage_resolver import resolve_default_storage_engines
 from mindsdb.interfaces.model.functions import PredictorRecordNotFound
 from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
-from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs
 from mindsdb.utilities.config import config
 from mindsdb.utilities.context import context as ctx
-from mindsdb.interfaces.agents.utils.pydantic_ai_model_factory import get_llm_provider
-from mindsdb.interfaces.knowledge_base.llm_wrapper import create_chat_model
+from mindsdb.utilities.utils import validate_pydantic_params
+from mindsdb.utilities import log
 
 from mindsdb.api.executor.command_executor import ExecuteCommands
 from mindsdb.api.executor.utilities.sql import query_df
-from mindsdb.utilities import log
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs
+from mindsdb.integrations.utilities.rag.settings import RerankerMode, RerankerConfig
 from mindsdb.integrations.utilities.rag.rerankers.base_reranker import BaseLLMReranker, ListwiseLLMReranker
 from mindsdb.interfaces.knowledge_base.llm_client import LLMClient
 
 logger = log.getLogger(__name__)
 
 
-def _require_agent_extra(feature: str):
-    if create_chat_model is None:
-        raise ImportError(
-            f"{feature} requires the optional agent dependencies. Install them via `pip install mindsdb[kb]`."
-        )
-
-
 class KnowledgeBaseInputParams(BaseModel):
     metadata_columns: List[str] | None = None
     content_columns: List[str] | None = None
@@ -93,37 +83,10 @@ def get_model_params(model_params: dict, default_config_key: str):
     return combined_model_params
 
 
-def adapt_embedding_model_params(embedding_model_params: dict):
-    """
-    Prepare parameters for embedding model.
-    """
-    params_copy = copy.deepcopy(embedding_model_params)
-    provider = params_copy.pop("provider", None).lower()
-    api_key = get_api_key(provider, params_copy, strict=False) or params_copy.get("api_key")
-    # Underscores are replaced because the provider name ultimately gets mapped to a class name.
-    # This is mostly to support Azure OpenAI (azure_openai); the mapped class name is 'AzureOpenAIEmbeddings'.
-    params_copy["class"] = provider.replace("_", "")
-    if provider == "azure_openai":
-        # Azure OpenAI expects the api_key to be passed as 'openai_api_key'.
-        params_copy["openai_api_key"] = api_key
-        params_copy["azure_endpoint"] = params_copy.pop("base_url")
-        if "chunk_size" not in params_copy:
-            params_copy["chunk_size"] = 2048
-        if "api_version" in params_copy:
-            params_copy["openai_api_version"] = params_copy["api_version"]
-    else:
-        params_copy[f"{provider}_api_key"] = api_key
-    params_copy.pop("api_key", None)
-    params_copy["model"] = params_copy.pop("model_name", None)
-
-    return params_copy
-
-
 def get_reranking_model_from_params(reranking_model_params: dict):
     """
     Create reranking model from parameters.
     """
-    from mindsdb.integrations.utilities.rag.settings import RerankerConfig
 
     # Work on a copy; do not mutate caller's dict
     params_copy = copy.deepcopy(reranking_model_params)
@@ -179,7 +142,7 @@ def rotate_provider_api_key(params):
     :param params: input params, can be modified by this function
     :return: a new api key if it is refreshed
     """
-    provider = params.get("provider").lower()
+    provider = params.get("provider", "").lower()
 
     if provider == "snowflake":
         if "snowflake_account_id" in params:
@@ -726,30 +689,6 @@ def delete_query(self, query: Delete):
         self.addapt_conditions_columns(conditions)
         db_handler.dispatch_delete(query, conditions)
 
-    def hybrid_search(
-        self,
-        query: str,
-        keywords: List[str] = None,
-        metadata: Dict[str, str] = None,
-        distance_function=DistanceFunction.COSINE_DISTANCE,
-    ) -> pd.DataFrame:
-        query_df = pd.DataFrame.from_records([{TableField.CONTENT.value: query}])
-        embeddings_df = self._df_to_embeddings(query_df)
-        if embeddings_df.empty:
-            return pd.DataFrame([])
-        embeddings = embeddings_df.iloc[0][TableField.EMBEDDINGS.value]
-        keywords_query = None
-        if keywords is not None:
-            keywords_query = " ".join(keywords)
-        db_handler = self.get_vector_db()
-        return db_handler.hybrid_search(
-            self._kb.vector_database_table,
-            embeddings,
-            query=keywords_query,
-            metadata=metadata,
-            distance_function=distance_function,
-        )
-
     def clear(self):
         """
         Clear data in KB table
@@ -1102,91 +1041,6 @@ def _content_to_embeddings(self, content: str) -> List[float]:
         res = self._df_to_embeddings(df)
         return res[TableField.EMBEDDINGS.value][0]
 
-    @staticmethod
-    def call_litellm_embedding(session, model_params, messages):
-        args = copy.deepcopy(model_params)
-
-        if "model_name" not in args:
-            raise ValueError("'model_name' must be provided for embedding model")
-
-        llm_model = args.pop("model_name")
-        engine = args.pop("provider")
-
-        module = session.integration_controller.get_handler_module("litellm")
-        if module is None or module.Handler is None:
-            raise ValueError(f'Unable to use "{engine}" provider. Litellm handler is not installed')
-        return module.Handler.embeddings(engine, llm_model, messages, args)
-
-    def build_rag_pipeline(self, retrieval_config: dict):
-        """
-        Builds a RAG pipeline with returned sources
-
-        Args:
-            retrieval_config: dict with retrieval config
-
-        Returns:
-            RAG: Configured RAG pipeline instance
-
-        Raises:
-            ValueError: If the configuration is invalid or required components are missing
-        """
-        # Get embedding model from knowledge base
-        from mindsdb.interfaces.knowledge_base.embedding_model_utils import construct_embedding_model_from_args
-        from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
-        from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
-
-        embedding_model_params = get_model_params(self._kb.params.get("embedding_model", {}), "default_embedding_model")
-        if self._kb.embedding_model:
-            # Extract embedding model args from knowledge base table
-            embedding_args = self._kb.embedding_model.learn_args.get("using", {})
-            # Construct the embedding model directly
-            embeddings_model = construct_embedding_model_from_args(embedding_args, session=self.session)
-            logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
-        elif embedding_model_params:
-            embeddings_model = construct_embedding_model_from_args(
-                adapt_embedding_model_params(embedding_model_params), session=self.session
-            )
-            logger.debug(f"Using knowledge base embedding model from params: {self._kb.params['embedding_model']}")
-        else:
-            # Use default embedding model with default provider
-            # Default to OpenAI's text-embedding-3-small for OpenAI provider, otherwise let the provider choose
-            default_model_name = "text-embedding-3-small" if DEFAULT_EMBEDDINGS_MODEL_PROVIDER == "openai" else None
-            default_embedding_args = {
-                "provider": DEFAULT_EMBEDDINGS_MODEL_PROVIDER,
-            }
-            if default_model_name:
-                default_embedding_args["model_name"] = default_model_name
-            embeddings_model = construct_embedding_model_from_args(default_embedding_args, session=self.session)
-            logger.debug(
-                f"Using default embedding model ({DEFAULT_EMBEDDINGS_MODEL_PROVIDER}) as knowledge base has no embedding model"
-            )
-
-        # Update retrieval config with knowledge base parameters
-        kb_params = {"vector_store_config": {"kb_table": self}}
-
-        # Load and validate config
-        try:
-            rag_config = load_rag_config(retrieval_config, kb_params, embeddings_model)
-
-            # Build LLM if specified
-            if "llm_model_name" in rag_config:
-                llm_args = {"model_name": rag_config.llm_model_name}
-                if not rag_config.llm_provider:
-                    llm_args["provider"] = get_llm_provider(llm_args)
-                else:
-                    llm_args["provider"] = rag_config.llm_provider
-                _require_agent_extra("Building knowledge base retrieval pipelines")
-                rag_config.llm = create_chat_model(llm_args)
-
-            # Create RAG pipeline
-            rag = RAG(rag_config)
-            logger.debug(f"RAG pipeline created with config: {rag_config}")
-            return rag
-
-        except Exception as e:
-            logger.exception("Error building RAG pipeline:")
-            raise ValueError(f"Failed to build RAG pipeline: {str(e)}") from e
-
     def _parse_metadata(self, base_metadata):
         """Helper function to robustly parse metadata string to dict"""
         if isinstance(base_metadata, dict):
@@ -1207,36 +1061,6 @@ def _generate_document_id(self, content: str, content_column: str, provided_id:
 
         return generate_document_id(content=content, provided_id=provided_id)
 
-    def _convert_metadata_value(self, value):
-        """
-        Convert metadata value to appropriate Python type.
-
-        Args:
-            value: The value to convert
-
-        Returns:
-            Converted value in appropriate Python type
-        """
-        if pd.isna(value):
-            return None
-
-        # Handle pandas/numpy types
-        if pd.api.types.is_datetime64_any_dtype(value) or isinstance(value, pd.Timestamp):
-            return str(value)
-        elif pd.api.types.is_integer_dtype(type(value)):
-            return int(value)
-        elif pd.api.types.is_float_dtype(type(value)):
-            return float(value)
-        elif pd.api.types.is_bool_dtype(type(value)):
-            return bool(value)
-
-        # Handle basic Python types
-        if isinstance(value, (int, float, bool)):
-            return value
-
-        # Convert everything else to string
-        return str(value)
-
     def create_index(self, params: dict = None):
         """
         Create an index on the knowledge base table
@@ -1258,26 +1082,6 @@ class KnowledgeBaseController:
     def __init__(self, session) -> None:
         self.session = session
 
-    def _check_kb_input_params(self, params):
-        # check names and types KB params
-        try:
-            KnowledgeBaseInputParams.model_validate(params)
-        except ValidationError as e:
-            problems = []
-            for error in e.errors():
-                parameter = ".".join([str(i) for i in error["loc"]])
-                param_type = error["type"]
-                if param_type == "extra_forbidden":
-                    msg = f"Parameter '{parameter}' is not allowed"
-                else:
-                    msg = f"Error in '{parameter}' (type: {param_type}): {error['msg']}. Input: {repr(error['input'])}"
-                problems.append(msg)
-
-            msg = "\n".join(problems)
-            if len(problems) > 1:
-                msg = "\n" + msg
-            raise ValueError(f"Problem with knowledge base parameters: {msg}") from e
-
     def add(
         self,
         name: str,
@@ -1299,10 +1103,9 @@ def add(
         # Validate preprocessing config first if provided
         if preprocessing_config is not None:
             PreprocessingConfig(**preprocessing_config)  # Validate before storing
-            params = params or {}
             params["preprocessing"] = preprocessing_config
 
-        self._check_kb_input_params(params)
+        validate_pydantic_params(params, KnowledgeBaseInputParams, "knowledge base")
 
         # Check if vector_size is provided when using sparse vectors
         is_sparse = params.get("is_sparse")
@@ -1322,6 +1125,9 @@ def add(
             raise EntityExistsError("Knowledge base already exists", name)
 
         embedding_params = get_model_params(params.get("embedding_model", {}), "default_embedding_model")
+        if not bool(embedding_params):
+            raise ValueError("No embedding model parameters provided")
+
         params["embedding_model"] = embedding_params
         rotate_provider_api_key(embedding_params)
 
@@ -1349,24 +1155,12 @@ def add(
 
         # search for the vector database table
         if storage is None:
-            cloud_pg_vector = os.environ.get("KB_PGVECTOR_URL")
-            if cloud_pg_vector:
-                vector_table_name = name
-                # Add sparse vector support for pgvector
-                vector_db_params = {}
-                # Check both explicit parameter and model configuration
-                if is_sparse:
-                    vector_db_params["is_sparse"] = True
-                    if vector_size is not None:
-                        vector_db_params["vector_size"] = vector_size
-                vector_db_name = self._create_persistent_pgvector(vector_db_params)
-                params["default_vector_storage"] = vector_db_name
-            else:
-                raise ValueError(
-                    "Vector table is not defined. Set it by `storage=vector_db.vector_table`. "
-                    "One of the options is to use pgvector: "
-                    "https://docs.mindsdb.com/integrations/vector-db-integrations/pgvector"
-                )
+            vector_db_name, vector_table_name = self._resolve_default_vector_storage(
+                kb_name=name,
+                is_sparse=is_sparse,
+                vector_size=vector_size,
+            )
+            params["default_vector_storage"] = vector_db_name
         elif len(storage.parts) != 2:
             raise ValueError("Storage param has to be vector db with table")
         else:
@@ -1455,7 +1249,7 @@ def update(
             params = params or {}
             params["preprocessing"] = preprocessing_config
 
-        self._check_kb_input_params(params)
+        validate_pydantic_params(params, KnowledgeBaseInputParams, "knowledge base")
 
         # get project id
         project = self.session.database_controller.get_project(project_name)
@@ -1544,21 +1338,44 @@ def _create_persistent_pgvector(self, params=None):
         self.session.integration_controller.add(vector_store_name, "pgvector", params or {})
         return vector_store_name
 
-    def _create_persistent_chroma(self, kb_name, engine="chromadb"):
-        """Create default vector database for knowledge base, if not specified"""
-
-        vector_store_name = f"{kb_name}_{engine}"
-
-        vector_store_folder_name = f"{vector_store_name}"
-        connection_args = {"persist_directory": vector_store_folder_name}
+    def _create_persistent_faiss(self, kb_name: str):
+        vector_store_name = f"store_{kb_name}"
 
         # check if exists
         if self.session.integration_controller.get(vector_store_name):
             return vector_store_name
 
-        self.session.integration_controller.add(vector_store_name, engine, connection_args)
+        self.session.integration_controller.add(vector_store_name, "duckdb_faiss", {})
         return vector_store_name
 
+    def _resolve_default_vector_storage(self, kb_name: str, is_sparse: bool = False, vector_size: int = None):
+        resolved_storage = resolve_default_storage_engines(config)
+        default_engine = resolved_storage["default_storage"]
+
+        if default_engine is None:
+            raise ValueError(
+                "Vector table is not defined. Set it by `storage=vector_db.vector_table` or configure "
+                "`knowledge_bases.storage` as one of: pgvector, faiss."
+            )
+
+        if default_engine == "pgvector":
+            vector_db_params = {}
+            if is_sparse:
+                vector_db_params["is_sparse"] = True
+                if vector_size is not None:
+                    vector_db_params["vector_size"] = vector_size
+            vector_db_name = self._create_persistent_pgvector(vector_db_params)
+            return vector_db_name, kb_name
+
+        if default_engine in ("duckdb_faiss", "faiss"):
+            vector_db_name = self._create_persistent_faiss(kb_name)
+            return vector_db_name, kb_name
+
+        raise ValueError(
+            f"Automatic default storage creation is not supported for engine '{default_engine}'. "
+            "Set `storage=vector_db.vector_table` explicitly."
+        )
+
     def _check_embedding_model(self, project_name, params: dict = None, kb_name="") -> dict:
         """check embedding model for knowledge base, return embedding model info"""
 
@@ -1581,15 +1398,15 @@ def _check_embedding_model(self, project_name, params: dict = None, kb_name="")
                 f"Wrong embedding provider: {params['provider']}. Available providers: {', '.join(avail_providers)}"
             )
 
-        llm_client = LLMClient(params, session=self.session)
-
         try:
+            llm_client = LLMClient(params, session=self.session)
+
             resp = llm_client.embeddings(["test"])
             return {"dimension": len(resp[0])}
         except Exception as e:
             raise RuntimeError(f"Problem with embedding model config: {e}") from e
 
-    def delete(self, name: str, project_name: int, if_exists: bool = False) -> None:
+    def delete(self, name: str, project_name: str, if_exists: bool = False) -> None:
         """
         Delete a knowledge base from the database
         """
@@ -1708,3 +1525,26 @@ def evaluate(self, table_name: str, project_name: str, params: dict = None) -> p
         scores = EvaluateBase.run(self.session, kb_table, params)
 
         return scores
+
+    def release_lock(self, knowledge_base: Identifier, project_name):
+        # works only for FAISS dbs.
+        # if FAISS vector db is used in KB: remove this db from handlers cache.
+        #   it will clear internal cache of tables in faiss handler and release locks for faiss files
+        #   return unloaded database name
+
+        if len(knowledge_base.parts) > 1:
+            project_name, kb_name = knowledge_base.parts[-2:]
+        else:
+            kb_name = knowledge_base.parts[-1]
+
+        project_id = self.session.database_controller.get_project(project_name).id
+        kb = self.get(kb_name, project_id)
+        if kb is None or kb.vector_database_id is None:
+            return
+        database = db.Integration.query.get(kb.vector_database_id)
+        if database is None:
+            return
+
+        if database.engine == "duckdb_faiss":
+            self.session.integration_controller.handlers_cache.delete(database.name)
+            return database.name
diff --git a/mindsdb/interfaces/knowledge_base/default_storage_resolver.py b/mindsdb/interfaces/knowledge_base/default_storage_resolver.py
new file mode 100644
index 00000000000..93a4c364054
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/default_storage_resolver.py
@@ -0,0 +1,90 @@
+import os
+from typing import Any
+
+from mindsdb.utilities.config import config
+
+
+def _normalize_engine_name(engine: str | None) -> str | None:
+    if engine is None:
+        return None
+    normalized = engine.strip().lower()
+    if normalized in ("duckdb_faiss", "faiss"):
+        return "faiss"
+    if normalized == "pgvector":
+        return "pgvector"
+    return normalized or None
+
+
+def _get_env_available_engines() -> list[str]:
+    engines: list[str] = ["faiss"]
+    if os.environ.get("KB_PGVECTOR_URL"):
+        engines.append("pgvector")
+    return engines
+
+
+def get_env_available_engines() -> list[str]:
+    return _get_env_available_engines()
+
+
+def get_knowledge_base_storage_config(config_obj=None) -> str | None:
+    config_obj = config_obj or config
+    storage = config_obj.get("knowledge_bases", {}).get("storage", None)
+
+    if storage is None:
+        return None
+
+    if isinstance(storage, list):
+        if len(storage) == 0:
+            return None
+        storage = storage[0]
+
+    if not isinstance(storage, str):
+        raise ValueError("knowledge_bases.storage must be a string value")
+
+    return _normalize_engine_name(storage)
+
+
+def _unique_default_first(default: str | None, ordered: list[str]) -> list[str]:
+    """Return `ordered` with `default` first if set, dropping later duplicates."""
+    out: list[str] = []
+    seen: set[str] = set()
+    for engine in ([default] if default else []) + ordered:
+        if engine not in seen:
+            seen.add(engine)
+            out.append(engine)
+    return out
+
+
+def resolve_default_storage_engines(config_obj=None) -> dict[str, Any]:
+    configured = get_knowledge_base_storage_config(config_obj)
+    pgvector_enabled = os.environ.get("KB_PGVECTOR_URL") is not None
+    available = _get_env_available_engines()
+
+    if configured and configured not in available:
+        available = [configured, *available]
+
+    default = configured
+    if default is None:
+        default = "pgvector" if pgvector_enabled else None
+    if default is None and available:
+        default = available[0]
+
+    candidates = _unique_default_first(default, available)
+    available_set = set(available)
+    resolved_storage = [
+        {
+            "engine": name,
+            "available": name in available_set,
+            "default": name == default,
+            "source": "config" if configured == name else "fallback",
+        }
+        for name in candidates
+    ]
+
+    return {
+        "storage": configured,
+        "resolved_storage": resolved_storage,
+        "default_storage": default,
+        "available_vector_engines": available,
+        "pgvector_enabled": pgvector_enabled,
+    }
diff --git a/mindsdb/interfaces/knowledge_base/embedding_model_utils.py b/mindsdb/interfaces/knowledge_base/embedding_model_utils.py
deleted file mode 100644
index f8f151d7863..00000000000
--- a/mindsdb/interfaces/knowledge_base/embedding_model_utils.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""Custom embedding model utilities to replace langchain construct_model_from_args"""
-
-import copy
-from typing import Dict, Any, List
-
-from mindsdb.interfaces.knowledge_base.llm_client import LLMClient
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class CustomEmbeddingModel:
-    """
-    Custom embedding model wrapper that uses LLMClient for embeddings.
-    This replaces langchain embedding models for use in knowledge_base.
-    """
-
-    def __init__(self, args: Dict[str, Any], session=None):
-        """
-        Initialize the embedding model
-
-        Args:
-            args: Dictionary with model parameters (model_name, provider, etc.)
-            session: Optional session for LLMClient
-        """
-        # Prepare params for LLMClient
-        # Handle model_name -> model mapping if needed
-        params = {
-            "model_name": args.get("model", args.get("model_name")),
-            "provider": args.get("provider", "openai"),
-            **{k: v for k, v in args.items() if k not in ["model", "model_name", "provider", "class", "target"]},
-        }
-
-        self.llm_client = LLMClient(params=params, session=session)
-        self.model_name = params["model_name"]
-
-    def embed_query(self, text: str) -> List[float]:
-        """
-        Embed a single query string
-
-        Args:
-            text: Text to embed
-
-        Returns:
-            List of floats representing the embedding vector
-        """
-        embeddings = self.llm_client.embeddings([text])
-        return embeddings[0] if embeddings else []
-
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        """
-        Embed a list of documents
-
-        Args:
-            texts: List of text strings to embed
-
-        Returns:
-            List of embedding vectors (each is a list of floats)
-        """
-        return self.llm_client.embeddings(texts)
-
-
-def construct_embedding_model_from_args(args: Dict[str, Any], session=None):
-    """
-    Construct an embedding model from arguments (replacement for langchain's construct_model_from_args)
-
-    Args:
-        args: Dictionary with embedding model parameters
-            - class: Embedding class name (for compatibility, but not used)
-            - model or model_name: Model name to use
-            - provider: Provider name (openai, etc.)
-            - Other provider-specific parameters
-        session: Optional session for LLMClient
-
-    Returns:
-        CustomEmbeddingModel instance
-    """
-    # Work on a copy to avoid mutating the original
-    args_copy = copy.deepcopy(args)
-
-    # Extract class name for logging (but we don't use it)
-    class_name = args_copy.pop("class", "OpenAIEmbeddings")
-    target = args_copy.pop("target", None)
-
-    logger.debug(f"Constructing embedding model with class: {class_name}, args: {args_copy}")
-
-    # Create the custom embedding model
-    model = CustomEmbeddingModel(args_copy, session=session)
-
-    # Restore args for compatibility (in case caller expects them)
-    if target is not None:
-        args["target"] = target
-    args["class"] = class_name
-
-    return model
diff --git a/mindsdb/interfaces/knowledge_base/llm_client.py b/mindsdb/interfaces/knowledge_base/llm_client.py
index ab044811b94..61591b62c17 100644
--- a/mindsdb/interfaces/knowledge_base/llm_client.py
+++ b/mindsdb/interfaces/knowledge_base/llm_client.py
@@ -7,6 +7,10 @@
 
 from mindsdb.integrations.utilities.handler_utils import get_api_key
 
+from mindsdb.interfaces.knowledge_base.providers.bedrock import BedrockClient
+from mindsdb.interfaces.knowledge_base.providers.gemini import GeminiClient
+from mindsdb.interfaces.knowledge_base.providers.snowflake import SnowflakeClient
+
 
 def retry_with_exponential_backoff(func):
     def decorator(*args, **kwargs):
@@ -60,22 +64,23 @@ def wrapper(self, messages, *args, **kwargs):
 class LLMClient:
     """
     Class for accession to LLM.
-    It chooses openai client or litellm handler depending on the config
+    It chooses provider client depending on the config
     """
 
     def __init__(self, params: dict = None, session=None):
         self._session = session
-        self.params = params
+        params = params.copy()
 
-        self.provider = params.get("provider", "openai")
+        self.provider = params.pop("provider", "openai")
+        self.model_name = params.pop("model_name")
+        if self.provider == "google":
+            self.provider = "gemini"
 
         if "api_key" not in params:
             api_key = get_api_key(self.provider, params, strict=False)
             if api_key is not None:
                 params["api_key"] = api_key
 
-        self.engine = "openai"
-
         if self.provider == "azure_openai":
             azure_api_key = params.get("api_key") or os.getenv("AZURE_OPENAI_API_KEY")
             azure_api_endpoint = params.get("base_url") or os.environ.get("AZURE_OPENAI_ENDPOINT")
@@ -91,62 +96,46 @@ def __init__(self, params: dict = None, session=None):
                 kwargs["base_url"] = base_url
             self.client = OpenAI(**kwargs)
         elif self.provider == "ollama":
-            kwargs = params.copy()
-            kwargs.pop("model_name")
-            kwargs.pop("provider", None)
-            if kwargs.get("api_key") is None:
-                kwargs["api_key"] = "n/a"
-            self.client = OpenAI(**kwargs)
+            if params.get("api_key") is None:
+                params["api_key"] = "n/a"
+            self.client = OpenAI(**params)
+        elif self.provider == "bedrock":
+            if "aws_region" in params:
+                params["aws_region_name"] = params.pop("aws_region")
+            self.client = BedrockClient(**params)
+        elif self.provider == "gemini":
+            self.client = GeminiClient(**params)
+        elif self.provider == "snowflake":
+            self.client = SnowflakeClient(**params)
         else:
-            # try to use litellm
-            if self._session is None:
-                from mindsdb.api.executor.controllers.session_controller import SessionController
-
-                self._session = SessionController()
-            module = self._session.integration_controller.get_handler_module("litellm")
-
-            if module is None or module.Handler is None:
-                raise ValueError(f'Unable to use "{self.provider}" provider. Litellm handler is not installed')
-
-            self.client = module.Handler
-            self.engine = "litellm"
+            raise NotImplementedError(f'Provider "{self.provider}" is not supported')
 
     @run_in_batches(1000)
     @retry_with_exponential_backoff
     def embeddings(self, messages: List[str]):
-        params = self.params
-        if self.engine == "openai":
+        if self.provider in ("openai", "azure_openai", "ollama"):
             response = self.client.embeddings.create(
-                model=params["model_name"],
+                model=self.model_name,
                 input=messages,
             )
             return [item.embedding for item in response.data]
         else:
-            kwargs = params.copy()
-            model = kwargs.pop("model_name")
-            kwargs.pop("provider", None)
-
-            return self.client.embeddings(self.provider, model=model, messages=messages, args=kwargs)
+            return self.client.embeddings(self.model_name, messages)
 
     @run_in_batches(100)
     def completion(self, messages: List[dict], json_output: bool = False) -> List[str]:
         """
         Call LLM completion and get response
         """
-        params = self.params
-        params["json_output"] = json_output
-        if self.engine == "openai":
+
+        if self.provider in ("openai", "azure_openai", "ollama"):
             response = self.client.chat.completions.create(
-                model=params["model_name"],
+                model=self.model_name,
                 messages=messages,
             )
             return [item.message.content for item in response.choices]
         else:
-            kwargs = params.copy()
-            model = kwargs.pop("model_name")
-            kwargs.pop("provider", None)
-            response = self.client.completion(self.provider, model=model, messages=messages, args=kwargs)
-            return [item.message.content for item in response.choices]
+            return [self.client.completion(self.model_name, messages)]
 
     async def abatch(self, messages_list: List[List[dict]], json_output: bool = False) -> List[List[str]]:
         """
diff --git a/mindsdb/interfaces/knowledge_base/preprocessing/constants.py b/mindsdb/interfaces/knowledge_base/preprocessing/constants.py
index 0e984c2c67a..47332c37b8b 100644
--- a/mindsdb/interfaces/knowledge_base/preprocessing/constants.py
+++ b/mindsdb/interfaces/knowledge_base/preprocessing/constants.py
@@ -1,13 +1,4 @@
-# Default settings for markdown header splitting
-DEFAULT_MARKDOWN_HEADERS = [
-    ("#", "Header 1"),
-    ("##", "Header 2"),
-    ("###", "Header 3"),
-]
-
 # Limits for web crawling
 DEFAULT_CRAWL_DEPTH = None
 DEFAULT_WEB_CRAWL_LIMIT = 1
 DEFAULT_WEB_FILTERS = []
-
-DEFAULT_CONTEXT_DOCUMENT_LIMIT = 50
diff --git a/mindsdb/interfaces/knowledge_base/providers/__init__.py b/mindsdb/interfaces/knowledge_base/providers/__init__.py
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/providers/__init__.py
@@ -0,0 +1 @@
+
diff --git a/mindsdb/interfaces/knowledge_base/providers/bedrock.py b/mindsdb/interfaces/knowledge_base/providers/bedrock.py
new file mode 100644
index 00000000000..2f50c5c4c87
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/providers/bedrock.py
@@ -0,0 +1,147 @@
+import json
+from typing import Dict, List, Optional
+
+
+def prepare_conversation(messages: List[dict]) -> List[dict]:
+    """Convert chat messages to Bedrock `converse` message payload format."""
+    conversation = []
+    for message in messages:
+        content = message["content"]
+        role = message["role"]
+        if role == "system":
+            role = "assistant"
+        if role != "user":
+            if len(conversation) == 0:
+                # the first message has to be user message
+                content = message["role"] + ":\n" + content
+                role = "user"
+
+        conversation.append(
+            {
+                "role": role,
+                "content": [{"text": content}],
+            }
+        )
+    return conversation
+
+
+class AsyncBedrockClient:
+    """Async Bedrock runtime client wrapper"""
+
+    def __init__(
+        self,
+        aws_access_key_id: Optional[str] = None,
+        aws_secret_access_key: Optional[str] = None,
+        aws_region_name: Optional[str] = None,
+        aws_session_token: Optional[str] = None,
+    ):
+        try:
+            from aiobotocore.session import get_session
+        except ImportError as exc:
+            raise ImportError(
+                "aiobotocore is required for the Bedrock reranker client. Install it with `pip install aiobotocore`."
+            ) from exc
+
+        self.aws_access_key_id = aws_access_key_id
+        self.aws_secret_access_key = aws_secret_access_key
+        self.aws_session_token = aws_session_token
+        self.region_name = aws_region_name
+
+        self._session = get_session()
+
+    async def acompletion(
+        self,
+        model_name: str,
+        messages: List[dict],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+    ) -> str:
+        """Generate a chat completion asynchronously via Bedrock."""
+        inference_config = {}
+        if temperature is not None:
+            inference_config["temperature"] = temperature
+        if max_tokens is not None:
+            inference_config["max_tokens"] = max_tokens
+        if top_p is not None:
+            inference_config["top_p"] = top_p
+
+        conversation = prepare_conversation(messages)
+
+        # Create client with credentials
+        client_kwargs = {
+            "service_name": "bedrock-runtime",
+            "region_name": self.region_name,
+            "aws_access_key_id": self.aws_access_key_id,
+            "aws_secret_access_key": self.aws_secret_access_key,
+            "aws_session_token": self.aws_session_token,
+        }
+
+        async with self._session.create_client(**client_kwargs) as client:
+            response = await client.converse(
+                modelId=model_name, messages=conversation, inferenceConfig=inference_config
+            )
+
+        return response["output"]["message"]["content"][0]["text"]
+
+
+class BedrockClient:
+    """Synchronous Bedrock runtime client wrapper"""
+
+    def __init__(
+        self,
+        aws_access_key_id: Optional[str] = None,
+        aws_secret_access_key: Optional[str] = None,
+        aws_region_name: Optional[str] = None,
+        aws_session_token: Optional[str] = None,
+    ):
+        try:
+            import boto3
+        except ImportError as exc:
+            raise ImportError("boto3 is required for the Bedrock client. Install it with `pip install boto3`.") from exc
+
+        self.client = boto3.client(
+            "bedrock-runtime",
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_session_token=aws_session_token,
+            region_name=aws_region_name,
+        )
+
+    def embeddings(self, model_name: str, messages: List[str]) -> List[List[float]]:
+        """Request embedding vectors for each text in `messages`."""
+        embeddings = []
+        for message in messages:
+            native_request = {"inputText": message}
+            request = json.dumps(native_request)
+
+            response = self.client.invoke_model(modelId=model_name, body=request)
+            model_response = json.loads(response["body"].read())
+
+            # Extract and print the generated embedding and the input text token count.
+            embeddings.append(model_response["embedding"])
+
+        return embeddings
+
+    def completion(
+        self,
+        model_name: str,
+        messages: List[dict],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+    ) -> str:
+        """Generate a chat completion synchronously via Bedrock."""
+        inference_config: Dict[str, float | int] = {}
+        if temperature is not None:
+            inference_config["temperature"] = temperature
+        if max_tokens is not None:
+            inference_config["max_tokens"] = max_tokens
+        if top_p is not None:
+            inference_config["top_p"] = top_p
+
+        conversation = prepare_conversation(messages)
+
+        response = self.client.converse(modelId=model_name, messages=conversation, inferenceConfig=inference_config)
+
+        return response["output"]["message"]["content"][0]["text"]
diff --git a/mindsdb/interfaces/knowledge_base/providers/gemini.py b/mindsdb/interfaces/knowledge_base/providers/gemini.py
new file mode 100644
index 00000000000..33e2bc314d0
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/providers/gemini.py
@@ -0,0 +1,79 @@
+from typing import Any, List, Optional
+
+
+class GeminiClient:
+    """Wrapper around google-genai SDK"""
+
+    def __init__(self, api_key: str):
+        try:
+            from google import genai
+            from google.genai import types
+        except ImportError as exc:
+            raise ImportError("google.genai is required. Install it with `pip install google-genai`.") from exc
+
+        self.client = genai.Client(api_key=api_key)
+        self.types = types
+
+    def embeddings(self, model_name: str, messages: List[str]) -> List[List[float]]:
+        """Generate embedding vectors for each text in `messages`."""
+        result = self.client.models.embed_content(model=model_name, contents=messages)
+
+        return [item.values for item in result.embeddings]
+
+    def _prepare_messages(self, messages: List[dict]) -> List[Any]:
+        """Convert chat messages into google-genai content payloads."""
+        contents = []
+        for message in messages:
+            role = message["role"]
+            # system role is not supported
+            if role != "user":
+                role = "model"
+
+            contents.append(self.types.Content(role=role, parts=[self.types.Part(text=message["content"])]))
+        return contents
+
+    def completion(
+        self,
+        model_name: str,
+        messages: List[dict],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+    ) -> str:
+        """Produce a chat response"""
+        config = {}
+        if temperature:
+            config["temperature"] = temperature
+        if max_tokens:
+            config["max_output_tokens"] = max_tokens
+        if top_p:
+            config["top_p"] = top_p
+
+        contents = self._prepare_messages(messages)
+
+        result = self.client.models.generate_content(model=model_name, contents=contents, config=config)
+
+        return result.text
+
+    async def acompletion(
+        self,
+        model_name: str,
+        messages: List[dict],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+    ) -> str:
+        """Async variant of `completion` using the SDK aio client."""
+        config = {}
+        if temperature:
+            config["temperature"] = temperature
+        if max_tokens:
+            config["max_output_tokens"] = max_tokens
+        if top_p:
+            config["top_p"] = top_p
+
+        contents = self._prepare_messages(messages)
+
+        result = await self.client.aio.models.generate_content(model=model_name, contents=contents, config=config)
+
+        return result.text
diff --git a/mindsdb/interfaces/knowledge_base/providers/snowflake.py b/mindsdb/interfaces/knowledge_base/providers/snowflake.py
new file mode 100644
index 00000000000..7bcc3fc6382
--- /dev/null
+++ b/mindsdb/interfaces/knowledge_base/providers/snowflake.py
@@ -0,0 +1,118 @@
+from typing import Dict, List, Optional, Union
+
+import requests
+import httpx
+
+
+def _raise_for_status(response: Union[requests.Response, httpx.Response]) -> None:
+    """Raise an informative HTTPError when Snowflake responds with an error."""
+    if 400 <= response.status_code < 600:
+        if hasattr(response, "reason"):
+            reason = response.reason
+        elif hasattr(response, "reason_phrase"):
+            reason = response.reason_phrase
+        else:
+            reason = "Error"
+        raise requests.HTTPError(f"{reason}: {response.text}", response=response)
+
+
+class SnowflakeClient:
+    """Wrapper over Snowflake Cortex REST endpoints."""
+
+    def __init__(self, account_id: Optional[str] = None, api_key: Optional[str] = None):
+        if account_id is None:
+            raise ValueError("account_id must be provided")
+        if api_key is None:
+            raise ValueError("api_key must be provided")
+
+        self.account_id = account_id.lower()
+        self.api_key = api_key
+
+        self.auth_type = "KEYPAIR_JWT"
+        if self.api_key.startswith("pat/"):
+            self.api_key = self.api_key[4:]
+            self.auth_type = "PROGRAMMATIC_ACCESS_TOKEN"
+
+    def _get_base_url(self) -> str:
+        return f"https://{self.account_id}.snowflakecomputing.com/api/v2"
+
+    def _get_headers(self) -> Dict[str, str]:
+        return {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "Authorization": "Bearer " + self.api_key,
+            "X-Snowflake-Authorization-Token-Type": self.auth_type,
+        }
+
+    def embeddings(self, model_name: str, messages: List[str]) -> List[List[float]]:
+        """Request embedding vectors for the provided `messages`."""
+        url = f"{self._get_base_url()}/cortex/inference:embed"
+
+        payload = {"text": messages, "model": model_name}
+
+        response = requests.post(url, json=payload, headers=self._get_headers())
+        _raise_for_status(response)
+
+        embeddings = []
+        for item in response.json()["data"]:
+            embeddings.append(item["embedding"][0])
+        return embeddings
+
+    def completion(
+        self,
+        model_name: str,
+        messages: List[dict],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+    ) -> str:
+        """Generate a chat completion with the Cortex complete endpoint."""
+        url = f"{self._get_base_url()}/cortex/inference:complete"
+
+        payload = {
+            "model": model_name,
+            "stream": False,
+            "messages": messages,
+        }
+
+        if temperature:
+            payload["temperature"] = temperature
+        if max_tokens:
+            payload["max_tokens"] = max_tokens
+        if top_p:
+            payload["top_p"] = top_p
+
+        response = requests.post(url, json=payload, headers=self._get_headers())
+        _raise_for_status(response)
+        data = response.json()
+        return data["choices"][0]["message"]["content"]
+
+    async def acompletion(
+        self,
+        model_name: str,
+        messages: List[dict],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+    ) -> str:
+        """Async variant of `completion` using httpx."""
+        url = f"{self._get_base_url()}/cortex/inference:complete"
+
+        payload = {
+            "model": model_name,
+            "stream": False,
+            "messages": messages,
+        }
+
+        if temperature:
+            payload["temperature"] = temperature
+        if max_tokens:
+            payload["max_tokens"] = max_tokens
+        if top_p:
+            payload["top_p"] = top_p
+
+        async with httpx.AsyncClient() as client:
+            response = await client.post(url, json=payload, headers=self._get_headers())
+            _raise_for_status(response)
+            data = response.json()
+            return data["choices"][0]["message"]["content"]
diff --git a/mindsdb/interfaces/query_context/context_controller.py b/mindsdb/interfaces/query_context/context_controller.py
index 97a1ec83189..08188c8ed66 100644
--- a/mindsdb/interfaces/query_context/context_controller.py
+++ b/mindsdb/interfaces/query_context/context_controller.py
@@ -1,9 +1,9 @@
-from typing import List, Optional, Iterable
 import pickle
 import datetime as dt
+from typing import List, Optional, Iterable
 
-from sqlalchemy.orm.attributes import flag_modified
 import pandas as pd
+from sqlalchemy.orm.attributes import flag_modified
 
 from mindsdb_sql_parser import Select, Star, OrderBy
 
@@ -17,7 +17,6 @@
 )
 from mindsdb.integrations.utilities.query_traversal import query_traversal
 from mindsdb.utilities.cache import get_cache
-
 from mindsdb.interfaces.storage import db
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities.config import config
@@ -70,14 +69,14 @@ def get_partitions(self, dn, step_call, query: Select) -> Iterable:
         :param query: AST query to execute
         :return: generator with query results
         """
-        if hasattr(dn, "has_support_stream") and dn.has_support_stream():
+        if dn.has_support_stream():
             query2 = self.get_partition_query(step_call.current_step_num, query, stream=True)
 
-            for df in dn.query_stream(query2, fetch_size=self.batch_size):
+            response = dn.query(query=query2, session=step_call.session)
+            for df in response.iterate_no_save():
                 max_track_value = self.get_max_track_value(df)
                 yield df
                 self.set_progress(max_track_value=max_track_value)
-
         else:
             while True:
                 query2 = self.get_partition_query(step_call.current_step_num, query, stream=False)
@@ -457,7 +456,7 @@ def _get_init_last_values(self, l_query: LastQuery, dn, session) -> dict:
 
                 idx = None
                 for i, col in enumerate(columns_info):
-                    if col["name"].upper() == info["column_name"].upper():
+                    if col.name.upper() == info["column_name"].upper():
                         idx = i
                         break
 
diff --git a/mindsdb/interfaces/query_context/last_query.py b/mindsdb/interfaces/query_context/last_query.py
index 1df233d4405..7e00a08c846 100644
--- a/mindsdb/interfaces/query_context/last_query.py
+++ b/mindsdb/interfaces/query_context/last_query.py
@@ -3,7 +3,17 @@
 from collections import defaultdict
 
 from mindsdb_sql_parser.ast import (
-    Identifier, Select, BinaryOperation, Last, Constant, Star, ASTNode, NullConstant, OrderBy, Function, TypeCast
+    Identifier,
+    Select,
+    BinaryOperation,
+    Last,
+    Constant,
+    Star,
+    ASTNode,
+    NullConstant,
+    OrderBy,
+    Function,
+    TypeCast,
 )
 from mindsdb.integrations.utilities.query_traversal import query_traversal
 
@@ -34,21 +44,21 @@ def __init__(self, query: ASTNode):
 
     def _find_last_columns(self, query: ASTNode) -> Union[dict, None]:
         """
-          This function:
-           - Searches LAST column in the input query
-           - Replaces it with constants and memorises link to these constants
-           - Link to constants will be used to inject values to query instead of LAST
-           - Provide checks:
-             - if it is possible to find the table for column
-             - if column in select target
-           - Generates and returns last_column variable which is dict
-                last_columns[table_name] = {
-                    'table': ,
-                    'column': ,
-                    'links': [, ... ],
-                    'target_idx': ,
-                    'gen_init_query': if true: to generate query to initial values for LAST
-                }
+        This function:
+         - Searches LAST column in the input query
+         - Replaces it with constants and memorises link to these constants
+         - Link to constants will be used to inject values to query instead of LAST
+         - Provide checks:
+           - if it is possible to find the table for column
+           - if column in select target
+         - Generates and returns last_column variable which is dict
+              last_columns[table_name] = {
+                  'table': 
, + 'column': , + 'links': [, ... ], + 'target_idx': , + 'gen_init_query': if true: to generate query to initial values for LAST + } """ # index last variables in query @@ -76,7 +86,6 @@ def replace_last_in_tree(node: ASTNode, injected: Constant): return found def index_query(node, is_table, parent_query, **kwargs): - parent_query_id = id(parent_query) last = None if is_table and isinstance(node, Identifier): @@ -105,13 +114,15 @@ def index_query(node, is_table, parent_query, **kwargs): if last is not None: # memorize - conditions.append({ - 'query_id': parent_query_id, - 'condition': node, - 'last': last, - 'column': col, - 'gen_init_query': gen_init_query # generate query to fetch initial last values from table - }) + conditions.append( + { + "query_id": parent_query_id, + "condition": node, + "last": last, + "column": col, + "gen_init_query": gen_init_query, # generate query to fetch initial last values from table + } + ) # find lasts query_traversal(query, index_query) @@ -122,7 +133,7 @@ def index_query(node, is_table, parent_query, **kwargs): self.query_orig = copy.deepcopy(query) for info in conditions: - self.last_idx[info['query_id']].append(info) + self.last_idx[info["query_id"]].append(info) # index query targets query_id = id(query) @@ -152,21 +163,20 @@ def index_query(node, is_table, parent_query, **kwargs): last_columns = {} for parent_query_id, items in self.last_idx.items(): for info in items: - col = info['column'] - last = info['last'] + col = info["column"] + last = info["last"] tables = tables_idx[parent_query_id] uniq_tables = len(set([id(v) for v in tables.values()])) if len(col.parts) > 1: - table = tables.get(col.parts[-2]) if table is None: - raise ValueError('cant find table') + raise ValueError("cant find table") elif uniq_tables == 1: table = list(tables.values())[0] else: # or just skip it? - raise ValueError('cant find table') + raise ValueError("cant find table") col_name = col.parts[-1] @@ -179,29 +189,46 @@ def index_query(node, is_table, parent_query, **kwargs): # will try to get by name ... else: - raise ValueError('Last value should be in query target') + raise ValueError("Last value should be in query target") last_columns[table_name] = { - 'table': table, - 'column': col_name, - 'links': [last], - 'target_idx': target_idx, - 'gen_init_query': info['gen_init_query'] + "table": table, + "column": col_name, + "links": [last], + "target_idx": target_idx, + "gen_init_query": info["gen_init_query"], } - elif last_columns[table_name]['column'] == col_name: - last_columns[table_name]['column'].append(last) + elif last_columns[table_name]["column"] == col_name: + last_columns[table_name]["column"].append(last) else: - raise ValueError('possible to use only one column') + raise ValueError("possible to use only one column") return last_columns def to_string(self) -> str: """ - String representation of the query - Used to identify query in query_context table + String representation of the query + Used to identify query in query_context table """ - return self.query_orig.to_string() + query = self.query_orig + if isinstance(query.from_table, Select) and query.targets == [Star()]: + # simplify nested query + if ( + query.group_by is None + and query.order_by is None + and query.having is None + and query.distinct is False + and query.where is None + and query.limit is None + and query.offset is None + and query.cte is None + ): + query = copy.deepcopy(query.from_table) + query.parentheses = False + query.alias = None + + return query.to_string() def get_last_columns(self) -> List[dict]: """ @@ -210,11 +237,11 @@ def get_last_columns(self) -> List[dict]: """ return [ { - 'table': info['table'], - 'table_name': table_name, - 'column_name': info['column'], - 'target_idx': info['target_idx'], - 'gen_init_query': info['gen_init_query'], + "table": info["table"], + "table_name": table_name, + "column_name": info["column"], + "target_idx": info["target_idx"], + "gen_init_query": info["gen_init_query"], } for table_name, info in self.last_tables.items() ] @@ -224,8 +251,8 @@ def apply_values(self, values: dict) -> ASTNode: Fills query with new values and return it """ for table_name, info in self.last_tables.items(): - value = values.get(table_name, {}).get(info['column']) - for last in info['links']: + value = values.get(table_name, {}).get(info["column"]) + for last in info["links"]: last.value = value return self.query @@ -239,9 +266,9 @@ def get_init_queries(self): # replace values for items in self.last_idx.values(): for info in items: - node = info['condition'] + node = info["condition"] back_up_values.append([node.op, node.args[1]]) - node.op = 'is not' + node.op = "is not" node.args[1] = NullConstant() query2 = copy.deepcopy(self.query) @@ -249,18 +276,16 @@ def get_init_queries(self): # return values for items in self.last_idx.values(): for info in items: - node = info['condition'] + node = info["condition"] op, arg1 = back_up_values.pop(0) node.op = op node.args[1] = arg1 for info in self.get_last_columns(): - if not info['gen_init_query']: + if not info["gen_init_query"]: continue - col = Identifier(info['column_name']) + col = Identifier(info["column_name"]) query2.targets = [col] - query2.order_by = [ - OrderBy(col, direction='DESC') - ] + query2.order_by = [OrderBy(col, direction="DESC")] query2.limit = Constant(1) yield query2, info diff --git a/mindsdb/interfaces/query_context/query_task.py b/mindsdb/interfaces/query_context/query_task.py index 57cc62d7f81..97cbbdcbf26 100644 --- a/mindsdb/interfaces/query_context/query_task.py +++ b/mindsdb/interfaces/query_context/query_task.py @@ -10,7 +10,6 @@ def __init__(self, *args, **kwargs): self.query_id = self.object_id def run(self, stop_event): - try: session = SessionController() SQLQuery(None, query_id=self.query_id, session=session, stop_event=stop_event) diff --git a/mindsdb/interfaces/tasks/task_thread.py b/mindsdb/interfaces/tasks/task_thread.py index f753a59928a..8b9eb7ca9e5 100644 --- a/mindsdb/interfaces/tasks/task_thread.py +++ b/mindsdb/interfaces/tasks/task_thread.py @@ -23,6 +23,9 @@ def run(self): # create context and session task_record = db.Tasks.query.get(self.task_id) + if task_record is None: + logger.error(f"Task record not found: {self.task_id}") + return ctx.set_default() ctx.company_id = task_record.company_id diff --git a/mindsdb/utilities/config.py b/mindsdb/utilities/config.py index 82a857b00c7..e3fff3bf023 100644 --- a/mindsdb/utilities/config.py +++ b/mindsdb/utilities/config.py @@ -13,6 +13,49 @@ # NOTE do not `import from mindsdb` here +def get_bool_env_var(env_name: str) -> bool: + """Read an environment variable and return its value as a boolean. + + Args: + env_name (str): name of the environment variable to read. + + Returns: + bool: True or False, or None if the variable is not set or empty. + + Raises: + ValueError: if the value is set but does not match any known boolean representation. + """ + value = os.environ.get(env_name) + if value is None or value == "": + return None + match value.lower(): + case "1" | "true" | "on" | "yes" | "y": + value = True + case "0" | "false" | "off" | "no" | "n": + value = False + case _: + raise ValueError(f"Expected a boolean value for the environment variable '{env_name}', but got '{value}'") + return value + + +def get_list_env_var(env_name: str) -> list[str]: + """Read an environment variable and return its value as a list of strings. + + The value is expected to be a comma-separated string. Whitespace around + each item is stripped, and empty items are ignored. + + Args: + env_name (str): name of the environment variable to read. + + Returns: + list[str]: list of non-empty strings, or None if the variable is not set or empty. + """ + value = os.environ.get(env_name) + if value is None or value.strip() == "": + return None + return [item.strip() for item in value.split(",") if item.strip()] + + def _merge_key_recursive(target_dict, source_dict, key): if key not in target_dict: target_dict[key] = source_dict[key] @@ -155,6 +198,7 @@ def __new__(cls, *args, **kwargs) -> "Config": "http_permanent_session_lifetime": datetime.timedelta(days=31), "username": "mindsdb", "password": "", + "token": None, # MINDSDB_AUTH_TOKEN }, "logging": { "handlers": { @@ -199,6 +243,27 @@ def __new__(cls, *args, **kwargs) -> "Config": "host": "0.0.0.0", # API server binds to all interfaces by default "port": "8000", }, + "mcp": { + "cors": { + "enabled": True, + "allow_origins": [], + "allow_origin_regex": r"https?://(localhost|127\.0\.0\.1)(:\d+)?", + "allow_headers": ["*"], + }, + "rate_limit": { + "enabled": False, + "requests_per_minute": 60, + }, + "oauth": { + "enabled": False, # MINDSDB_MCP_OAUTH_ENABLED + "issuer_url": "", # MINDSDB_MCP_OAUTH_ISSUER_URL + "client_id": "", # MINDSDB_MCP_OAUTH_CLIENT_ID + "client_secret": "", # MINDSDB_MCP_OAUTH_CLIENT_SECRET + "scope": "mcp:tools", # MINDSDB_MCP_OAUTH_SCOPE + "public_url": "", # MINDSDB_MCP_OAUTH_PUBLIC_URL + }, + "dns_rebinding_protection": False, # MINDSDB_MCP_DNS_REBINDING_PROTECTION + }, }, "cache": {"type": "local"}, "ml_task_queue": {"type": "local"}, @@ -215,6 +280,9 @@ def __new__(cls, *args, **kwargs) -> "Config": "data_catalog": { "enabled": False, }, + "data_stream": { + "fetch_size": 10000, + }, "byom": { "enabled": False, }, @@ -223,6 +291,7 @@ def __new__(cls, *args, **kwargs) -> "Config": "knowledge_bases": { "disable_autobatch": False, "disable_pgvector_autobatch": True, + "storage": None, }, } # endregion @@ -246,13 +315,17 @@ def prepare_env_config(self) -> None: """Collect config values from env vars to self._env_config""" self._env_config = { "logging": {"handlers": {"console": {}, "file": {}}}, - "api": {"http": {}}, + "api": { + "http": {}, + "mcp": {"cors": {}, "rate_limit": {}, "oauth": {}}, + }, "auth": {}, "paths": {}, "permanent_storage": {}, "ml_task_queue": {}, "gui": {}, "byom": {}, + "knowledge_bases": {}, } # region storage root path @@ -312,6 +385,10 @@ def prepare_env_config(self) -> None: elif http_auth_type != "": raise ValueError(f"Wrong value of env var MINDSDB_HTTP_AUTH_TYPE={http_auth_type}") + mindsdb_auth_token = os.environ.get("MINDSDB_AUTH_TOKEN", "") + if mindsdb_auth_token != "": + self._env_config["auth"]["token"] = mindsdb_auth_token + # region logging if os.environ.get("MINDSDB_LOG_LEVEL", "") != "": self._env_config["logging"]["handlers"]["console"]["level"] = os.environ["MINDSDB_LOG_LEVEL"] @@ -398,20 +475,16 @@ def prepare_env_config(self) -> None: if "default_reranking_model" not in self._env_config: self._env_config["default_reranking_model"] = {} self._env_config["default_reranking_model"].update(reranker_config) - if os.environ.get("MINDSDB_DATA_CATALOG_ENABLED", "").lower() in ("1", "true"): + if get_bool_env_var("MINDSDB_DATA_CATALOG_ENABLED") is True: self._env_config["data_catalog"] = {"enabled": True} - if os.environ.get("MINDSDB_NO_STUDIO", "").lower() in ("1", "true"): + if get_bool_env_var("MINDSDB_NO_STUDIO") is True: self._env_config["gui"]["open_on_start"] = False self._env_config["gui"]["autoupdate"] = False - mindsdb_gui_autoupdate = os.environ.get("MINDSDB_GUI_AUTOUPDATE", "").lower() - if mindsdb_gui_autoupdate in ("0", "false"): - self._env_config["gui"]["autoupdate"] = False - elif mindsdb_gui_autoupdate in ("1", "true"): - self._env_config["gui"]["autoupdate"] = True - elif mindsdb_gui_autoupdate != "": - raise ValueError(f"Wrong value of env var MINDSDB_GUI_AUTOUPDATE={mindsdb_gui_autoupdate}") + mindsdb_gui_autoupdate = get_bool_env_var("MINDSDB_GUI_AUTOUPDATE") + if mindsdb_gui_autoupdate is not None: + self._env_config["gui"]["autoupdate"] = mindsdb_gui_autoupdate if os.environ.get("MINDSDB_PID_FILE_CONTENT", "") != "": try: @@ -427,6 +500,51 @@ def prepare_env_config(self) -> None: elif mindsdb_byom_enabled != "": raise ValueError(f"Wrong value of env var MINDSDB_BYOM_ENABLED={mindsdb_byom_enabled}") + # region MCP config + mindsdb_mcp_enabled = get_bool_env_var("MINDSDB_MCP_CORS_ENABLED") + if mindsdb_mcp_enabled is not None: + self._env_config["api"]["mcp"]["cors"]["enabled"] = mindsdb_mcp_enabled + mindsdb_mcp_allow_origins = get_list_env_var("MINDSDB_MCP_ALLOW_ORIGINS") + if isinstance(mindsdb_mcp_allow_origins, list): + self._env_config["api"]["mcp"]["cors"]["allow_origins"] = mindsdb_mcp_allow_origins + mindsdb_mcp_allow_headers = get_list_env_var("MINDSDB_MCP_ALLOW_HEADERS") + if isinstance(mindsdb_mcp_allow_headers, list): + self._env_config["api"]["mcp"]["cors"]["allow_headers"] = mindsdb_mcp_allow_headers + mindsdb_mcp_allow_origin_regex = os.environ.get("MINDSDB_MCP_ALLOW_ORIGIN_REGEXP", "") + if mindsdb_mcp_allow_origin_regex != "": + self._env_config["api"]["mcp"]["cors"]["allow_origin_regex"] = mindsdb_mcp_allow_origin_regex + mindsdb_mcp_rate_limit_enabled = get_bool_env_var("MINDSDB_MCP_RATE_LIMIT_ENABLED") + if mindsdb_mcp_rate_limit_enabled is not None: + self._env_config["api"]["mcp"]["rate_limit"]["enabled"] = mindsdb_mcp_rate_limit_enabled + mindsdb_mcp_rate_limit_rpm = os.environ.get("MINDSDB_MCP_RATE_LIMIT_RPM", "") + if mindsdb_mcp_rate_limit_rpm != "": + self._env_config["api"]["mcp"]["rate_limit"]["requests_per_minute"] = int(mindsdb_mcp_rate_limit_rpm) + + mindsdb_mcp_oauth_enabled = get_bool_env_var("MINDSDB_MCP_OAUTH_ENABLED") + if mindsdb_mcp_oauth_enabled is not None: + self._env_config["api"]["mcp"]["oauth"]["enabled"] = mindsdb_mcp_oauth_enabled + mindsdb_mcp_oauth_issuer_url = os.environ.get("MINDSDB_MCP_OAUTH_ISSUER_URL", "") + if mindsdb_mcp_oauth_issuer_url != "": + self._env_config["api"]["mcp"]["oauth"]["issuer_url"] = mindsdb_mcp_oauth_issuer_url + mindsdb_mcp_oauth_client_id = os.environ.get("MINDSDB_MCP_OAUTH_CLIENT_ID", "") + if mindsdb_mcp_oauth_client_id != "": + self._env_config["api"]["mcp"]["oauth"]["client_id"] = mindsdb_mcp_oauth_client_id + mindsdb_mcp_oauth_client_secret = os.environ.get("MINDSDB_MCP_OAUTH_CLIENT_SECRET", "") + if mindsdb_mcp_oauth_client_secret != "": + self._env_config["api"]["mcp"]["oauth"]["client_secret"] = mindsdb_mcp_oauth_client_secret + mindsdb_mcp_oauth_scope = os.environ.get("MINDSDB_MCP_OAUTH_SCOPE", "") + if mindsdb_mcp_oauth_scope != "": + self._env_config["api"]["mcp"]["oauth"]["scope"] = mindsdb_mcp_oauth_scope + mindsdb_mcp_oauth_public_url = os.environ.get("MINDSDB_MCP_OAUTH_PUBLIC_URL", "") + if mindsdb_mcp_oauth_public_url != "": + self._env_config["api"]["mcp"]["oauth"]["public_url"] = mindsdb_mcp_oauth_public_url + mindsdb_mcp_dns_rebinding_protection = get_bool_env_var("MINDSDB_MCP_DNS_REBINDING_PROTECTION") + if mindsdb_mcp_dns_rebinding_protection is not None: + self._env_config["api"]["mcp"]["dns_rebinding_protection"] = mindsdb_mcp_dns_rebinding_protection + # endregion + + # Keep env-based KB defaults out of config.auto.json overrides. + def fetch_auto_config(self) -> bool: """Load dict readed from config.auto.json to `auto_config`. Do it only if `auto_config` was not loaded before or config.auto.json been changed. @@ -589,6 +707,7 @@ def parse_cmd_args(self) -> None: agent=None, project=None, update_gui=False, + mcp_stdio=False, ) return @@ -615,7 +734,7 @@ def parse_cmd_args(self) -> None: parser.add_argument("--project-name", type=str, default=None, help="MindsDB project name") parser.add_argument("--update-gui", action="store_true", default=False, help="Update GUI and exit") - parser.add_argument("--load-tokenizer", action="store_true", default=False, help="Preload tokenizer and exit") + parser.add_argument("--mcp-stdio", action="store_true", default=False, help="Run MCP with STDIO transport") self._cmd_args = parser.parse_args() diff --git a/mindsdb/utilities/fs.py b/mindsdb/utilities/fs.py index 2462960acca..dbed99cc8be 100644 --- a/mindsdb/utilities/fs.py +++ b/mindsdb/utilities/fs.py @@ -1,4 +1,5 @@ import os +import sys import json import time import tempfile @@ -6,6 +7,9 @@ from pathlib import Path from typing import Generator +import tarfile +import zipfile + import psutil from mindsdb.utilities import log @@ -127,6 +131,70 @@ def clean_unlinked_process_marks() -> list[int]: return deleted_pids +class PidFileLock: + """Cross-platform exclusive file lock context manager. + Uses fcntl.flock on Unix and msvcrt.locking on Windows. + + Attributes: + _lock_file_path (Path): path to lock file + _blocking (bool): if True, waits until the lock becomes available, otherwise raises OSError immediately if lock is held + _fh (int): lock file descriptor + """ + + def __init__(self, lock_file_path: Path, blocking: bool = True): + self._lock_file_path = lock_file_path + self._blocking = blocking + self._fh = None + + def __enter__(self): + self._lock_file_path.parent.mkdir(parents=True, exist_ok=True) + self._fh = open(self._lock_file_path, "a+") + try: + if sys.platform == "win32": + import msvcrt + + # NOTE if file is locked, LK_LOCK will raise OSError after 10 seconds, LK_NBLCK immediately + mode = msvcrt.LK_LOCK if self._blocking else msvcrt.LK_NBLCK + self._fh.seek(0) + msvcrt.locking(self._fh.fileno(), mode, 1) + else: + import fcntl + + flags = fcntl.LOCK_EX + if not self._blocking: + flags |= fcntl.LOCK_NB + fcntl.flock(self._fh.fileno(), flags) + except (OSError, IOError): + self._fh.close() + self._fh = None + logger.error(f"Failed to acquire lock on {self._lock_file_path}") + raise + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self._fh is None: + return False + try: + if sys.platform == "win32": + import msvcrt + + self._fh.seek(0) + msvcrt.locking(self._fh.fileno(), msvcrt.LK_UNLCK, 1) + else: + import fcntl + + fcntl.flock(self._fh.fileno(), fcntl.LOCK_UN) + except (OSError, IOError): + pass + finally: + try: + self._fh.close() + except (OSError, IOError): + pass + self._fh = None + return False + + def create_pid_file(config): """ Create mindsdb process pid file. Check if previous process exists and is running @@ -140,48 +208,49 @@ def create_pid_file(config): p = get_tmp_dir() p.mkdir(parents=True, exist_ok=True) pid_file = p.joinpath("pid") - if pid_file.exists(): - # if process exists raise exception - pid_file_data_str = pid_file.read_text().strip() - pid = None - try: - pid_file_data = json.loads(pid_file_data_str) - if isinstance(pid_file_data, dict): - pid = pid_file_data.get("pid") - else: - pid = pid_file_data - except json.JSONDecodeError: - # is it just pid number (old approach)? - try: - pid = int(pid_file_data_str) - except Exception: - pass - logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON, removing") + lock_file = p.joinpath("pid.lock") - if pid is not None: + with PidFileLock(lock_file): + if pid_file.exists(): + pid_file_data_str = pid_file.read_text().strip() + pid = None try: - psutil.Process(int(pid)) - raise Exception(f"Found PID file with existing process: {pid} {pid_file}") - except (psutil.Error, ValueError): - pass - logger.warning(f"Found existing PID file {pid_file}({pid}), removing") - - pid_file.unlink(missing_ok=True) - - pid_file_content = config["pid_file_content"] - if pid_file_content is None or len(pid_file_content) == 0: - pid_file_data_str = str(os.getpid()) - else: - pid_file_data = {"pid": os.getpid()} - for key, value in pid_file_content.items(): - value_path = value.split(".") - value_obj = config - for path_part in value_path: - value_obj = value_obj.get(path_part) if value_obj else None - pid_file_data[key] = value_obj + pid_file_data = json.loads(pid_file_data_str) + if isinstance(pid_file_data, dict): + pid = pid_file_data.get("pid") + else: + pid = pid_file_data + except json.JSONDecodeError: + try: + pid = int(pid_file_data_str) + except Exception: + pass + logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON, removing") + + if pid is not None: + try: + psutil.Process(int(pid)) + raise Exception(f"Found PID file with existing process: {pid} {pid_file}") + except (psutil.Error, ValueError): + pass + logger.warning(f"Found existing PID file {pid_file}({pid}), removing") + + pid_file.unlink(missing_ok=True) + + pid_file_content = config["pid_file_content"] + if pid_file_content is None or len(pid_file_content) == 0: + pid_file_data_str = str(os.getpid()) + else: + pid_file_data = {"pid": os.getpid()} + for key, value in pid_file_content.items(): + value_path = value.split(".") + value_obj = config + for path_part in value_path: + value_obj = value_obj.get(path_part) if value_obj else None + pid_file_data[key] = value_obj - pid_file_data_str = json.dumps(pid_file_data) - pid_file.write_text(pid_file_data_str) + pid_file_data_str = json.dumps(pid_file_data) + pid_file.write_text(pid_file_data_str) def delete_pid_file(): @@ -193,45 +262,84 @@ def delete_pid_file(): return pid_file = get_tmp_dir().joinpath("pid") + lock_file = get_tmp_dir().joinpath("pid.lock") - if not pid_file.exists(): - return + with PidFileLock(lock_file): + if not pid_file.exists(): + return - pid_file_data_str = pid_file.read_text().strip() - pid = None - try: - pid_file_data = json.loads(pid_file_data_str) - if isinstance(pid_file_data, dict): - pid = pid_file_data.get("pid") - else: - # It's a simple number (old format or pid_file_content=None format) - pid = pid_file_data - except json.JSONDecodeError: - logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON") + pid_file_data_str = pid_file.read_text().strip() + pid = None + try: + pid_file_data = json.loads(pid_file_data_str) + if isinstance(pid_file_data, dict): + pid = pid_file_data.get("pid") + else: + # It's a simple number (old format or pid_file_content=None format) + pid = pid_file_data + except json.JSONDecodeError: + logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON") - if pid is not None and str(pid) != str(os.getpid()): - logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid") - return + if pid is not None and str(pid) != str(os.getpid()): + logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid") + return - pid_file.unlink(missing_ok=True) + pid_file.unlink(missing_ok=True) def __is_within_directory(directory, target): - abs_directory = os.path.abspath(directory) - abs_target = os.path.abspath(target) - prefix = os.path.commonprefix([abs_directory, abs_target]) - return prefix == abs_directory + abs_directory = os.path.realpath(directory) + abs_target = os.path.realpath(target) + try: + return os.path.commonpath([abs_directory, abs_target]) == abs_directory + except ValueError: + # can be raised on windows + return False + + +def __get_tar_members(archivefile, members): + if members is None: + return archivefile.getmembers() + + resolved_members = [] + for member in members: + if isinstance(member, tarfile.TarInfo): + resolved_members.append(member) + else: + resolved_members.append(archivefile.getmember(member)) + return resolved_members -def safe_extract(tarfile, path=".", members=None, *, numeric_owner=False): - # for py >= 3.12 - if hasattr(tarfile, "data_filter"): - tarfile.extractall(path, members=members, numeric_owner=numeric_owner, filter="data") +def safe_extract(archivefile, path=".", members=None, *, numeric_owner=False): + """ + Safely extract an archivefile, preventing path traversal attacks. + """ + if isinstance(archivefile, zipfile.ZipFile): + for member in archivefile.namelist(): + member_path = os.path.join(path, member) + if not __is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Zip File") + archivefile.extractall(path, members) return - # for py < 3.12 - for member in tarfile.getmembers(): - member_path = os.path.join(path, member.name) - if not __is_within_directory(path, member_path): - raise Exception("Attempted Path Traversal in Tar File") - tarfile.extractall(path, members=members, numeric_owner=numeric_owner) + if isinstance(archivefile, tarfile.TarFile): + # for py >= 3.12 + if hasattr(archivefile, "data_filter"): + archivefile.extractall(path, members=members, numeric_owner=numeric_owner, filter="data") + return + + # for py < 3.12 + for member in __get_tar_members(archivefile, members): + if member.issym() or member.islnk(): + raise Exception(f"Security Alert: Link entries are not allowed in tar file: {member.name}") + + if not (member.isfile() or member.isdir()): + raise Exception(f"Security Alert: Unsupported tar member type detected for member: {member.name}") + + member_path = os.path.join(path, member.name) + if not __is_within_directory(path, member_path): + raise Exception( + f"Security Alert: Attempted path traversal in tar file detected for member: {member.name}" + ) + + archivefile.extract(member, path=path, numeric_owner=numeric_owner) diff --git a/mindsdb/utilities/langfuse.py b/mindsdb/utilities/langfuse.py index def4ec98c7e..92320c48d5e 100644 --- a/mindsdb/utilities/langfuse.py +++ b/mindsdb/utilities/langfuse.py @@ -5,8 +5,8 @@ from mindsdb.utilities import log if TYPE_CHECKING: - from langfuse.callback import CallbackHandler - from langfuse.client import StatefulSpanClient + from langfuse._client.span import LangfuseSpan + from langfuse.langchain import CallbackHandler logger = log.getLogger(__name__) @@ -111,6 +111,7 @@ def __init__( public_key=public_key, secret_key=secret_key, host=host, + environment=environment, release=release, debug=debug, timeout=timeout, @@ -145,13 +146,14 @@ def setup_trace( self.set_tags(tags) try: - self.trace = self.client.trace( - name=name, input=input, metadata=self.metadata, tags=self.tags, user_id=user_id, session_id=session_id - ) + # SDK v3+: root observation is a span; trace attributes are set via update_trace. + self.trace = self.client.start_span(name=name, input=input, metadata=self.metadata) + self.trace.update_trace(tags=self.tags, user_id=user_id, session_id=session_id) except Exception: - logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.id}:") + logger.exception("Something went wrong while creating Langfuse trace") + return - logger.info(f"Langfuse trace configured with ID: {self.trace.id}") + logger.info(f"Langfuse trace configured with ID: {self.trace.trace_id}") def get_trace_id(self) -> typing.Optional[str]: """ @@ -166,9 +168,9 @@ def get_trace_id(self) -> typing.Optional[str]: logger.debug("Langfuse trace is not setup.") return "" - return self.trace.id + return self.trace.trace_id - def start_span(self, name: str, input: typing.Optional[typing.Any] = None) -> typing.Optional["StatefulSpanClient"]: + def start_span(self, name: str, input: typing.Optional[typing.Any] = None) -> typing.Optional["LangfuseSpan"]: """ Create span. If Langfuse is disabled, nothing will be done. @@ -181,9 +183,9 @@ def start_span(self, name: str, input: typing.Optional[typing.Any] = None) -> ty logger.debug("Langfuse is disabled.") return None - return self.trace.span(name=name, input=input) + return self.trace.start_span(name=name, input=input) - def end_span_stream(self, span: typing.Optional["StatefulSpanClient"] = None) -> None: + def end_span_stream(self, span: typing.Optional["LangfuseSpan"] = None) -> None: """ End span. If Langfuse is disabled, nothing will happen. Args: @@ -195,10 +197,10 @@ def end_span_stream(self, span: typing.Optional["StatefulSpanClient"] = None) -> return span.end() - self.trace.update() + self.client.flush() def end_span( - self, span: typing.Optional["StatefulSpanClient"] = None, output: typing.Optional[typing.Any] = None + self, span: typing.Optional["LangfuseSpan"] = None, output: typing.Optional[typing.Any] = None ) -> None: """ End trace. If Langfuse is disabled, nothing will be done. @@ -216,8 +218,10 @@ def end_span( logger.debug("Langfuse span is not created.") return - span.end(output=output) - self.trace.update(output=output) + if output is not None: + span.update(output=output) + span.end() + self.trace.update_trace(output=output) metadata = self.metadata or {} @@ -225,9 +229,9 @@ def end_span( # Ensure all batched traces are sent before fetching. self.client.flush() metadata["tool_usage"] = self._get_tool_usage() - self.trace.update(metadata=metadata) + self.trace.update_trace(metadata=metadata) except Exception: - logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.id}:") + logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.trace_id}:") def get_langchain_handler(self) -> typing.Optional["CallbackHandler"]: """ @@ -238,7 +242,13 @@ def get_langchain_handler(self) -> typing.Optional["CallbackHandler"]: logger.debug("Langfuse is disabled.") return None - return self.trace.get_langchain_handler() + try: + from langfuse.langchain import CallbackHandler + except ImportError: + logger.debug("langfuse.langchain CallbackHandler is not available (install langchain extra if needed).") + return None + + return CallbackHandler(public_key=self.public_key) def set_metadata(self, custom_metadata: dict = None) -> None: """ @@ -267,8 +277,8 @@ def _get_tool_usage(self) -> typing.Dict: tool_usage = {} try: - fetched_trace = self.client.get_trace(self.trace.id) - steps = [s.name for s in fetched_trace.observations] + fetched_trace = self.client.api.trace.get(self.trace.trace_id) + steps = [s.name for s in fetched_trace.observations if s.name] for step in steps: if "AgentAction" in step: tool_name = step.split("-")[1] @@ -276,8 +286,8 @@ def _get_tool_usage(self) -> typing.Dict: tool_usage[tool_name] = 0 tool_usage[tool_name] += 1 except TraceNotFoundError: - logger.warning(f"Langfuse trace {self.trace.id} not found") + logger.warning(f"Langfuse trace {self.trace.trace_id} not found") except Exception: - logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.id}:") + logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.trace_id}:") return tool_usage diff --git a/mindsdb/utilities/log.py b/mindsdb/utilities/log.py index 8c76ad9d4ea..2ae311a61da 100644 --- a/mindsdb/utilities/log.py +++ b/mindsdb/utilities/log.py @@ -4,10 +4,15 @@ import logging import threading from typing import Any +import warnings from logging.config import dictConfig from mindsdb.utilities.config import config as app_config +# Suppress Pydantic warnings for third-party libraries +# TODO: Work on a better solution to this +warnings.filterwarnings("ignore", message="Field.*has conflict with protected namespace.*", category=UserWarning) + logging_initialized = False @@ -205,6 +210,7 @@ def get_handlers_config(process_name: str) -> dict: "class": "mindsdb.utilities.log.StreamSanitizingHandler", "formatter": console_handler_config.get("formatter", "default"), "level": console_handler_config_level, + "stream": console_handler_config.get("stream", "ext://sys.stderr"), } file_handler_config = app_config["logging"]["handlers"]["file"] diff --git a/mindsdb/integrations/handlers/aerospike_handler/tests/__init__.py b/mindsdb/utilities/types/__init__.py similarity index 100% rename from mindsdb/integrations/handlers/aerospike_handler/tests/__init__.py rename to mindsdb/utilities/types/__init__.py diff --git a/mindsdb/utilities/types/column.py b/mindsdb/utilities/types/column.py new file mode 100644 index 00000000000..e8d258468d3 --- /dev/null +++ b/mindsdb/utilities/types/column.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass, field, MISSING + +from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE + + +@dataclass(kw_only=True, slots=True) +class Column: + name: str = field(default=MISSING) + alias: str | None = None + table_name: str | None = None + table_alias: str | None = None + type: MYSQL_DATA_TYPE | None = None + database: str | None = None + flags: dict = None + charset: str | None = None + original_type: str | None = None + dtype: str | None = None + + def __post_init__(self): + if self.alias is None: + self.alias = self.name + if self.table_alias is None: + self.table_alias = self.table_name + + def get_hash_name(self, prefix): + table_name = self.table_name if self.table_alias is None else self.table_alias + name = self.name if self.alias is None else self.alias + + name = f"{prefix}_{table_name}_{name}" + return name diff --git a/mindsdb/utilities/utils.py b/mindsdb/utilities/utils.py index 3c9bd09162c..160b03fe79c 100644 --- a/mindsdb/utilities/utils.py +++ b/mindsdb/utilities/utils.py @@ -2,6 +2,8 @@ import re import typing +from pydantic import BaseModel, ValidationError + def parse_csv_attributes(csv_attributes: typing.Optional[str] = "") -> typing.Dict[str, str]: """ @@ -32,3 +34,24 @@ def parse_csv_attributes(csv_attributes: typing.Optional[str] = "") -> typing.Di raise ValueError(f"Failed to parse csv_attributes='{csv_attributes}': {e}") from e return attributes + + +def validate_pydantic_params(params: dict, schema: type[BaseModel], subject: str): + # check names and types + try: + schema.model_validate(params) + except ValidationError as e: + problems = [] + for error in e.errors(): + parameter = ".".join([str(i) for i in error["loc"]]) + param_type = error["type"] + if param_type == "extra_forbidden": + msg = f"Parameter '{parameter}' is not allowed" + else: + msg = f"Error in '{parameter}' (type: {param_type}): {error['msg']}. Input: {repr(error['input'])}" + problems.append(msg) + + msg = "\n".join(problems) + if len(problems) > 1: + msg = "\n" + msg + raise ValueError(f"Problem with {subject} parameters: {msg}") from e diff --git a/requirements/requirements-agents.txt b/requirements/requirements-agents.txt index 83b60d9f496..e96657bb724 100644 --- a/requirements/requirements-agents.txt +++ b/requirements/requirements-agents.txt @@ -1,19 +1,14 @@ -openai<3.0.0,>=2.9.0 - -langchain-community==0.3.27 -langchain-core==0.3.77 -langchain-experimental==0.3.4 - +openai<3.0.0,>=2.11.0 # When using agents, some LLMs may require the 'transformers' library (like Ollama): -transformers >= 4.42.4 +transformers==5.5.0 # Required for KB mindsdb-evaluator == 0.0.21 -litellm==1.63.14 -mcp~=1.10.1 # Required for MCP server +mcp~=1.26.0 # Required for MCP server # A2A requirements httpx==0.28.1 jwcrypto==1.5.6 -typing-extensions==4.14.1 +# fastmcp (via pydantic-ai) requires typing-extensions>=4.15.0 (py-key-value-aio chain) +typing-extensions>=4.15.0,<5 diff --git a/requirements/requirements-kb.txt b/requirements/requirements-kb.txt index eb5adbfaefb..334e7c0f352 100644 --- a/requirements/requirements-kb.txt +++ b/requirements/requirements-kb.txt @@ -1,4 +1,2 @@ lxml==5.3.0 # Is this transitive dependency? -pgvector==0.3.6 # Required for knowledge bases -langchain-core==0.3.77 -litellm==1.63.14 \ No newline at end of file +faiss-cpu==1.13.2 # default vector storage diff --git a/requirements/requirements-langfuse.txt b/requirements/requirements-langfuse.txt index fffecd7da86..7cd73e32d75 100644 --- a/requirements/requirements-langfuse.txt +++ b/requirements/requirements-langfuse.txt @@ -1 +1 @@ -langfuse==2.53.3 # Latest as of November 4, 2024 \ No newline at end of file +langfuse==3.2.5 \ No newline at end of file diff --git a/requirements/requirements-opentelemetry.txt b/requirements/requirements-opentelemetry.txt index eae7c0601c4..0b262f9b35a 100644 --- a/requirements/requirements-opentelemetry.txt +++ b/requirements/requirements-opentelemetry.txt @@ -1,6 +1,6 @@ -opentelemetry-api==1.27.0 -opentelemetry-sdk==1.27.0 -opentelemetry-exporter-otlp==1.27.0 -opentelemetry-instrumentation-requests==0.48b0 -opentelemetry-instrumentation-flask==0.48b0 -opentelemetry-distro==0.48b0 \ No newline at end of file +opentelemetry-api==1.39.1 +opentelemetry-sdk==1.39.1 +opentelemetry-exporter-otlp==1.39.1 +opentelemetry-instrumentation-requests==0.60b1 +opentelemetry-instrumentation-flask==0.60b1 +opentelemetry-distro==0.60b1 \ No newline at end of file diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 58f887f4881..60607799622 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -15,9 +15,10 @@ ollama >= 0.1.7 # Langchain tests anthropic >= 0.21.3 # Langchain tests langchain-google-genai>=2.0.0 # Langchain tests mindsdb-sdk -filelock==3.20.1 +filelock==3.20.3 mysql-connector-python==9.1.0 walrus==0.9.3 pymongo == 4.8.0 pytest-json-report==1.5.0 -appdirs >= 1.0.0 \ No newline at end of file +appdirs >= 1.0.0 +pgvector==0.3.6 # Required for knowledge bases tests diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 57be65faec2..fbeea1e13b1 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,16 +1,16 @@ packaging -flask == 3.0.3 -werkzeug == 3.0.6 +flask == 3.1.3 +werkzeug == 3.1.6 flask-restx >= 1.3.0, < 2.0.0 -pandas == 2.2.3 -python-multipart == 0.0.20 -cryptography>=35.0 +pandas==2.3.1 +python-multipart == 0.0.26 +cryptography>=46.0.5 psycopg[binary] psutil~=7.0 sqlalchemy >= 2.0.0, < 3.0.0 psycopg2-binary # This is required for using sqlalchemy with postgres alembic >= 1.3.3 -redis >=5.0.0, < 6.0.0 +redis==6.4.0 walrus==0.9.3 flask-compress >= 1.0.0 appdirs >= 1.0.0 @@ -51,7 +51,7 @@ a2wsgi ~= 1.10.10 # WSGI wrapper for flask+starlette starlette>=0.49.1 sse-starlette==2.3.3 pydantic_core>=2.33.2 -pyjwt==2.10.1 +pyjwt==2.12.0 # files reading pymupdf==1.25.2 python-docx>=1.1.0 @@ -59,12 +59,17 @@ python-pptx>=0.6.0 filetype charset-normalizer openpyxl # used by pandas to read txt and xlsx files +xlrd>=2.0.1 # used by pandas to read legacy .xls files aipdf==0.0.6.3 # 0.0.7.0 requires openai>=2.0.0, conflicts with langchain-openai pyarrow<=19.0.0 # used by pandas to read feather files in Files handler -orjson==3.11.3 +orjson==3.11.6 -mind-castle >= 0.4.9 -pydantic-ai>=0.0.14 # Required for Pydantic AI agents +mind-castle==0.5.0 +pydantic-ai>=0.0.14 # Required for Pydantic AI agents (kept <2.0-openai-compatible: our stack uses langchain-openai==0.3.6, engine dropped langchain) bs4 # for rag HTMLDocumentLoader urllib3>=2.6.3 # not directly required, pinned by Snyk to avoid a vulnerability + +# kb providers +aiobotocore==3.4.0 +google-genai==1.70.0 diff --git a/scripts/run_unit_tests.sh b/scripts/run_unit_tests.sh index 28d3dc1c33c..1764d33c231 100755 --- a/scripts/run_unit_tests.sh +++ b/scripts/run_unit_tests.sh @@ -35,18 +35,12 @@ HANDLERS_TO_INSTALL=( timescaledb mssql oracle - slack redshift bigquery - clickhouse web databricks - github - ms_teams statsforecast chromadb - confluence - elasticsearch agents kb ) @@ -217,10 +211,6 @@ for handler in "${HANDLERS_TO_INSTALL[@]}"; do -r requirements/requirements-test.txt \ "${HANDLER_EXTRAS[@]}" - # Install onnxruntime for ChromaDB - echo "Installing onnxruntime..." - uv pip install --force-reinstall onnxruntime==1.20.1 - # Clone parser tests PARSER_VERSION=$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2) if [[ ! -d "parser_tests" ]]; then diff --git a/scripts/test-artifacts.sh b/scripts/test-artifacts.sh new file mode 100755 index 00000000000..f1769663beb --- /dev/null +++ b/scripts/test-artifacts.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "${REPO_ROOT}" + +HANDLERS_TO_INSTALL=' +postgres +mysql +salesforce +snowflake +timescaledb +mssql +oracle +slack +redshift +bigquery +clickhouse +web +databricks +github +ms_teams +statsforecast +chromadb +confluence +' + +HANDLERS_TO_VERIFY=' +mysql +salesforce +postgres +snowflake +timescaledb +mssql +oracle +slack +file +redshift +bigquery +confluence +' + +INSTALL_HANDLERS=() +while IFS= read -r handler; do + handler=${handler//$'\r'/} + [[ -z "${handler}" || "${handler}" =~ ^[[:space:]]*# ]] && continue + INSTALL_HANDLERS+=("${handler}") +done <<< "${HANDLERS_TO_INSTALL}" + +HANDLER_EXTRAS=() +for handler in "${INSTALL_HANDLERS[@]}"; do + HANDLER_EXTRAS+=(".[${handler}]") +done + +uv pip install ".[agents,kb]" \ + -r requirements/requirements-test.txt \ + "${HANDLER_EXTRAS[@]}" + +uv pip install --force-reinstall onnxruntime==1.20.1 + +# Ensure parser tests are present (required for render tests when --runslow) +if [[ ! -d parser_tests ]]; then + git clone --branch v$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2) \ + https://github.com/mindsdb/mindsdb_sql_parser.git parser_tests +fi + +# Run the exact test target used in CI +make unit_tests_slow + +# Generate the extra artifacts produced in CI +HANDLERS_TO_INSTALL="${HANDLERS_TO_INSTALL}" \ +HANDLERS_TO_VERIFY="${HANDLERS_TO_VERIFY}" \ +COVERAGE_FAIL_UNDER="80" \ +COVERAGE_FILE=.coverage.unit \ + uv run tests/scripts/check_handler_coverage.py > pytest-coverage-handlers.txt +COVERAGE_FILE=.coverage.unit uv run coverage html -d reports/htmlcov + +# Collect artifacts in a single directory +ARTIFACT_DIR="tests_artifacts" +mkdir -p "${ARTIFACT_DIR}" + +for artifact in pytest.xml coverage.xml .coverage.unit pytest-coverage.txt pytest-coverage-handlers.txt; do + if [[ -f "${artifact}" ]]; then + mv "${artifact}" "${ARTIFACT_DIR}/" + fi +done + +if [[ -d reports/htmlcov ]]; then + rm -rf "${ARTIFACT_DIR}/htmlcov" + mv reports/htmlcov "${ARTIFACT_DIR}/htmlcov" +fi diff --git a/tests/scripts/check_requirements.py b/tests/scripts/check_requirements.py index 800a7fa1d4c..f3ec6de303e 100644 --- a/tests/scripts/check_requirements.py +++ b/tests/scripts/check_requirements.py @@ -104,10 +104,13 @@ def get_requirements_with_DEP002(path): "langchain-experimental", "lxml", "openpyxl", + "xlrd", "onnxruntime", "litellm", "numba", # required in a few files for the hierarchicalforecast. Otherwise, uv may install an old version. "urllib3", # pinned by Snyk to avoid a vulnerability + "faiss-cpu", + "pyopenssl", ], } @@ -135,7 +138,7 @@ def get_requirements_with_DEP002(path): HUGGINGFACE_DEP002_IGNORE_HANDLER_DEPS = ["torch"] -RAG_DEP002_IGNORE_HANDLER_DEPS = ["sentence-transformers", "faiss-cpu"] +RAG_DEP002_IGNORE_HANDLER_DEPS = ["sentence-transformers"] SOLR_DEP002_IGNORE_HANDLER_DEPS = ["sqlalchemy-solr"] @@ -143,6 +146,8 @@ def get_requirements_with_DEP002(path): CHROMADB_EP002_IGNORE_HANDLER_DEPS = ["onnxruntime"] +FRESHDESK_EP002_IGNORE_HANDLER_DEPS = ["python-freshdesk"] + # The `pyarrow` package is used only if it is installed. # The handler can work without it. SNOWFLAKE_DEP003_IGNORE_HANDLER_DEPS = ["pyarrow"] @@ -158,6 +163,7 @@ def get_requirements_with_DEP002(path): + SOLR_DEP002_IGNORE_HANDLER_DEPS + OPENAI_DEP002_IGNORE_HANDLER_DEPS + CHROMADB_EP002_IGNORE_HANDLER_DEPS + + FRESHDESK_EP002_IGNORE_HANDLER_DEPS ) ) @@ -175,6 +181,7 @@ def get_requirements_with_DEP002(path): "IfxPyDbi", "ingres_sa_dialect", "pyodbc", + "freshdesk", ], # 'tests' is the mindsdb tests folder in the repo root, 'pyarrow' used in snowflake handler "DEP003": DEP003_IGNORE_HANDLER_DEPS, } @@ -195,6 +202,7 @@ def get_requirements_with_DEP002(path): "google-analytics-admin": ["google"], "google-auth": ["google"], "google-cloud-storage": ["google"], + "google-genai": ["google"], "google-auth-oauthlib": ["google_auth_oauthlib"], "google-api-python-client": ["googleapiclient"], "ibm-cos-sdk": ["ibm_boto3", "ibm_botocore"], @@ -252,6 +260,8 @@ def get_requirements_with_DEP002(path): "python-dotenv": ["dotenv"], "pyjwt": ["jwt"], "sklearn": ["scikit-learn"], + "types-aioboto3": ["aioboto3"], + "ag2": ["autogen"], } # We use this to exit with a non-zero status code if any check fails diff --git a/tests/unit/api/http/agents_test.py b/tests/unit/api/http/agents_test.py index a6253132384..bd2532bcd78 100644 --- a/tests/unit/api/http/agents_test.py +++ b/tests/unit/api/http/agents_test.py @@ -27,14 +27,13 @@ def test_prepare(client): @pytest.mark.deprecated( "MindsDB models are no longer used with agents. However, Minds still uses models, so this test is kept for now" ) -def test_post_agent_depreciated(client): +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") +def test_post_agent_depreciated(check_agent_llm, client): create_request = { "agent": { "name": "test_post_agent_depreciated", - "model_name": "test_model", - "params": {"k1": "v1"}, - "provider": "mindsdb", - "skills": ["test_skill"], + "model": {"provider": "openai", "model_name": "test_model"}, + "params": {"timeout": 10}, } } @@ -45,9 +44,8 @@ def test_post_agent_depreciated(client): expected_agent = { "name": "test_post_agent_depreciated", - "model_name": "test_model", - "provider": "mindsdb", - "params": {"k1": "v1"}, + "model": {"provider": "openai", "model_name": "test_model"}, + "params": {"timeout": 10}, "id": created_agent["id"], "project_id": created_agent["project_id"], "created_at": created_agent["created_at"], @@ -57,7 +55,9 @@ def test_post_agent_depreciated(client): assert created_agent == expected_agent -def test_post_agent(client): +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data") +def test_post_agent(check_agent_data, check_agent_llm, client): create_request = { "agent": { "name": "TEST_post_agent", @@ -161,7 +161,9 @@ def test_get_agents_project_not_found(client): assert get_response.status_code == HTTPStatus.NOT_FOUND -def test_get_agent(client): +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data") +def test_get_agent(check_agent_data, check_agent_llm, client): create_request = { "agent": { "name": "test_get_agent", @@ -236,13 +238,13 @@ def test_get_agent_project_not_found(client): @pytest.mark.deprecated( "MindsDB models are no longer used with agents. However, Minds still uses models, so this test is kept for now" ) -def test_put_agent_update_depreciated(client): +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") +def test_put_agent_update_depreciated(check_agent_llm, client): create_request = { "agent": { "name": "test_put_agent_update_depreciated", - "model_name": "test_model", - "params": {"k1": "v1", "k2": "v2"}, - "provider": "mindsdb", + "model": {"provider": "openai", "model_name": "test_model"}, + "params": {"timeout": 10}, } } @@ -251,7 +253,7 @@ def test_put_agent_update_depreciated(client): update_request = { "agent": { - "params": {"k1": "v1.1", "k2": None, "k3": "v3"}, + "params": {"timeout": 20}, } } @@ -262,9 +264,8 @@ def test_put_agent_update_depreciated(client): expected_agent = { "name": "test_put_agent_update_depreciated", - "model_name": "test_model", - "params": {"k1": "v1.1", "k3": "v3"}, - "provider": "mindsdb", + "model": {"provider": "openai", "model_name": "test_model"}, + "params": {"timeout": 20}, "id": updated_agent["id"], "project_id": updated_agent["project_id"], "created_at": updated_agent["created_at"], @@ -277,7 +278,9 @@ def test_put_agent_update_depreciated(client): @pytest.mark.deprecated( "MindsDB models are no longer used with agents. However, Minds still uses models, so this test is kept for now" ) -def test_put_agent_update(client): +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data") +def test_put_agent_update(check_agent_data, check_agent_llm, client): create_request = { "agent": { "name": "test_put_agent_update", @@ -292,7 +295,7 @@ def test_put_agent_update(client): update_request = { "agent": { - "params": {"k1": "v1.1", "k2": None, "k3": "v3"}, + "params": {"timeout": 5}, "data": { "tables": ["example_db.customers", "example_db.orders"], "knowledge_bases": ["example_kb"], @@ -307,7 +310,7 @@ def test_put_agent_update(client): expected_agent = { "name": "test_put_agent_update", - "params": {"k1": "v1.1", "k3": "v3"}, + "params": {"timeout": 5}, "id": updated_agent["id"], "project_id": updated_agent["project_id"], "created_at": updated_agent["created_at"], @@ -356,7 +359,9 @@ def test_put_agent_no_agent(client): # assert '404' in response.status -def test_delete_agent(client): +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data") +def test_delete_agent(check_agent_data, check_agent_llm, client): create_request = { "agent": { "name": "test_delete_agent", @@ -385,13 +390,14 @@ def test_delete_agent_not_found(client): assert delete_response.status_code == HTTPStatus.NOT_FOUND -def test_agent_completions(client): +@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") +def test_agent_completions(check_agent_llm, client): create_request = { "agent": { "name": "test_agent", "model_name": "test_model", "provider": "mindsdb", - "params": {"prompt_template": "Test message!", "user_column": "content"}, + "params": {"prompt_template": "Test message!"}, } } diff --git a/tests/unit/api/http/byom_test.py b/tests/unit/api/http/byom_test.py index f8870a699d4..3259482ee1a 100644 --- a/tests/unit/api/http/byom_test.py +++ b/tests/unit/api/http/byom_test.py @@ -20,6 +20,7 @@ def predict(self, df): ) +@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled") def test_disabled_byom(client): """Test disabled byom""" config._config["byom"]["enabled"] = False @@ -35,6 +36,7 @@ def test_disabled_byom(client): assert response.status_code == HTTPStatus.FORBIDDEN +@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled") def test_path_traversal(client): """Test uploading a file""" config._config["byom"]["enabled"] = True @@ -53,6 +55,7 @@ def test_path_traversal(client): @pytest.mark.slow +@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled") def test_conflict(client): """Test that it is not possible to create two engins with the same name""" config._config["byom"]["enabled"] = True diff --git a/tests/unit/api/http/config_test.py b/tests/unit/api/http/config_test.py new file mode 100644 index 00000000000..672d7d31cd0 --- /dev/null +++ b/tests/unit/api/http/config_test.py @@ -0,0 +1,9 @@ +def test_get_config_returns_knowledge_bases_storage(client): + response = client.get("/api/config/") + + assert response.status_code == 200 + payload = response.get_json() + assert "knowledge_bases" in payload + assert "storage" in payload["knowledge_bases"] + assert "available_vector_engines" in payload["knowledge_bases"] + assert "pgvector_enabled" in payload["knowledge_bases"] diff --git a/tests/unit/api/http/files_test.py b/tests/unit/api/http/files_test.py index 7fd56c878ac..c2eecce5e81 100644 --- a/tests/unit/api/http/files_test.py +++ b/tests/unit/api/http/files_test.py @@ -1,5 +1,6 @@ import io import os.path +import os from http import HTTPStatus @@ -172,22 +173,27 @@ def test_archive_file_with_extension_upload(client): assert "File name cannot contain extension." in data["detail"] -def test_put_file_with_path_in_filename_multipart(client): - """Test uploading a file with path traversal in the filename via multipart form data""" - file = io.BytesIO(b"Hello, World!") +def test_zipfile_traversal(client): + """Test uploading a zip archive with path traversal filenames""" + import zipfile + import io - data = { - "file": (file, "../test.txt"), - "source_type": "file", - } + # Create a zip file in memory with a symlink + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("../../../../etc/passwd", "malicious content") + zip_buffer.seek(0) + data = {"file": (zip_buffer, "archive.zip")} response = client.put( - "/api/files/testfile", + "/api/files/archive", data=data, content_type="multipart/form-data", follow_redirects=True, ) # Should fail due to path validation (ValueError is raised) assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + data = response.get_json() + assert "Attempted Path Traversal in Zip File" in data["detail"] def test_put_file_with_invalid_parameters_multipart(client): diff --git a/tests/unit/api/http/handlers_test.py b/tests/unit/api/http/handlers_test.py new file mode 100644 index 00000000000..0b586bcd7e4 --- /dev/null +++ b/tests/unit/api/http/handlers_test.py @@ -0,0 +1,153 @@ +import tempfile +from http import HTTPStatus +from pathlib import Path +from unittest.mock import patch + + +def test_icon_builtin_handler(client): + """ + A built-in handler with a registered icon and a valid local path must + return the icon file (HTTP 200). + """ + with tempfile.TemporaryDirectory() as tmp: + icon_file = Path(tmp) / "icon.svg" + icon_file.write_text("") + + meta = { + "path": Path(tmp), + "icon": {"name": "icon.svg", "type": "svg", "data": ""}, + "import": {"success": True, "error_message": None}, + } + + with patch.object( + client.application.integration_controller, + "get_handlers_metadata", + return_value={"mysql": meta}, + ): + response = client.get("/api/handlers/mysql/icon", follow_redirects=True) + + status_code = response.status_code + response.close() + + assert status_code == HTTPStatus.OK + + +def test_icon_community_stub_no_path(client): + """ + An unfetched community handler stub (path=None, no 'icon' key) must + return HTTP 404 cleanly β€” no exception should propagate. + """ + meta = { + "path": None, + "import": { + "success": None, + "error_message": None, + "folder": "github_handler", + }, + "name": "github", + "support_level": "community", + } + + with patch.object( + client.application.integration_controller, + "get_handlers_metadata", + return_value={"github": meta}, + ): + response = client.get("/api/handlers/github/icon", follow_redirects=True) + + assert response.status_code == HTTPStatus.NOT_FOUND + + +def test_icon_unknown_handler(client): + """ + A request for an icon of an unknown handler must return HTTP 404. + """ + with patch.object( + client.application.integration_controller, + "get_handlers_metadata", + return_value={}, + ): + response = client.get("/api/handlers/does_not_exist/icon", follow_redirects=True) + + assert response.status_code == HTTPStatus.NOT_FOUND + + +def test_handler_info_returns_404_when_not_found(client): + """ + GET /handlers/ must return HTTP 404 when get_handler_meta() returns + None (unknown handler or failed fetch) instead of crashing with TypeError. + """ + with patch.object( + client.application.integration_controller, + "get_handler_meta", + return_value=None, + ): + response = client.get("/api/handlers/nonexistent", follow_redirects=True) + + assert response.status_code == HTTPStatus.NOT_FOUND + + +def test_handler_info_returns_200_without_icon_key(client): + """ + GET /handlers/ must not raise KeyError when the handler metadata has + no 'icon' key (community stub or handler without an icon). + """ + meta = { + "path": None, + "import": {"success": None, "error_message": None, "folder": "github_handler"}, + "name": "github", + "title": "GitHub", + "description": "GitHub handler", + "permanent": False, + "connection_args": None, + "class_type": None, + "type": "data", + "support_level": "community", + } + + with patch.object( + client.application.integration_controller, + "get_handler_meta", + return_value=meta, + ): + response = client.get("/api/handlers/github", follow_redirects=True) + + assert response.status_code == HTTPStatus.OK + body = response.get_json() + assert body["name"] == "github" + assert "path" not in body + assert "icon" not in body + + +def test_handlers_list_skips_none_meta(client): + """ + The listing endpoint must not crash when get_handlers_import_status() + returns None for a handler (e.g. an unfetched community handler that + failed to load). The None entry is silently skipped and the remaining + handlers are returned normally. + """ + mysql_meta = { + "path": None, + "import": {"success": True, "error_message": None, "folder": "mysql_handler"}, + "name": "mysql", + "type": "data", + "title": "MySQL", + "description": "MySQL handler", + "permanent": False, + "connection_args": None, + "class_type": "sql", + "support_level": "community", + "icon": None, + } + + with patch.object( + client.application.integration_controller, + "get_handlers_import_status", + return_value={"broken_community": None, "mysql": mysql_meta}, + ): + response = client.get("/api/handlers/", follow_redirects=True) + + assert response.status_code == HTTPStatus.OK + names = [h["name"] for h in response.get_json()] + assert "mysql" in names + assert "broken_community" not in names diff --git a/tests/unit/api/http/knowledge_bases_test.py b/tests/unit/api/http/knowledge_bases_test.py index b4bd4f3488d..4ccfccfe7a7 100644 --- a/tests/unit/api/http/knowledge_bases_test.py +++ b/tests/unit/api/http/knowledge_bases_test.py @@ -3,17 +3,17 @@ from unittest.mock import patch -@patch("mindsdb.integrations.handlers.chromadb_handler.chromadb_handler.ChromaDBHandler") -@patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") -def test_update_kb_embeddings(mock_embedding, chroma, client): +@patch("mindsdb.integrations.handlers.duckdb_faiss_handler.duckdb_faiss_handler.DuckDBFaissHandler") +@patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") +def test_update_kb_embeddings(mock_embedding, handler, client): # for test of embeddings - mock_embedding().data = [{"embedding": [0.1, 0.2]}] + mock_embedding().embeddings.return_value = [{"embedding": [0.1, 0.2]}] integration_data = { "database": { "name": "kb_vector_db", - "engine": "chromadb", - "parameters": {"persist_directory": "kb_vector_db"}, + "engine": "duckdb_faiss", + "parameters": {}, } } response = client.post("/api/databases", json=integration_data, follow_redirects=True) @@ -54,5 +54,5 @@ def test_update_kb_embeddings(mock_embedding, chroma, client): ) assert update_response.status_code == HTTPStatus.OK - kwargs = mock_embedding.call_args_list[0][1] + kwargs = mock_embedding.call_args_list[0][0][0] assert kwargs["api_key"] == "embed-key-2" diff --git a/tests/unit/api/http/test_integrations_passthrough.py b/tests/unit/api/http/test_integrations_passthrough.py new file mode 100644 index 00000000000..8785926fb78 --- /dev/null +++ b/tests/unit/api/http/test_integrations_passthrough.py @@ -0,0 +1,198 @@ +"""HTTP-layer tests for the /api/integrations//passthrough routes. + +Exercises the Flask blueprint in isolation: the session's integration +controller is mocked to return handlers that satisfy +PassthroughProtocol, so these tests do not touch real handlers and do +not make network calls. +""" + +from http import HTTPStatus +from unittest.mock import MagicMock, patch + +from mindsdb.integrations.libs.passthrough import PassthroughMixin +from mindsdb.integrations.libs.passthrough_types import PassthroughResponse + + +class _StubPassthroughHandler(PassthroughMixin): + """Handler double: the HTTP layer checks the PassthroughProtocol, then + calls `api_passthrough`. We bypass all mixin internals by overriding + `api_passthrough` directly so the endpoint test does not depend on + connection_data, base_url resolution, or the requests library.""" + + def __init__(self, response: PassthroughResponse): + self._response = response + self.calls: list = [] + + def api_passthrough(self, req): # type: ignore[override] + self.calls.append(req) + return self._response + + def test_passthrough(self): + return {"ok": True, "status_code": self._response.status_code} + + +def _patch_handler(handler): + """Patch FakeMysqlProxy so the endpoint resolves `name` to `handler`.""" + proxy = MagicMock() + proxy.session.integration_controller.get_data_handler.return_value = handler + return patch( + "mindsdb.api.http.namespaces.integrations.FakeMysqlProxy", + return_value=proxy, + ) + + +def test_passthrough_happy_path_returns_200_and_serialized_body(client): + handler = _StubPassthroughHandler( + PassthroughResponse( + status_code=200, + headers={"X-Safe": "1"}, + body={"hello": "world"}, + content_type="application/json", + ) + ) + + with _patch_handler(handler): + response = client.post( + "/api/integrations/any_ds/passthrough", + json={"method": "GET", "path": "/me"}, + ) + + assert response.status_code == HTTPStatus.OK + payload = response.get_json() + assert payload == { + "status_code": 200, + "headers": {"X-Safe": "1"}, + "body": {"hello": "world"}, + "content_type": "application/json", + } + # Request actually reached the mixin with the parsed PassthroughRequest. + assert len(handler.calls) == 1 + assert handler.calls[0].method == "GET" + assert handler.calls[0].path == "/me" + + +def test_passthrough_returns_501_when_handler_does_not_support_mixin(client): + # A bare object does not satisfy PassthroughProtocol, so the endpoint + # should surface passthrough_not_supported (501) instead of a 500. + with _patch_handler(object()): + response = client.post( + "/api/integrations/mysql/passthrough", + json={"method": "GET", "path": "/anything"}, + ) + + assert response.status_code == HTTPStatus.NOT_IMPLEMENTED + payload = response.get_json() + assert payload["error_code"] == "passthrough_not_supported" + assert "mysql" in payload["message"] + + +def test_passthrough_returns_400_on_invalid_method(client): + handler = _StubPassthroughHandler(PassthroughResponse(status_code=200, headers={}, body=None, content_type=None)) + + with _patch_handler(handler): + response = client.post( + "/api/integrations/any_ds/passthrough", + json={"method": "TRACE", "path": "/me"}, + ) + + assert response.status_code == HTTPStatus.BAD_REQUEST + payload = response.get_json() + assert payload["error_code"] == "invalid_request" + # The handler must not have been invoked when validation fails up front. + assert handler.calls == [] + + +def _patch_handler_modules(modules: dict): + return patch( + "mindsdb.api.http.namespaces.integrations.integration_controller.handler_modules", + modules, + create=True, + ) + + +def test_capabilities_returns_handlers_dict_and_legacy_list(client): + # Two opted-in handlers covering both auth modes, one non-opt-in, and + # one broken module that lacks a Handler attribute. auth_modes is + # surfaced from the handler's declarative `_auth_mode` class attr β€” + # not inferred from header format. + class _BearerHandler(PassthroughMixin): + pass # inherits _auth_mode = "bearer" + + class _CustomHeaderHandler(PassthroughMixin): + _auth_header_name = "X-Shopify-Access-Token" + _auth_header_format = "{token}" + _auth_mode = "custom" + + class _NotOptedIn: + pass + + bearer_mod = MagicMock() + bearer_mod.Handler = _BearerHandler + custom_mod = MagicMock() + custom_mod.Handler = _CustomHeaderHandler + plain_mod = MagicMock() + plain_mod.Handler = _NotOptedIn + no_handler_mod = MagicMock(spec=[]) + + fake_modules = { + "hubspot": bearer_mod, + "shopify": custom_mod, + "mysql": plain_mod, + "broken": no_handler_mod, + } + + with _patch_handler_modules(fake_modules): + response = client.get("/api/integrations/capabilities") + + assert response.status_code == HTTPStatus.OK + payload = response.get_json() + + # New structured shape: every opted-in handler appears with auth_modes + # and operations metadata. + assert payload["handlers"] == { + "hubspot": {"auth_modes": ["bearer"], "operations": ["passthrough"]}, + "shopify": {"auth_modes": ["custom"], "operations": ["passthrough"]}, + } + + # Legacy flat list: only bearer-auth handlers (Minds migration compat). + assert payload["bearer_passthrough"] == ["hubspot"] + + +def test_capabilities_auth_mode_is_declarative_not_format_derived(client): + # Handler keeps the default "Bearer {token}" header format but flags + # itself as oauth_refresh. The old format-matching heuristic would + # have bucketed this as "bearer"; the new declarative path returns + # the explicit mode and correctly omits it from the legacy list. + class _OAuthRefreshHandler(PassthroughMixin): + _auth_mode = "oauth_refresh" + # _auth_header_format intentionally left as the default. + + oauth_mod = MagicMock() + oauth_mod.Handler = _OAuthRefreshHandler + + with _patch_handler_modules({"hubspot_oauth": oauth_mod}): + response = client.get("/api/integrations/capabilities") + + assert response.status_code == HTTPStatus.OK + payload = response.get_json() + assert payload["handlers"] == { + "hubspot_oauth": {"auth_modes": ["oauth_refresh"], "operations": ["passthrough"]}, + } + # oauth_refresh is NOT surfaced in the legacy bearer-only list even + # though the underlying header format is still "Bearer {token}". + assert payload["bearer_passthrough"] == [] + + +def test_capabilities_empty_when_no_handlers_opted_in(client): + class _NotOptedIn: + pass + + plain_mod = MagicMock() + plain_mod.Handler = _NotOptedIn + + with _patch_handler_modules({"mysql": plain_mod}): + response = client.get("/api/integrations/capabilities") + + assert response.status_code == HTTPStatus.OK + payload = response.get_json() + assert payload == {"handlers": {}, "bearer_passthrough": []} diff --git a/tests/unit/api/http/test_sql_query.py b/tests/unit/api/http/test_sql_query.py new file mode 100644 index 00000000000..b40096ecdcc --- /dev/null +++ b/tests/unit/api/http/test_sql_query.py @@ -0,0 +1,145 @@ +""" +Tests for POST /sql/query endpoint with different response_format values: +1. DEFAULT (None) - returns JSON response +2. SSE ("sse") - returns Server-Sent Events stream +3. JSONLINES ("jsonlines") - returns JSON Lines stream +""" + +import json +from http import HTTPStatus +from unittest.mock import patch, MagicMock + +import pandas as pd + +from mindsdb.api.executor.data_types.sql_answer import SQLAnswer +from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE +from mindsdb.api.executor.sql_query.result_set import ResultSet +from mindsdb.utilities.types.column import Column + + +def create_mock_sql_answer(): + """Create a mock SQLAnswer with table data for testing.""" + columns = [ + Column(name="id", alias="id"), + Column(name="name", alias="name"), + Column(name="value", alias="value"), + ] + + df = pd.DataFrame( + [ + [1, "test1", 100], + [2, "test2", 200], + [3, "test3", 300], + ] + ) + + result_set = ResultSet(columns=columns, df=df) + + return SQLAnswer( + resp_type=RESPONSE_TYPE.TABLE, + result_set=result_set, + ) + + +def check_response(response_data: dict): + # Check response structure for default format + assert response_data["type"] == "table" + assert "data" in response_data + assert "column_names" in response_data + assert "context" in response_data + + # Check data content + assert response_data["column_names"] == ["id", "name", "value"] + assert len(response_data["data"]) == 3 + assert response_data["data"][0] == [1, "test1", 100] + assert response_data["data"][1] == [2, "test2", 200] + assert response_data["data"][2] == [3, "test3", 300] + + +def setup_mock_proxy(mock_proxy_class): + """Configure mock proxy with default behavior.""" + mock_proxy = MagicMock() + mock_proxy_class.return_value = mock_proxy + mock_proxy.process_query.return_value = create_mock_sql_answer() + mock_proxy.get_context.return_value = {} + return mock_proxy + + +class TestSQLQueryResponseFormat: + @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy") + def test_query_default_format(self, mock_proxy_class, client): + """Test POST /sql/query with default response format (no response_format parameter).""" + setup_mock_proxy(mock_proxy_class) + + response = client.post( + "/api/sql/query", + json={"query": "SELECT * FROM table"}, + ) + + assert response.status_code == HTTPStatus.OK + response_data = response.json + check_response(response_data) + + @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy") + def test_query_sse_format(self, mock_proxy_class, client): + """Test POST /sql/query with SSE response format (response_format="sse").""" + setup_mock_proxy(mock_proxy_class) + + response = client.post( + "/api/sql/query", + json={ + "query": "SELECT * FROM table", + "response_format": "sse", + }, + ) + + assert response.status_code == HTTPStatus.OK + assert "text/event-stream" in response.content_type + + # Parse SSE response and build unified response dict + response_text = response.get_data(as_text=True) + lines = [line.replace("data: ", "") for line in response_text.split("\n") if line.startswith("data: ")] + + assert len(lines) > 1 + header = json.loads(lines[0]) + data_rows = json.loads(lines[1]) + + response_data = { + "type": header["type"], + "column_names": header["column_names"], + "data": data_rows, + "context": {}, + } + check_response(response_data) + + @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy") + def test_query_jsonlines_format(self, mock_proxy_class, client): + """Test POST /sql/query with JSONLINES response format (response_format="jsonlines").""" + setup_mock_proxy(mock_proxy_class) + + response = client.post( + "/api/sql/query", + json={ + "query": "SELECT * FROM table", + "response_format": "jsonlines", + }, + ) + + assert response.status_code == HTTPStatus.OK + assert response.content_type == "application/jsonlines" + + # Parse JSONLINES response and build unified response dict + response_text = response.get_data(as_text=True) + lines = [line for line in response_text.split("\n") if line.strip()] + + assert len(lines) > 1 + header = json.loads(lines[0]) + data_rows = json.loads(lines[1]) + + response_data = { + "type": header["type"], + "column_names": header["column_names"], + "data": data_rows, + "context": {}, + } + check_response(response_data) diff --git a/mindsdb/integrations/handlers/airtable_handler/tests/__init__.py b/tests/unit/api/mcp/__init__.py similarity index 100% rename from mindsdb/integrations/handlers/airtable_handler/tests/__init__.py rename to tests/unit/api/mcp/__init__.py diff --git a/tests/unit/api/mcp/test_completions.py b/tests/unit/api/mcp/test_completions.py new file mode 100644 index 00000000000..ab03ecff049 --- /dev/null +++ b/tests/unit/api/mcp/test_completions.py @@ -0,0 +1,135 @@ +""" +Unit tests for the MCP completion handler (mindsdb/api/mcp/completions.py). +""" + +import asyncio +from unittest.mock import MagicMock, patch + +from mcp.types import PromptReference, ResourceTemplateReference +from mcp.shared.memory import create_connected_server_and_client_session + +from mindsdb.api.mcp.mcp_instance import mcp + +# --------------------------------------------------------------------------- +# Patch targets +# --------------------------------------------------------------------------- + +_PATCH_GET_DB_NAMES = "mindsdb.api.mcp.completions._get_database_names" +_PATCH_CTX = "mindsdb.api.mcp.completions.ctx" +_PATCH_SESSION = "mindsdb.api.mcp.completions.SessionController" + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run(coro): + return asyncio.run(coro) + + +def _complete(ref, argument: dict, context_arguments: dict | None = None) -> list[str]: + """Run a completion request and return the list of completion values.""" + + async def _inner(): + async with create_connected_server_and_client_session(mcp) as client: + result = await client.complete( + ref=ref, + argument=argument, + context_arguments=context_arguments, + ) + return result.completion.values + + return _run(_inner()) + + +_PROMPT_REF = PromptReference(type="ref/prompt", name="sample_table") +_RESOURCE_REF = ResourceTemplateReference( + type="ref/resource", + uri="schema://databases/{database_name}/tables", +) + + +def _make_table_mock(name: str) -> MagicMock: + t = MagicMock() + t.TABLE_NAME = name + return t + + +class TestDatabaseNameCompletion: + def test_returns_matching_databases(self): + with patch(_PATCH_GET_DB_NAMES, return_value=["pg_prod", "pg_staging", "mysql_db"]): + values = _complete(_PROMPT_REF, {"name": "database_name", "value": "pg"}) + + assert values == ["pg_prod", "pg_staging"] + + def test_prefix_filters_case_sensitively(self): + with patch(_PATCH_GET_DB_NAMES, return_value=["Postgres", "postgres"]): + values = _complete(_PROMPT_REF, {"name": "database_name", "value": "post"}) + + assert values == ["postgres"] + + def test_empty_prefix_returns_all_databases(self): + db_names = ["pg", "mysql", "mongo"] + with patch(_PATCH_GET_DB_NAMES, return_value=db_names): + values = _complete(_PROMPT_REF, {"name": "database_name", "value": ""}) + + assert values == db_names + + def test_no_match_returns_empty_list(self): + with patch(_PATCH_GET_DB_NAMES, return_value=["pg", "mysql"]): + values = _complete(_PROMPT_REF, {"name": "database_name", "value": "oracle"}) + + assert values == [] + + +class TestTableNameCompletion: + def test_returns_matching_tables(self): + with patch(_PATCH_SESSION) as SC: + SC.return_value.datahub.get.return_value.get_tables.return_value = [ + _make_table_mock("orders"), + _make_table_mock("order_items"), + _make_table_mock("users"), + ] + + # match 2/3 + values = _complete( + _RESOURCE_REF, + {"name": "table_name", "value": "ord"}, + context_arguments={"database_name": "pg"}, + ) + + SC.return_value.datahub.get.assert_called_with("pg") + assert values == ["orders", "order_items"] + + # match all + values = _complete( + _RESOURCE_REF, + {"name": "table_name", "value": ""}, + context_arguments={"database_name": "pg"}, + ) + + assert values == ["orders", "order_items", "users"] + + # match 0 + values = _complete( + _RESOURCE_REF, + {"name": "table_name", "value": "qwerty"}, + context_arguments={"database_name": "pg"}, + ) + + assert values == [] + + def test_missing_database_name_context_returns_empty(self): + """When database_name is not in context_arguments, return empty.""" + with patch(_PATCH_SESSION): + values = _complete( + _RESOURCE_REF, + {"name": "table_name", "value": "ord"}, + context_arguments=None, + ) + + assert values == [] + + def test_unknown_argument_name_returns_empty(self): + values = _complete(_PROMPT_REF, {"name": "unknown_param", "value": "foo"}) + assert values == [] diff --git a/tests/unit/api/mcp/test_prompts.py b/tests/unit/api/mcp/test_prompts.py new file mode 100644 index 00000000000..2e7ea7b5d60 --- /dev/null +++ b/tests/unit/api/mcp/test_prompts.py @@ -0,0 +1,45 @@ +""" +Unit tests for MCP prompts (mindsdb/api/mcp/prompts/*). + +mcp.get_prompt() is async; tests run it with asyncio.run(). +""" + +import json +import asyncio + +from mindsdb.api.mcp.mcp_instance import mcp + + +def _run(coro): + return asyncio.run(coro) + + +def _get_sample_table_prompt(database_name: str, table_name: str): + """Call sample_table prompt and return the GetPromptResult.""" + return _run(mcp.get_prompt("sample_table", {"database_name": database_name, "table_name": table_name})) + + +def _get_first_message_text(prompt: object) -> str: + """Return the text content of the first message.""" + raw = prompt.messages[0].content.text + # FastMCP serialises the TextContent to JSON inside the PromptMessage + return json.loads(raw)["text"] + + +class TestPrompt: + def test_sample_table_exists(self): + # sample_table exists and has description + prompts = _run(mcp.list_prompts()) + prompt = next(p for p in prompts if p.name == "sample_table") + assert prompt.description # non-empty + + def test_sample_table_content(self): + # test content of the prompt + result = _get_sample_table_prompt("MyDB", "mytable") + assert len(result.messages) == 1 + assert result.messages[0].role == "user" + assert result.messages[0].content.type == "text" + + text = _get_first_message_text(result) + assert "`MyDB`.`mytable`" in text + assert "limit 5" in text.lower() diff --git a/tests/unit/api/mcp/test_query_tool.py b/tests/unit/api/mcp/test_query_tool.py new file mode 100644 index 00000000000..bd4d0bcd430 --- /dev/null +++ b/tests/unit/api/mcp/test_query_tool.py @@ -0,0 +1,129 @@ +""" +Unit tests for the MCP tools (mindsdb/api/mcp/tools/*). +""" + +import asyncio +import json +from unittest.mock import patch + + +_PATCH_PROXY = "mindsdb.api.mcp.tools.query.FakeMysqlProxy" + + +def _run(coro): + """Run an async coroutine synchronously.""" + return asyncio.run(coro) + + +def _call_tool(sql: str, context=None): + """Call the MCP query tool synchronously and return parsed JSON.""" + args = {"query": sql} + if context is not None: + args["context"] = context + + from mindsdb.api.mcp.mcp_instance import mcp + + content, _ = _run(mcp.call_tool("query", args)) + return json.loads(content[0].text) + + +def _make_proxy_ok(mock_proxy_cls, affected_rows=0): + """Configure mock proxy to return an OK response.""" + mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = { + "type": "ok", + "affected_rows": affected_rows, + } + return mock_proxy_cls.return_value + + +def _make_proxy_table(mock_proxy_cls, column_names, data): + """Configure mock proxy to return a table response.""" + mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = { + "type": "table", + "column_names": column_names, + "data": data, + } + return mock_proxy_cls.return_value + + +def _make_proxy_error(mock_proxy_cls, error_message, error_code=0): + """Configure mock proxy to return an error response.""" + mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = { + "type": "error", + "error_code": error_code, + "error_message": error_message, + } + return mock_proxy_cls.return_value + + +class TestResponseTypes: + def test_select_returns_table_type(self): + expected_data = [[1, "alice"], [2, "bob"]] + columns_list = ["id", "name"] + with patch(_PATCH_PROXY) as MockProxy: + _make_proxy_table(MockProxy, columns_list, expected_data) + result = _call_tool("SELECT * FROM mydb.users") + + assert result["type"] == "table" + assert result["column_names"] == columns_list + assert result["data"] == expected_data + + def test_select_empty_result(self): + columns_list = ["id", "name"] + with patch(_PATCH_PROXY) as MockProxy: + _make_proxy_table(MockProxy, columns_list, []) + result = _call_tool("SELECT * FROM mydb.users WHERE 1=0") + + assert result["type"] == "table" + assert result["column_names"] == columns_list + assert result["data"] == [] + + def test_insert_returns_ok_type(self): + with patch(_PATCH_PROXY) as MockProxy: + _make_proxy_ok(MockProxy, affected_rows=1) + result = _call_tool("INSERT INTO mydb.t (id) VALUES (1)") + + assert result["type"] == "ok" + assert result["affected_rows"] == 1 + + def test_proxy_error_response_returns_error_type(self): + error_message = "Table 'x' doesn't exist" + with patch(_PATCH_PROXY) as MockProxy: + _make_proxy_error(MockProxy, error_message, error_code=123) + result = _call_tool("SELECT * FROM mydb.x") + + assert result["type"] == "error" + assert result["error_message"] == error_message + assert result["error_code"] == 123 + + def test_exception_in_process_query_returns_error_type(self): + error_message = "connection refused" + with patch(_PATCH_PROXY) as MockProxy: + MockProxy.return_value.process_query.side_effect = Exception(error_message) + result = _call_tool("SELECT 1") + + assert result["type"] == "error" + assert result["error_message"] == error_message + + +class TestContextParameter: + def test_context_is_passed_to_set_context(self): + with patch(_PATCH_PROXY) as MockProxy: + proxy = _make_proxy_ok(MockProxy) + _call_tool("SELECT 1", context={"db": "my_postgres"}) + + proxy.set_context.assert_called_once_with({"db": "my_postgres"}) + + def test_omitted_context_defaults_to_empty_dict(self): + with patch(_PATCH_PROXY) as MockProxy: + proxy = _make_proxy_ok(MockProxy) + _call_tool("SELECT 1") # no context argument + + proxy.set_context.assert_called_once_with({}) + + def test_explicit_none_context_defaults_to_empty_dict(self): + with patch(_PATCH_PROXY) as MockProxy: + proxy = _make_proxy_ok(MockProxy) + _call_tool("SELECT 1", context=None) + + proxy.set_context.assert_called_once_with({}) diff --git a/tests/unit/api/mcp/test_resources.py b/tests/unit/api/mcp/test_resources.py new file mode 100644 index 00000000000..6bac3891875 --- /dev/null +++ b/tests/unit/api/mcp/test_resources.py @@ -0,0 +1,177 @@ +""" +Unit tests for MCP resources (mindsdb/api/mcp/resources/*) +""" + +import asyncio +import json +from unittest.mock import MagicMock, patch + +import pandas as pd + +from mindsdb.integrations.libs.response import TableResponse as HandlerTableResponse +from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE + + +_PATCH_SESSION = "mindsdb.api.mcp.resources.schema.SessionController" +_PATCH_TABLE_RESPONSE = "mindsdb.api.mcp.resources.schema.TableResponse" +_PATCH_RESPONSE_TYPE = "mindsdb.api.mcp.resources.schema.RESPONSE_TYPE" + + +def _run(coro): + return asyncio.run(coro) + + +def _read(uri: str) -> list: + """Read a resource and return parsed JSON payload.""" + from mindsdb.api.mcp.mcp_instance import mcp + + contents = list(_run(mcp.read_resource(uri))) + return json.loads(contents[0].content) + + +def _make_table_mock(name: str, table_type: str = "BASE TABLE", schema: str = "public") -> MagicMock: + t = MagicMock() + t.TABLE_NAME = name + t.TABLE_TYPE = table_type + t.TABLE_SCHEMA = schema + return t + + +def _make_columns_table_response(rows: list[dict]) -> MagicMock: + """Build a mock HandlerTableResponse with COLUMNS_TABLE type.""" + tr = MagicMock(spec=HandlerTableResponse) + tr.type = RESPONSE_TYPE.COLUMNS_TABLE + tr.fetchall.return_value = pd.DataFrame(rows) + return tr + + +def _make_kb(name, project, metadata_cols=None, content_cols=None, id_col="id"): + return { + "name": name, + "project": project, + "metadata_columns": metadata_cols or [], + "content_columns": content_cols or ["body"], + "id_column": id_col, + } + + +class TestListDatabases: + def test_returns_only_data_type_databases(self): + from mindsdb.api.mcp.mcp_instance import mcp + + with patch(_PATCH_SESSION) as SC: + SC.return_value.database_controller.get_list.return_value = [ + {"name": "pg_prod", "type": "data"}, + {"name": "mindsdb", "type": "project"}, + {"name": "mysql_db", "type": "data"}, + ] + + result = list(_run(mcp.read_resource("schema://databases"))) + + assert len(result) == 1 + assert json.loads(result[0].content) == ["pg_prod", "mysql_db"] + assert result[0].mime_type == "application/json" + + def test_filters_out_all_non_data_types(self): + with patch(_PATCH_SESSION) as SC: + SC.return_value.database_controller.get_list.return_value = [ + {"name": "mindsdb", "type": "project"}, + {"name": "files", "type": "files"}, + ] + result = _read("schema://databases") + + assert result == [] + + +class TestDbTables: + def test_returns_table_names(self): + with patch(_PATCH_SESSION) as SC: + SC.return_value.datahub.get.return_value.get_tables.return_value = [ + _make_table_mock("orders"), + _make_table_mock("users"), + ] + result = _read("schema://databases/mydb/tables") + + SC.return_value.datahub.get.assert_called_once_with("mydb") + + names = [t["TABLE_NAME"] for t in result] + assert names == ["orders", "users"] + assert set(result[0].keys()) == {"TABLE_NAME", "TABLE_TYPE", "TABLE_SCHEMA"} + + def test_returns_table_type_and_schema(self): + with patch(_PATCH_SESSION) as SC: + SC.return_value.datahub.get.return_value.get_tables.return_value = [ + _make_table_mock("orders", table_type="VIEW", schema="myschema"), + ] + result = _read("schema://databases/mydb/tables") + + assert result[0]["TABLE_TYPE"] == "VIEW" + assert result[0]["TABLE_SCHEMA"] == "myschema" + + def test_empty_database_returns_empty_list(self): + with patch(_PATCH_SESSION) as SC: + SC.return_value.datahub.get.return_value.get_tables.return_value = [] + result = _read("schema://databases/emptydb/tables") + + assert result == [] + + +class TestDbTableColumns: + def test_returns_column_names_and_types(self): + rows = [ + {"COLUMN_NAME": "id", "MYSQL_DATA_TYPE": "int"}, + {"COLUMN_NAME": "email", "MYSQL_DATA_TYPE": "varchar(255)"}, + ] + with ( + patch(_PATCH_SESSION) as SC, + patch(_PATCH_TABLE_RESPONSE, HandlerTableResponse), + patch(_PATCH_RESPONSE_TYPE, RESPONSE_TYPE), + ): + SC.return_value.integration_controller.get_data_handler.return_value.get_columns.return_value = ( + _make_columns_table_response(rows) + ) + + result = _read("schema://databases/mydb/tables/orders/columns") + SC.return_value.integration_controller.get_data_handler.assert_called_once_with("mydb") + SC.return_value.integration_controller.get_data_handler.return_value.get_columns.assert_called_once_with( + "orders" + ) + + assert result[0] == {"COLUMN_NAME": "id", "MYSQL_DATA_TYPE": "int"} + assert result[1] == {"COLUMN_NAME": "email", "MYSQL_DATA_TYPE": "varchar(255)"} + + +class TestListKnowledgeBases: + def test_returns_knowledge_bases_from_all_projects(self): + with patch(_PATCH_SESSION) as SC: + SC.return_value.datahub.get_projects_names.return_value = ["mindsdb", "my_project"] + SC.return_value.kb_controller.list.side_effect = [ + [_make_kb("kb1", "mindsdb")], + [_make_kb("kb2", "my_project")], + ] + result = _read("schema://knowledge_bases") + + assert len(result) == 2 + assert result[0]["name"] == "kb1" + assert result[1]["name"] == "kb2" + + def test_returns_correct_kb_fields(self): + kb = _make_kb( + "docs_kb", + "mindsdb", + metadata_cols=["source", "date"], + content_cols=["body"], + id_col="doc_id", + ) + with patch(_PATCH_SESSION) as SC: + SC.return_value.datahub.get_projects_names.return_value = ["mindsdb"] + SC.return_value.kb_controller.list.return_value = [kb] + result = _read("schema://knowledge_bases") + + assert result[0] == { + "name": "docs_kb", + "project": "mindsdb", + "metadata_columns": ["source", "date"], + "content_columns": ["body"], + "id_column": "doc_id", + } diff --git a/tests/unit/executor/test_agent.py b/tests/unit/executor/test_agent.py index 88306a748f9..a41d36f0a6c 100644 --- a/tests/unit/executor/test_agent.py +++ b/tests/unit/executor/test_agent.py @@ -1,15 +1,15 @@ -import time import os import json from unittest.mock import patch, AsyncMock +from sqlalchemy.orm.attributes import flag_modified import pandas as pd import pytest import sys from openai.types.chat import ChatCompletion from tests.unit.executor_test_base import BaseExecutorDummyML -from tests.unit.executor.test_knowledge_base import set_litellm_embedding +from tests.unit.executor.test_knowledge_base import set_embedding def action_response(type="final_query", sql="", text=""): @@ -18,16 +18,19 @@ def action_response(type="final_query", sql="", text=""): return json.dumps({"sql_query": sql, "type": type, "text": text, "short_description": "a tool"}) -def set_openai_completion(mock_openai, llm_response): +def set_openai_completion(mock_openai, llm_response, add_planning=True): if isinstance(llm_response, str): llm_responses = [ action_response(sql=f"select '{llm_response}' as answer"), ] + elif not isinstance(llm_response, list): + llm_responses = [llm_response] else: llm_responses = llm_response - # always add plan response - llm_responses.insert(0, '{"plan":"my plan is ...", "estimated_steps":3}') + if add_planning: + # add plan response + llm_responses.insert(0, '{"plan":"my plan is ...", "estimated_steps":3}') mock_openai.agent_calls = [] calls = [] @@ -104,84 +107,10 @@ def setup_method(self): config["knowledge_bases"]["disable_autobatch"] = True - @pytest.mark.slow - def unused_test_mindsdb_provider(self): - # pydantic agent doesn't support using mindsdb model - from mindsdb.api.executor.exceptions import ExecutorException - - agent_response = "how can I help you" - # model - self.run_sql( - f""" - CREATE model base_model - PREDICT output - using - column='question', - output='{agent_response}', - engine='dummy_ml', - join_learn_process=true - """ - ) - - self.run_sql("CREATE ML_ENGINE langchain FROM langchain") - - agent_params = """ - USING - provider='mindsdb', - model = "base_model", -- < - prompt_template="Answer the user input in a helpful way" - """ - self.run_sql(f""" - CREATE AGENT my_agent {agent_params} - """) - with pytest.raises(ExecutorException): - self.run_sql(f""" - CREATE AGENT my_agent {agent_params} - """) - self.run_sql(f""" - CREATE AGENT IF NOT EXISTS my_agent {agent_params} - """) - - ret = self.run_sql("select * from my_agent where question = 'hi'") - - assert agent_response in ret.answer[0] - - @pytest.mark.skipif( - sys.platform in ["darwin", "win32"], reason="Mocking doesn't work on Windows or macOS for some reason" - ) - @patch("openai.OpenAI") - def unused_test_openai_provider_with_model(self, mock_openai): - # pydantic agent doesn't support using mindsdb model - - agent_response = "how can I assist you today?" - set_openai_completion(mock_openai, agent_response) - - self.run_sql("CREATE ML_ENGINE langchain FROM langchain") - - self.run_sql(""" - CREATE MODEL lang_model - PREDICT answer USING - engine = "langchain", - model = "gpt-3.5-turbo", - openai_api_key='--', - prompt_template="Answer the user input in a helpful way"; - """) - - time.sleep(5) - - self.run_sql(""" - CREATE AGENT my_agent - USING - model='lang_model' - """) - ret = self.run_sql("select * from my_agent where question = 'hi'") - - assert agent_response in ret.answer[0] - @patch("pydantic_ai.providers.openai.AsyncOpenAI") def test_openai_provider(self, mock_openai): - agent_response = "how can I assist you today?" - set_openai_completion(mock_openai, agent_response) + # test response + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT my_agent @@ -193,6 +122,10 @@ def test_openai_provider(self, mock_openai): }, prompt_template="Answer the user input in a helpful way" """) + + agent_response = "how can I assist you today?" + set_openai_completion(mock_openai, agent_response) + ret = self.run_sql("select * from my_agent where question = 'hi'") # check model params @@ -252,10 +185,8 @@ def config_get_side_effect(key, default=None): mock_config_get.side_effect = config_get_side_effect - agent_response = "how can I assist you today?" - set_openai_completion(mock_openai, agent_response) - # Create an agent with only provider specified - should use default LLM params + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT default_params_agent USING @@ -266,6 +197,8 @@ def config_get_side_effect(key, default=None): }, prompt_template="Answer the user input in a helpful way" """) + agent_response = "how can I assist you today?" + set_openai_completion(mock_openai, agent_response) # Check that the agent was created with the default parameters agent_info = self.run_sql("SELECT * FROM information_schema.agents WHERE name = 'default_params_agent'") @@ -273,7 +206,7 @@ def config_get_side_effect(key, default=None): # Verify the agent has the user-specified parameters but not default parameters agent_params = json.loads(agent_info["PARAMS"].iloc[0]) assert agent_params.get("prompt_template") == "Answer the user input in a helpful way" - assert agent_params["model"]["model_name"] == "gpt-3" + assert "gpt-3" in agent_info["MODEL"][0] # Default parameters should NOT be stored in the database # They will be applied at runtime via get_agent_llm_params @@ -291,19 +224,18 @@ def config_get_side_effect(key, default=None): # --- Test that agent creation works with minimal syntax using default_llm config --- - mock_openai.reset_mock() - agent_response = "how can I assist you today?" - set_openai_completion(mock_openai, agent_response) - # Create an agent with minimal syntax - should use all default LLM params + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT minimal_syntax_agent USING - data = { - "tables": ['test.table1', 'test.table2'] - } + data = { } """) + mock_openai.reset_mock() + agent_response = "how can I assist you today?" + set_openai_completion(mock_openai, agent_response) + ret = self.run_sql("select * from minimal_syntax_agent where question = 'hi'") assert agent_response in ret.answer[0] @@ -314,18 +246,21 @@ def config_get_side_effect(key, default=None): @pytest.mark.skipif(sys.platform == "darwin", reason="Fails on macOS") @patch("pydantic_ai.providers.openai.AsyncOpenAI") def test_agent_stream(self, mock_openai): - agent_response = "how can I assist you today?" - set_openai_completion(mock_openai, agent_response) - + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT my_agent USING - provider='openai', - model = "gpt-3.5-turbo", - openai_api_key='--', + model={ + "model_name": "gpt-3.5-turbo", + "provider": "openai", + "api_key": "--" + }, prompt_template="Answer the user input in a helpful way" """) + agent_response = "how can I assist you today?" + set_openai_completion(mock_openai, agent_response) + agents_controller = self.command_executor.session.agents_controller agent = agents_controller.get_agent("my_agent") @@ -340,11 +275,7 @@ def test_agent_stream(self, mock_openai): def _create_kb_storage(self, kb_name): self.run_sql(f""" create database db_{kb_name} - with - engine='chromadb', - PARAMETERS = {{ - 'persist_directory': '{kb_name}' - }} + with engine='duckdb_faiss' """) return f"db_{kb_name}.default_collection" @@ -355,10 +286,10 @@ def _drop_kb_storage(self, vector_table_name): self.run_sql(f"drop database {db_name}") - @patch("litellm.embedding") + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") @patch("pydantic_ai.providers.openai.AsyncOpenAI") - def test_agent_retrieval(self, mock_openai, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + def test_agent_retrieval(self, mock_openai, mock_embedding): + set_embedding(mock_embedding) vector_table_name = self._create_kb_storage("kb_review") self.run_sql(f""" @@ -374,16 +305,18 @@ def test_agent_retrieval(self, mock_openai, mock_litellm_embedding): os.environ["OPENAI_API_KEY"] = "--" + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" create agent retrieve_agent using - model='gpt-3.5-turbo', - provider='openai', + model={ + "model_name": "gpt-3.5-turbo", + "provider": "openai" + }, prompt_template='Answer the user input in a helpful way using tools', data = { "knowledge_bases": ["kb_review"] - }, - mode='retrieval' + } """) agent_response = "the answer is yes" @@ -417,10 +350,12 @@ def test_agent_retrieval(self, mock_openai, mock_litellm_embedding): self._drop_kb_storage(vector_table_name) # should not be possible to drop demo agent - def test_drop_demo_agent(self): + @patch("pydantic_ai.providers.openai.AsyncOpenAI") + def test_drop_demo_agent(self, mock_openai): """should not be possible to drop demo agent""" from mindsdb.api.executor.exceptions import ExecutorException + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT my_demo_agent USING @@ -429,37 +364,49 @@ def test_drop_demo_agent(self): 'model_name': "gpt-3.5-turbo", 'api_key': '-key-' }, - prompt_template="--", - is_demo=true; + prompt_template="--" """) + + # mark as demo in db + agent = self.db.Agents.query.filter_by(name="my_demo_agent").first() + agent.params["is_demo"] = True + flag_modified(agent, "params") + self.db.session.commit() with pytest.raises(ExecutorException): - self.run_sql("drop agent my_agent") + self.run_sql("drop agent my_demo_agent") @patch("pydantic_ai.providers.openai.AsyncOpenAI") def test_agent_default_prompt_template(self, mock_openai): """Test that agents work correctly with default prompt templates in different modes""" - agent_response = "default prompt template response" - set_openai_completion(mock_openai, agent_response) # Test non-retrieval mode with no prompt_template (should use default) + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT default_prompt_agent USING - provider='openai', - model = "gpt-3.5-turbo", - openai_api_key='--' + model={ + "model_name": "gpt-3.5-turbo", + "provider": "openai", + "api_key": "--" + } """) + + agent_response = "default prompt template response" + set_openai_completion(mock_openai, agent_response) + ret = self.run_sql("select * from default_prompt_agent where question = 'test question'") assert agent_response in ret.answer[0] # Test retrieval mode with no prompt_template (should use default retrieval template) + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT default_retrieval_agent USING - provider='openai', - model = "gpt-3.5-turbo", - openai_api_key='--', - mode='retrieval' + model={ + "model_name": "gpt-3.5-turbo", + "provider": "openai", + "api_key": "--" + } """) mock_openai.reset_mock() @@ -468,9 +415,9 @@ def test_agent_default_prompt_template(self, mock_openai): assert agent_response in ret.answer[0] @patch("pydantic_ai.providers.openai.AsyncOpenAI") - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_agent_permissions(self, mock_litellm_embedding, mock_openai): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_agent_permissions(self, mock_embedding, mock_openai): + set_embedding(mock_embedding) vector_table_name = self._create_kb_storage("kb_show") @@ -495,11 +442,14 @@ def test_agent_permissions(self, mock_litellm_embedding, mock_openai): select id, planet_name content from files.show1 """) + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT my_agent USING - model = "gpt-3.5-turbo", - openai_api_key='--', + model={ + "model_name": "gpt-3.5-turbo", + "api_key": '--' + }, data = { "knowledge_bases": ["kb_show*"], "tables": ["files.show*"] @@ -585,9 +535,9 @@ def test_agent_permissions(self, mock_litellm_embedding, mock_openai): self._drop_kb_storage(vector_table_name) @patch("pydantic_ai.providers.openai.AsyncOpenAI") - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_agent_new_syntax(self, mock_litellm_embedding, mock_openai): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_agent_new_syntax(self, mock_embedding, mock_openai): + set_embedding(mock_embedding) vector_table_name = self._create_kb_storage("kb") df = get_dataset_planets() # create 2 files and KBs @@ -605,6 +555,7 @@ def test_agent_new_syntax(self, mock_litellm_embedding, mock_openai): select id, planet_name content from files.file{i} where id != 1000 """) + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT my_agent USING @@ -657,6 +608,7 @@ def test_agent_new_syntax(self, mock_litellm_embedding, mock_openai): assert "important user instruction β„–42" in mock_openai.agent_calls[0] # --- ALTER AGENT --- + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" ALTER AGENT my_agent USING @@ -713,9 +665,9 @@ def test_agent_new_syntax(self, mock_litellm_embedding, mock_openai): self._drop_kb_storage(vector_table_name) @patch("pydantic_ai.providers.openai.AsyncOpenAI") - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_agent_accept_wrong_quoting(self, mock_litellm_embedding, mock_openai): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_agent_accept_wrong_quoting(self, mock_embedding, mock_openai): + set_embedding(mock_embedding) vector_table_name = self._create_kb_storage("kb1") self.run_sql(f""" create knowledge base kb1 @@ -727,11 +679,14 @@ def test_agent_accept_wrong_quoting(self, mock_litellm_embedding, mock_openai): self.save_file("file1", df) + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT my_agent USING - model = "gpt-3.5-turbo", - openai_api_key='--', + model={ + "model_name": "gpt-3.5-turbo", + "api_key": '--' + }, data = { "knowledge_bases": ["kb1"], "tables": ["files.file1", "files.file2.*"] @@ -765,11 +720,14 @@ def test_3_part_table(self, mock_pg, mock_openai): df = get_dataset_planets() self.set_handler(mock_pg, name="pg", tables={"planets": df}, schema="public") + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql(""" CREATE AGENT my_agent USING - model = "gpt-3.5-turbo", - openai_api_key='--', + model={ + "model_name": "gpt-3.5-turbo", + "api_key": '--' + }, data = { "tables": ["pg.public.*"] } @@ -792,19 +750,23 @@ def test_3_part_table(self, mock_pg, mock_openai): assert "Moon" in mock_openai.agent_calls[3] assert "Moon" in mock_openai.agent_calls[4] + @patch("pydantic_ai.providers.openai.AsyncOpenAI") @patch("mindsdb.interfaces.agents.pydantic_ai_agent.PydanticAIAgent._get_completion_stream") - def test_agent_query_param_override(self, mock_get_completion): + def test_agent_query_param_override(self, mock_get_completion, mock_openai): """ Test that agent parameters can be overridden per-query using the USING clause in SELECT. """ mock_get_completion.return_value = [{"type": "data", "content": "-"}] + set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) self.run_sql( """ CREATE AGENT override_agent USING - model = 'gpt-4o', - openai_api_key = 'sk-override', + model={ + "model_name": "gpt-4o", + "api_key": 'sk-override' + }, prompt_template = 'Answer questions', timeout = 60; """ diff --git a/tests/unit/executor/test_api_handler.py b/tests/unit/executor/test_api_handler.py index cbc6a8ff862..2e0ee4e9582 100644 --- a/tests/unit/executor/test_api_handler.py +++ b/tests/unit/executor/test_api_handler.py @@ -1,15 +1,14 @@ import sys import types -from unittest.mock import patch import datetime as dt +from unittest.mock import patch +from dataclasses import dataclass import pandas as pd from tests.unit.executor_test_base import BaseExecutorDummyML -from dataclasses import dataclass - # import modules virtually if it is not installed try: @@ -26,6 +25,11 @@ class TestApiHandler(BaseExecutorDummyML): + def setup_method(self): + super().setup_method() + self.setup_community_handler("github") + self.setup_community_handler("email") + @patch("github.Github") def test_github(self, Github): """ @@ -115,7 +119,7 @@ class Issue: assert args[0] == "feature" assert kwargs["body"] == "do better" - @patch("mindsdb.integrations.handlers.email_handler.email_handler.EmailClient") + @patch("mindsdb_community_handlers.email_handler.email_handler.EmailClient") def test_email(self, EmailClient): """ Test for APITable diff --git a/tests/unit/executor/test_base_queires.py b/tests/unit/executor/test_base_queires.py index 5fbece5c4d3..0a0e3c2ab79 100644 --- a/tests/unit/executor/test_base_queires.py +++ b/tests/unit/executor/test_base_queires.py @@ -899,6 +899,40 @@ def test_subselect_1row_aggregate(self, data_handler): assert len(ret) == 1 assert ret["result"][0] == 1 + @patch("mindsdb.integrations.handlers.postgres_handler.Handler") + def test_cte_join(self, data_handler): + self.set_handler(data_handler, name="pg", tables={"stores": get_stores_df()}) + self.save_file("regions", get_regions_df()) + + ret = self.run_sql(""" + WITH regions AS ( + SELECT DISTINCT id, name FROM files.regions + ), + stores AS ( + SELECT * FROM pg.stores + LIMIT 10 + ) + SELECT format, region_id FROM pg.stores s + JOIN regions r on r.id = s.region_id + WHERE s.format IN (SELECT format FROM stores WHERE format='a') + LIMIT 100; + """) + assert len(ret) > 1 + assert ret["format"][0] == "a" + + @patch("mindsdb.integrations.handlers.postgres_handler.Handler") + def test_view_duplicated_cols(self, data_handler): + self.set_handler(data_handler, name="pg", tables={"stores": get_stores_df(), "regions": get_regions_df()}) + + with pytest.raises(Exception): + # `id` exists in both tables, should raise an exception + self.run_sql(""" + create view v1 ( + select * from pg.stores s + join pg.regions r on r.id = s.region_id + ) + """) + class TestSet(BaseExecutorTest): @pytest.mark.parametrize("var", ["var", "@@var", "@@session.var", "session var"]) diff --git a/tests/unit/executor/test_executor.py b/tests/unit/executor/test_executor.py index c901e7bab55..89a4acdfda5 100644 --- a/tests/unit/executor/test_executor.py +++ b/tests/unit/executor/test_executor.py @@ -11,7 +11,9 @@ from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.api.executor.utilities.sql import query_df +from mindsdb_sql_parser import parse_sql + +from mindsdb.api.executor.utilities.sql import query_df, query_dfs # How to run: # env PYTHONPATH=./ pytest tests/unit/test_executor.py @@ -1618,6 +1620,75 @@ def test_query_df_functions(self): result = query_df(df, query)["result"][0] assert isinstance(result, dt.time) + def test_not_exists_correlated_subquery(self): + a = pd.DataFrame( + [ + {"tab_num": 1, "shop": 1}, + {"tab_num": 1, "shop": 2}, + {"tab_num": 1, "shop": 3}, + {"tab_num": 2, "shop": 1}, + {"tab_num": 2, "shop": 2}, + {"tab_num": 3, "shop": 1}, + ] + ) + b = pd.DataFrame([{"shop": 1}, {"shop": 2}, {"shop": 3}]) + + result = query_dfs( + {"A": a, "B": b}, + parse_sql( + """ + SELECT DISTINCT a1.tab_num + FROM A a1 + WHERE NOT EXISTS ( + SELECT * FROM B b + WHERE NOT EXISTS ( + SELECT * FROM A a2 + WHERE a2.tab_num = a1.tab_num AND a2.shop = b.shop + ) + ) + """, + dialect="mindsdb", + ), + ) + + # Only tab_num=1 covers all shops {1, 2, 3} + assert list(result["tab_num"]) == [1] + + def test_exists_correlated_subquery(self): + # EXISTS version: find tab_num values missing at least one shop. + # tab_num=2 misses shop=3, tab_num=3 misses shops 2 and 3. + a = pd.DataFrame( + [ + {"tab_num": 1, "shop": 1}, + {"tab_num": 1, "shop": 2}, + {"tab_num": 1, "shop": 3}, + {"tab_num": 2, "shop": 1}, + {"tab_num": 2, "shop": 2}, + {"tab_num": 3, "shop": 1}, + ] + ) + b = pd.DataFrame([{"shop": 1}, {"shop": 2}, {"shop": 3}]) + + result = query_dfs( + {"A": a, "B": b}, + parse_sql( + """ + SELECT DISTINCT a1.tab_num + FROM A a1 + WHERE EXISTS ( + SELECT * FROM B b + WHERE NOT EXISTS ( + SELECT * FROM A a2 + WHERE a2.tab_num = a1.tab_num AND a2.shop = b.shop + ) + ) + """, + dialect="mindsdb", + ), + ) + + assert sorted(result["tab_num"].tolist()) == [2, 3] + class TestIfExistsIfNotExists(BaseExecutorMockPredictor): def setup_method(self, method): diff --git a/tests/unit/executor/test_files.py b/tests/unit/executor/test_files.py index 0181da273fa..cdbee61fbdb 100644 --- a/tests/unit/executor/test_files.py +++ b/tests/unit/executor/test_files.py @@ -152,8 +152,8 @@ def test_multi_table_relational_division(self): """ ) - assert len(result) == 3 - assert sorted(result["tab_num"].tolist()) == [1, 2, 3] + assert len(result) == 2 + assert sorted(result["tab_num"].tolist()) == [1, 2] def test_multi_table_join_with_aliases(self): """Test JOIN with aliases and database prefixes""" diff --git a/tests/unused/unit/handler_tests/test_handler_metrics.py b/tests/unit/executor/test_handler_metrics.py similarity index 55% rename from tests/unused/unit/handler_tests/test_handler_metrics.py rename to tests/unit/executor/test_handler_metrics.py index 26ee07040ed..40ae9813a39 100644 --- a/tests/unused/unit/handler_tests/test_handler_metrics.py +++ b/tests/unit/executor/test_handler_metrics.py @@ -1,52 +1,58 @@ import datetime import pandas as pd -from unit.executor_test_base import BaseExecutorDummyML +from tests.unit.executor_test_base import BaseExecutorDummyML class TestHandlerMetrics(BaseExecutorDummyML): def test_handler_query_time(self): - self.set_data('tasks', pd.DataFrame([ - {'a': 1, 'b': datetime.datetime(2020, 1, 1)}, - {'a': 2, 'b': datetime.datetime(2020, 1, 2)}, - {'a': 1, 'b': datetime.datetime(2020, 1, 3)}, - ])) + self.set_data( + "tasks", + pd.DataFrame( + [ + {"a": 1, "b": datetime.datetime(2020, 1, 1)}, + {"a": 2, "b": datetime.datetime(2020, 1, 2)}, + {"a": 1, "b": datetime.datetime(2020, 1, 3)}, + ] + ), + ) # Create & predict a simple model. - self.run_sql('create database proj') + self.run_sql("create database proj") self.run_sql( - ''' + """ CREATE model proj.task_model from dummy_data (select * from tasks) PREDICT a using engine='dummy_ml', tag = 'first', join_learn_process=true - ''' + """ ) - self.wait_predictor('proj', 'task_model') - self.run_sql(''' + self.wait_predictor("proj", "task_model") + self.run_sql(""" SELECT m.* FROM dummy_data.tasks as t JOIN proj.task_model as m - ''') + """) # Import here so we don't reuse registry across test functions. from mindsdb.metrics import metrics + query_time_metric = list(metrics.INTEGRATION_HANDLER_QUERY_TIME.collect())[0] query_size_metric = list(metrics.INTEGRATION_HANDLER_RESPONSE_SIZE.collect())[0] assert len(query_time_metric.samples) == 3 assert len(query_size_metric.samples) == 3 for sample in query_time_metric.samples: - assert sample.name.startswith('mindsdb_integration_handler_query_seconds') - if sample.name.endswith('count'): + assert sample.name.startswith("mindsdb_integration_handler_query_seconds") + if sample.name.endswith("count"): assert sample.value == 1.0 - elif sample.name.endswith('sum'): + elif sample.name.endswith("sum"): assert sample.value > 0.0 - elif sample.name.endswith('created'): + elif sample.name.endswith("created"): assert sample.value > 0.0 for sample in query_size_metric.samples: - assert sample.name.startswith('mindsdb_integration_handler_response_size') - if sample.name.endswith('count'): + assert sample.name.startswith("mindsdb_integration_handler_response_size") + if sample.name.endswith("count"): assert sample.value == 1.0 - elif sample.name.endswith('sum'): + elif sample.name.endswith("sum"): assert sample.value > 0.0 - elif sample.name.endswith('created'): + elif sample.name.endswith("created"): assert sample.value > 0.0 diff --git a/tests/unit/executor/test_knowledge_base.py b/tests/unit/executor/test_knowledge_base.py index 485e9bb9e20..7646caca9ae 100644 --- a/tests/unit/executor/test_knowledge_base.py +++ b/tests/unit/executor/test_knowledge_base.py @@ -1,6 +1,7 @@ import time import json import tempfile +import datetime as dt from unittest.mock import patch, MagicMock import threading @@ -8,7 +9,6 @@ import pandas as pd import pytest -import sys from tests.unit.executor_test_base import BaseExecutorDummyML from mindsdb.integrations.utilities.rag.rerankers.base_reranker import ( @@ -32,12 +32,13 @@ def task_monitor(): worker.join() -def dummy_embeddings(string, dimension=None): +def dummy_embeddings(string, dimension=None, base=None): # Imitates embedding generation: create vectors which are similar for similar words in inputs if dimension is None: dimension = 25**2 embeds = [0] * dimension - base = 25 + if base is None: + base = 25 string = string.lower().replace(",", " ").replace(".", " ") for word in string.split(): @@ -60,13 +61,11 @@ def dummy_embeddings(string, dimension=None): return embeds -def set_litellm_embedding(mock_litellm_embedding, dimension=None): +def set_embedding(mock_embedding, dimension=None, base=None): def resp_f(input, *args, **kwargs): - mock_response = MagicMock() - mock_response.data = [{"embedding": dummy_embeddings(s, dimension)} for s in input] - return mock_response + return [dummy_embeddings(s, dimension, base) for s in input] - mock_litellm_embedding.side_effect = resp_f + mock_embedding().embeddings.side_effect = resp_f class BaseTestKB(BaseExecutorDummyML): @@ -94,7 +93,7 @@ def _create_kb( if embedding_model is None: embedding_model = { - "provider": "bedrock", + "provider": "openai", "model_name": "dummy_model", "api_key": "dummy_key", } @@ -133,7 +132,6 @@ def _create_kb( ) def _get_storage_table(self, kb_name): - # default chromadb db_name = f"db_{kb_name}" self._drop_storage_db(db_name) @@ -141,10 +139,7 @@ def _get_storage_table(self, kb_name): self.run_sql(f""" create database {db_name} with - engine='chromadb', - PARAMETERS = {{ - 'persist_directory': '{kb_name}' - }} + engine='duckdb_faiss' """) self.storages.append(db_name) @@ -172,16 +167,16 @@ def _get_ral_table(self): return pd.DataFrame(data, columns=["ral", "english", "italian"]) -class TestKB(BaseTestKB): +class TestKBNOAutoBatch(BaseTestKB): def setup_method(self): super().setup_method() from mindsdb.utilities.config import config config["knowledge_bases"]["disable_autobatch"] = True - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_kb(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_kb(self, mock_embedding): + set_embedding(mock_embedding) self._create_kb("kb_review") @@ -191,7 +186,7 @@ def test_kb(self, mock_litellm_embedding): ret = self.run_sql("select * from kb_review") assert len(ret) == 1 - # show tables in default chromadb + # show tables in default vectordb ret = self.run_sql("show knowledge bases") db_name = ret.STORAGE[0].split(".")[0] @@ -199,9 +194,9 @@ def test_kb(self, mock_litellm_embedding): # only one default collection there assert len(ret) == 1 - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_kb_metadata(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_kb_metadata(self, mock_embedding): + set_embedding(mock_embedding) record = { "review": "all is good, haven't used yet", @@ -330,13 +325,8 @@ class _Choice: def __init__(self, content): self.message = _Msg(content) - class _Resp: - def __init__(self, content): - self.choices = [_Choice(content)] - async def _fake_call_llm(messages): - content = '{"ranking": [{"doc_index": 2, "score": 0.9}, {"doc_index": 1, "score": 0.6}, {"doc_index": 3, "score": 0.1}]}' - return _Resp(content) + return '{"ranking": [{"doc_index": 2, "score": 0.9}, {"doc_index": 1, "score": 0.6}, {"doc_index": 3, "score": 0.1}]}' # Bind the async method to this reranker instance reranker._call_llm = _fake_call_llm # type: ignore @@ -361,16 +351,11 @@ class _Choice: def __init__(self, content): self.message = _Msg(content) - class _Resp: - def __init__(self, content): - self.choices = [_Choice(content)] - async def _fake_call_llm(messages): # Returns code-fenced JSON, includes only two entries, one without score - content = """```json + return """```json {"ranking": [1, {"doc_index": 3, "score": 0.8}]} ```""" - return _Resp(content) reranker._call_llm = _fake_call_llm # type: ignore @@ -394,14 +379,9 @@ class _Choice: def __init__(self, content): self.message = _Msg(content) - class _Resp: - def __init__(self, content): - self.choices = [_Choice(content)] - async def _fake_call_llm(messages): # Invalid JSON forces fallback - content = "not-json" - return _Resp(content) + return "not-json" reranker._call_llm = _fake_call_llm # type: ignore @@ -412,9 +392,9 @@ async def _fake_call_llm(messages): # Fallback pattern should be descending assert scores[0] > scores[1] > scores[2] - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_join_kb_table(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_join_kb_table(self, mock_embedding): + set_embedding(mock_embedding) df = self._get_ral_table() self.save_file("ral", df) @@ -480,15 +460,12 @@ def test_join_kb_table(self, mock_litellm_embedding): assert set(ret["id"]) == {"9016", "9023"} @pytest.mark.slow - @pytest.mark.skipif(sys.platform == "win32", reason="Causes hard crash on windows.") - @pytest.mark.skipif(sys.platform == "darwin", reason="Causes hard crash on mac.") - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_kb_partitions(self, mock_handler, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + def test_kb_partitions(self, mock_handler, mock_embedding): + set_embedding(mock_embedding) df = self._get_ral_table() - self.save_file("ral", df) df = pd.concat([df] * 30) # unique ids @@ -555,7 +532,15 @@ def stream_f(*args, **kwargs): yield df[chunk_size * i : chunk_size * (i + 1) :] # --- stream mode --- - mock_handler().query_stream.side_effect = stream_f + # Mock native_query to return TableResponse with generator + mock_handler().stream_response = True + + def native_query_with_generator(*args, **kwargs): + from mindsdb.integrations.libs.response import TableResponse + + return TableResponse(data_generator=stream_f()) + + mock_handler().native_query.side_effect = native_query_with_generator # test iterate check_partition( @@ -573,13 +558,14 @@ def stream_f(*args, **kwargs): """ ) - # test threads - check_partition( - """ - insert into kb_part SELECT id, english FROM pg.ral - using batch_size=20, track_column=id, threads = 3 - """ - ) + # switched off for faiss + # # test threads + # check_partition( + # """ + # insert into kb_part SELECT id, english FROM pg.ral + # using batch_size=20, track_column=id, threads = 3 + # """ + # ) # without track column check_partition( @@ -590,7 +576,15 @@ def stream_f(*args, **kwargs): ) # --- general mode --- - mock_handler().query_stream = None + # Mock native_query to return TableResponse with full data + mock_handler().stream_response = False + + def native_query_without_generator(*args, **kwargs): + from mindsdb.integrations.libs.response import TableResponse + + return TableResponse(data=df) + + mock_handler().native_query.side_effect = native_query_without_generator # test iterate check_partition( @@ -600,25 +594,26 @@ def stream_f(*args, **kwargs): """ ) - # test threads - check_partition( - """ - insert into kb_part SELECT id, english FROM pg.ral - using batch_size=20, track_column=id, threads = 3 - """ - ) + # switched off for faiss + # # test threads + # check_partition( + # """ + # insert into kb_part SELECT id, english FROM pg.ral + # using batch_size=20, track_column=id, threads = 3 + # """ + # ) - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_kb_algebra(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_kb_algebra(self, mock_embedding): + set_embedding(mock_embedding) lines, i = [], 0 for color in ("white", "red", "green"): for size in ("big", "middle", "small"): for shape in ("square", "triangle", "circle"): i += 1 - lines.append([i, i, f"{color} {size} {shape}", color, size, shape]) - df = pd.DataFrame(lines, columns=["id", "num", "content", "color", "size", "shape"]) + lines.append([i, i, f"{color} {size} {shape}", color, size, shape, dt.date(2000, 1, i)]) + df = pd.DataFrame(lines, columns=["id", "num", "content", "color", "size", "shape", "valid_date"]) self.save_file("items", df) @@ -727,9 +722,46 @@ def test_kb_algebra(self, mock_litellm_embedding): else: assert "small" in content - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_select_allowed_columns(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + # -- metadata: like, not like + for query in ("trian%", "%riangl%", "%angle"): + ret = self.run_sql(f"select * from kb_alg where shape like '{query}'") + + # only triangle + assert set(ret["shape"]) == {"triangle"} + + # -- metadata: '>=', '>', '<=', '<' + + ret = self.run_sql("select * from kb_alg where color > 'red'") + # only white + assert set(ret["color"]) == {"white"} + + ret = self.run_sql("select * from kb_alg where color < 'red'") + # only green + assert set(ret["color"]) == {"green"} + + ret = self.run_sql("select * from kb_alg where color <= 'red' and color > 'green'") + # only red + assert set(ret["color"]) == {"red"} + + # filter by int + ret = self.run_sql("select * from kb_alg where num >= 10") + assert ret["num"].min() == 10 + + # filter by date + ret = self.run_sql("select * from kb_alg where valid_date >= '2000-01-15'") + assert ret["valid_date"].min() > "2000-01-14" and ret["valid_date"].min() < "2000-01-16" + + ret = self.run_sql("select * from kb_alg where valid_date < '2000-01-15'") + assert ret["valid_date"].max() > "2000-01-13" and ret["valid_date"].min() < "2000-01-15" + + # -- filter by id and content + ret = self.run_sql("select * from kb_alg where content = 'green' and id < 22") + assert ret["color"][0] == "green" + assert ret["id"].max() < 22 + + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_select_allowed_columns(self, mock_embedding): + set_embedding(mock_embedding) # -- no metadata are specified, generated from inserts -- self._create_kb("kb1") @@ -772,9 +804,9 @@ def test_select_allowed_columns(self, mock_litellm_embedding): @patch("mindsdb.interfaces.knowledge_base.llm_client.OpenAI") @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_evaluate(self, mock_litellm_embedding, mock_get_scores, mock_openai): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_evaluate(self, mock_embedding, mock_get_scores, mock_openai): + set_embedding(mock_embedding) question, answer = "2+2", "4" agent_response = f""" @@ -892,13 +924,13 @@ def test_evaluate(self, mock_litellm_embedding, mock_get_scores, mock_openai): assert len(df) > 0 @patch("mindsdb.utilities.config.Config.get") - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") - def test_save_default_params(self, mock_get_scores, mock_litellm_embedding, mock_config_get): + def test_save_default_params(self, mock_get_scores, mock_embedding, mock_config_get): # reranking result mock_get_scores.side_effect = lambda query, docs: [0.8 for _ in docs] - set_litellm_embedding(mock_litellm_embedding) + set_embedding(mock_embedding) def config_get_side_effect(key, default=None): if key == "default_embedding_model": @@ -932,10 +964,10 @@ def config_get_side_effect(key, default=None): assert "openai_model" not in ret["RERANKING_MODEL"][0] - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_relevance_filtering_gt_operator(self, mock_litellm_embedding): + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_relevance_filtering_gt_operator(self, mock_embedding): """Test relevance filtering with GREATER_THAN operator""" - set_litellm_embedding(mock_litellm_embedding) + set_embedding(mock_embedding) test_data = [ {"id": "1", "content": "This is about machine learning and AI"}, @@ -966,9 +998,9 @@ def test_relevance_filtering_gt_operator(self, mock_litellm_embedding): assert isinstance(ret, pd.DataFrame) @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_alter_kb(self, mock_litellm_embedding, mock_get_scores): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_alter_kb(self, mock_embedding, mock_get_scores): + set_embedding(mock_embedding) self._create_kb( "kb1", @@ -1022,6 +1054,11 @@ def test_alter_kb(self, mock_litellm_embedding, mock_get_scores): assert kb.params["reranking_model"]["provider"] == "ollama" assert "api_key" not in kb.params["reranking_model"] + # disable reranking model and ensure config is cleared + self.run_sql("ALTER KNOWLEDGE BASE kb1 USING reranking_model = false") + kb = self.db.KnowledgeBase.query.filter_by(name="kb1").first() + assert kb.params["reranking_model"] == {} + @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") @patch("mindsdb.interfaces.knowledge_base.llm_client.OpenAI") def test_ollama(self, mock_openai, mock_get_scores): @@ -1042,9 +1079,9 @@ def test_ollama(self, mock_openai, mock_get_scores): assert "api_key" not in ret["EMBEDDING_MODEL"][0] assert "api_key" not in ret["RERANKING_MODEL"][0] - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_kb_uppercase_source_columns(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_kb_uppercase_source_columns(self, mock_embedding): + set_embedding(mock_embedding) df = pd.DataFrame( [ @@ -1116,37 +1153,37 @@ def test_kb_uppercase_source_columns(self, mock_litellm_embedding): assert len(ret) == 2 assert ret["category"][0] == "Home" - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_dimension_mismatch(self, mock_litellm_embedding): + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_dimension_mismatch(self, mock_embedding): temp_dir = tempfile.mkdtemp() self.run_sql(f""" - create database my_chroma - with - engine='chromadb', + create database my_faiss + with + engine='duckdb_faiss', PARAMETERS = {{ 'persist_directory': '{temp_dir}' }} """) - set_litellm_embedding(mock_litellm_embedding, dimension=1000) - self._create_kb("kb1", storage="my_chroma.table1") + set_embedding(mock_embedding, dimension=1000) + self._create_kb("kb1", storage="my_faiss.table1") self.run_sql("insert into kb1 (content) values ('review')") # change dimension - set_litellm_embedding(mock_litellm_embedding, dimension=1500) + set_embedding(mock_embedding, dimension=1500) with pytest.raises(ValueError): - self._create_kb("kb2", storage="my_chroma.table1") + self._create_kb("kb2", storage="my_faiss.table1") self.run_sql("drop knowledge base kb1") - self.run_sql("drop table my_chroma.table1") - self.run_sql("drop database my_chroma") + self.run_sql("drop table my_faiss.table1") + self.run_sql("drop database my_faiss") - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_duplicated_ids(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_duplicated_ids(self, mock_embedding): + set_embedding(mock_embedding) self._create_kb("kb1") @@ -1176,9 +1213,9 @@ def test_duplicated_ids(self, mock_litellm_embedding): ret = self.run_sql("select * from kb1 where id = 2") assert len(ret) == 1 - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_update(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_update(self, mock_embedding): + set_embedding(mock_embedding) self._create_kb("kb1") @@ -1195,11 +1232,157 @@ def test_update(self, mock_litellm_embedding): assert len(ret) == 1 assert ret["chunk_content"][0] == "dog" + @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_reranking(self, mock_embedding, mock_get_scores): + set_embedding(mock_embedding) + + self._create_kb( + "kb_ral", + content_columns=["english"], + reranking_model={ + "provider": "openai", + "model_name": "gpt-3", + "api_key": "embed-key-1", + }, + ) + + df = self._get_ral_table() + self.save_file("ral", df) + + self.run_sql( + """ + insert into kb_ral + select * from files.ral + """ + ) + + # rank from greater to lower + mock_get_scores.side_effect = lambda query, docs: [1 - i / 4 for i in range(len(docs))] + ret = self.run_sql("select * from kb_ral where content='white'") + assert "white" in ret["chunk_content"].iloc[0] + + # reverse rank: from lower to greater. the most semantic result have to be moved back + mock_get_scores.side_effect = lambda query, docs: [i / 4 for i in range(len(docs))] + ret = self.run_sql("select * from kb_ral where content='white'") + assert "white" not in ret["chunk_content"].iloc[0] + + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_hybrid_search(self, mock_embedding): + df = self._get_ral_table() + self.save_file("ral", df) + + set_embedding(mock_embedding) + + self._create_kb("kb_hybrid", content_columns=["english"]) + + self.run_sql("insert into kb_hybrid select * from files.ral") + + # changing embedding config, making semantic search irrelevant + set_embedding(mock_embedding, base=20) + + # white is not at the top + ret = self.run_sql("select * from kb_hybrid where content='white'") + assert "white" not in ret["chunk_content"].iloc[0] + + # but it is when hybrid search is used + ret = self.run_sql(""" + select * from kb_hybrid where content='white' + and hybrid_search_alpha = 0 + """) + assert "white" in ret["chunk_content"].iloc[0] + + # checking alpha=0.5 + ret = self.run_sql(""" + select * from kb_hybrid where content='white' + and hybrid_search = true + """) + assert "white" in ret["chunk_content"].iloc[0] + + # @pytest.mark.slow + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_create_index(self, mock_embedding): + set_embedding(mock_embedding) + + df = self._get_ral_table() + + df = pd.concat([df] * 30) + # unique ids + df["id"] = list(map(str, range(len(df)))) + self.save_file("ral", df) + + # create kb, fill it + self._create_kb("kb_ral", content_columns=["english"]) + + self.run_sql("insert into kb_ral select * from files.ral") + + # create index default index (ivf_file, for windows it is ivf) + self.run_sql( + """ + CREATE INDEX ON KNOWLEDGE_BASE kb_ral WITH (nlist=1) + """ + ) + + # check kb works after index was created + ret = self.run_sql("select * from kb_ral where content='white'") + assert "white" in ret["chunk_content"].iloc[0] + + # specified index + self.run_sql( + """ + CREATE INDEX ON KNOWLEDGE_BASE kb_ral + WITH (nlist=1, type='ivf', train_count=50) + """ + ) + ret = self.run_sql("select * from kb_ral where content='white'") + assert "white" in ret["chunk_content"].iloc[0] + + def test_providers(self): + with patch("mindsdb.interfaces.knowledge_base.llm_client.BedrockClient.embeddings") as embed: + with patch( + "mindsdb.integrations.utilities.rag.rerankers.base_reranker.AsyncBedrockClient.acompletion" + ) as rerank: + embed.return_value = [[1, 1, 1]] + rerank.return_value = "100" + self._create_kb( + "kb_test", + embedding_model={ + "provider": "bedrock", + "model_name": "amazon.titan", + "aws_access_key_id": "-", + "aws_region_name": "us-east-2", + "aws_secret_access_key": "-", + }, + reranking_model={ + "provider": "bedrock", + "model_name": "llama3", + "aws_access_key_id": "-", + "aws_region_name": "us-east-2", + "aws_secret_access_key": "-", + }, + ) + assert embed.call_args_list[0][0][0] == "amazon.titan" + assert rerank.call_args_list[0][1]["model_name"] == "llama3" + + with patch("mindsdb.interfaces.knowledge_base.llm_client.SnowflakeClient.embeddings") as embed: + embed.return_value = [[1, 1, 1]] + self._create_kb( + "kb_test", + embedding_model={"provider": "snowflake", "model_name": "arctic", "account_id": "ABC", "api_key": "-"}, + ) + assert embed.call_args_list[0][0][0] == "arctic" + with patch("mindsdb.interfaces.knowledge_base.llm_client.GeminiClient.embeddings") as embed: + embed.return_value = [[1, 1, 1]] + self._create_kb( + "kb_test", embedding_model={"provider": "gemini", "model_name": "gemini-embedding", "api_key": "-"} + ) + assert embed.call_args_list[0][0][0] == "gemini-embedding" + class TestKBAutoBatch(BaseTestKB): - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_no_autobatch(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_no_autobatch(self, mock_embedding): + set_embedding(mock_embedding) df = self._get_ral_table() self.save_file("ral", df) @@ -1219,9 +1402,9 @@ def test_no_autobatch(self, mock_litellm_embedding): ret = self.run_sql("select * from kb_ral limit 1") assert len(ret) == 1 - @patch("mindsdb.integrations.handlers.litellm_handler.litellm_handler.embedding") - def test_autobatch(self, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") + def test_autobatch(self, mock_embedding): + set_embedding(mock_embedding) df = self._get_ral_table() self.save_file("ral", df) diff --git a/tests/unit/executor/test_lowercase.py b/tests/unit/executor/test_lowercase.py index 8f8e0a74870..d7e9d2a32b0 100644 --- a/tests/unit/executor/test_lowercase.py +++ b/tests/unit/executor/test_lowercase.py @@ -4,7 +4,7 @@ import pandas as pd from tests.unit.executor_test_base import BaseExecutorDummyML -from tests.unit.executor.test_agent import set_litellm_embedding +from tests.unit.executor.test_agent import set_embedding class TestLowercase(BaseExecutorDummyML): @@ -166,13 +166,15 @@ def test_model_name_lowercase(self): self.run_sql(f"DROP MODEL `{another_name}`") self.run_sql(f"DROP MODEL {another_name}") - def test_agent_name_lowercase(self): + @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") + def test_agent_name_lowercase(self, check_agent_llm): agent_params = """ - model='gpt-3.5-turbo', - provider='openai', + model={ + "model_name": "gpt-3.5-turbo", + "provider": "openai" + }, prompt_template='Answer the user input in a helpful way using tools', - max_iterations=5, - mode='retrieval' + mode='text' """ # mixed case: agent @@ -204,18 +206,14 @@ def test_agent_name_lowercase(self): self.run_sql(f"drop agent `{another_agent_name}`") self.run_sql(f"drop agent {another_agent_name}") - @patch("litellm.embedding") + @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") @patch("openai.OpenAI") - def test_knowledgebase_name_lowercase(self, mock_openai, mock_litellm_embedding): - set_litellm_embedding(mock_litellm_embedding) + def test_knowledgebase_name_lowercase(self, mock_openai, mock_embedding): + set_embedding(mock_embedding) self.run_sql(""" create database my_kb_storage - with - engine='chromadb', - PARAMETERS = { - 'persist_directory': 'my_kb_storage' - } + with engine='duckdb_faiss' """) kb_params = """ @@ -278,7 +276,8 @@ def test_job_name_lowercase(self): self.run_sql(f"DROP JOB {another_name}") - def test_chatbot_lowercase(self): + @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") + def test_chatbot_lowercase(self, check_agent_llm): self.run_sql("create agent my_agent using model={'provider': 'openai', 'model_name': 'gpt-3.5'}") self.run_sql("create database my_db using engine='dummy_data'") diff --git a/tests/unit/executor/test_mongodb_handler.py b/tests/unit/executor/test_mongodb_handler.py index fac9b4de9e0..9d0a7382084 100644 --- a/tests/unit/executor/test_mongodb_handler.py +++ b/tests/unit/executor/test_mongodb_handler.py @@ -1,13 +1,21 @@ import unittest + +import pytest from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_render import MongodbRender -from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_parser import MongodbParser +try: + from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_render import MongodbRender + from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_parser import MongodbParser + + MONGODB_HANDLER_AVAILABLE = True +except ImportError: + MONGODB_HANDLER_AVAILABLE = False # How to run: # env PYTHONPATH=./ pytest tests/unit/test_mongodb_handler.py +@pytest.mark.skipif(not MONGODB_HANDLER_AVAILABLE, reason="mongodb_handler not installed (community handler)") class TestMongoDBConverters(unittest.TestCase): def test_ast_to_mongo(self): sql = """ @@ -105,6 +113,7 @@ def test_mongo_parser(self): assert MongodbParser().from_string(mql).to_string() == expected_mql +@pytest.mark.skipif(not MONGODB_HANDLER_AVAILABLE, reason="mongodb_handler not installed (community handler)") class TestMongoDBHandler(unittest.TestCase): def test_mongo_handler(self): # TODO how to test mongo handler diff --git a/tests/unit/executor/test_schema.py b/tests/unit/executor/test_schema.py index 8c80177c006..540467a9c78 100644 --- a/tests/unit/executor/test_schema.py +++ b/tests/unit/executor/test_schema.py @@ -12,7 +12,8 @@ def test_show(self): self.run_sql(f"show {item}") @pytest.mark.slow - def test_schema(self): + @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") + def test_schema(self, check_agent): # --- create objects + describe --- # todo: create knowledge base (requires chromadb) @@ -91,15 +92,15 @@ def test_schema(self): # agent self.run_sql(""" CREATE AGENT agent1 - USING model = 'pred1' + USING model = {'model_name': "pred1", "provider": "openai"} """) self.run_sql(""" CREATE AGENT proj2.agent2 - USING model = 'pred2' -- it looks up in agent's project + USING model = {'model_name': "pred2", "provider": "openai"} -- it looks up in agent's project """) df = self.run_sql("describe agent agent1") - assert df.NAME[0] == "agent1" and df.MODEL_NAME[0] == "pred1" + assert df.NAME[0] == "agent1" and "pred1" in df.MODEL[0] # chatbot self.run_sql(""" diff --git a/tests/unused/unit/executor/test_udf.py b/tests/unit/executor/test_udf.py similarity index 60% rename from tests/unused/unit/executor/test_udf.py rename to tests/unit/executor/test_udf.py index 95a8ea7be0d..fa320e188b3 100644 --- a/tests/unused/unit/executor/test_udf.py +++ b/tests/unit/executor/test_udf.py @@ -1,51 +1,45 @@ import os -from textwrap import dedent from tempfile import TemporaryDirectory - +from textwrap import dedent from unittest.mock import patch import pandas as pd import pytest - -from mindsdb_sql_parser.ast.mindsdb import CreateMLEngine from mindsdb_sql_parser.ast import Identifier - +from mindsdb_sql_parser.ast.mindsdb import CreateMLEngine from tests.unit.executor_test_base import BaseExecutorDummyML -@pytest.mark.parametrize('byom_type', ['inhouse', 'venv']) +@pytest.mark.skip(reason="BYOM feature is currently disabled in MindsDB") +@pytest.mark.parametrize("byom_type", ["inhouse", "venv"]) class TestBYOM(BaseExecutorDummyML): - def _create_engine(self, name, code, **kwargs): - with TemporaryDirectory(prefix='udf_test_') as temp_dir: - code_path = os.path.join(temp_dir, 'code.py') - reqs_path = os.path.join(temp_dir, 'reqs.py') + with TemporaryDirectory(prefix="udf_test_") as temp_dir: + code_path = os.path.join(temp_dir, "code.py") + reqs_path = os.path.join(temp_dir, "reqs.py") - open(code_path, 'w').write(code) - open(reqs_path, 'w').write('') + open(code_path, "w").write(code) + open(reqs_path, "w").write("") params = { - 'code': code_path, - 'modules': reqs_path, + "code": code_path, + "modules": reqs_path, } params.update(kwargs) ret = self.command_executor.execute_command( - CreateMLEngine( - name=Identifier(name), - handler='byom', - params=params - ) + CreateMLEngine(name=Identifier(name), handler="byom", params=params) ) assert ret.error_code is None - @patch('mindsdb.integrations.handlers.postgres_handler.Handler') + @patch("mindsdb.integrations.handlers.postgres_handler.Handler") def test_udf(self, data_handler, byom_type): - - df = pd.DataFrame([ - {'a': 3, 'b': 4, 'c': 'a', 'd': 'b'}, - ]) - self.set_handler(data_handler, name='pg', tables={'sample': df}) + df = pd.DataFrame( + [ + {"a": 3, "b": 4, "c": "a", "d": "b"}, + ] + ) + self.set_handler(data_handler, name="pg", tables={"sample": df}) code = dedent(""" from os import listdir # imported function @@ -64,27 +58,25 @@ def add2(a: int, b: int) -> int: return a + b """) - self._create_engine(name='myml', code=code, - type=byom_type, mode='custom_function') + self._create_engine(name="myml", code=code, type=byom_type, mode="custom_function") # convert to explicit types, because duckdb doesn't convert it and fails - ret = self.run_sql(''' + ret = self.run_sql(""" select myml.fibo(b) x, myml.add1(a::char,b::char) y, myml.add2(a,b) z from pg.sample - ''') - assert ret['x'][0] == 3 - assert ret['y'][0] == '34' - assert ret['z'][0] == 7 + """) + assert ret["x"][0] == 3 + assert ret["y"][0] == "34" + assert ret["z"][0] == 7 # test without table - ret = self.run_sql(''' + ret = self.run_sql(""" select myml.fibo(4) x - ''') - assert ret['x'][0] == 3 + """) + assert ret["x"][0] == 3 def test_byom(self, byom_type): - code = dedent(""" from datetime import datetime import pandas as pd @@ -101,17 +93,17 @@ def predict(self, df): return df[[self.target_col]] """) - self._create_engine(name='myml', code=code, type=byom_type) + self._create_engine(name="myml", code=code, type=byom_type) - self.run_sql(''' + self.run_sql(""" create model m1 predict output_col using engine='myml', join_learn_process=true - ''') + """) - ret = self.run_sql(''' + ret = self.run_sql(""" select * from m1 where input_col = 'my_input' - ''') - assert ret['output_col'][0] == 'my_input>my_response' + """) + assert ret["output_col"][0] == "my_input>my_response" diff --git a/tests/unit/executor_test_base.py b/tests/unit/executor_test_base.py index d305cd9d90f..3cb3e2e8640 100644 --- a/tests/unit/executor_test_base.py +++ b/tests/unit/executor_test_base.py @@ -2,6 +2,7 @@ import datetime as dt import json import os +import pytest import sys import tempfile import shutil @@ -15,10 +16,19 @@ from prometheus_client import REGISTRY from mindsdb_sql_parser import parse_sql +from mindsdb.interfaces.database.integrations import integration_controller +from mindsdb.utilities.config import Config + from mindsdb.utilities import log from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender +from mindsdb.integrations.utilities.community_handler_fetcher import ( + community_handlers_enabled, + fetch_handler, + get_community_handlers_storage_dir, +) + logger = log.getLogger(__name__) @@ -59,6 +69,8 @@ def setup_class(cls): with open(cfg_file, "w") as fd: json.dump(config, fd) + cls._original_storage_dir_env = os.environ.get("MINDSDB_STORAGE_DIR") + cls._original_config_path_env = os.environ.get("MINDSDB_CONFIG_PATH") os.environ["MINDSDB_STORAGE_DIR"] = cls.storage_dir os.environ["MINDSDB_CONFIG_PATH"] = cfg_file @@ -83,6 +95,11 @@ def teardown_class(cls): if env_var_name in os.environ: del os.environ[env_var_name] + if cls._original_storage_dir_env is not None: + os.environ["MINDSDB_STORAGE_DIR"] = cls._original_storage_dir_env + if cls._original_config_path_env is not None: + os.environ["MINDSDB_CONFIG_PATH"] = cls._original_config_path_env + # remove import of mindsdb for next tests unload_module("mindsdb") @@ -205,7 +222,7 @@ def setup_method(self, import_dummy_ml=False): super().setup_method() self.set_executor(import_dummy_ml=import_dummy_ml) - def _import_handler(self, integration_controller, handler_name, handler_dir): + def _import_handler(self, integration_controller, handler_name, handler_dir, is_community=False): handler_meta = { "import": { "success": None, @@ -216,9 +233,45 @@ def _import_handler(self, integration_controller, handler_name, handler_dir): "path": handler_dir, "name": handler_name, "permanent": False, + "community": is_community, } integration_controller.handlers_import_status[handler_name] = handler_meta - integration_controller.import_handler(handler_name, "") + # For community handlers: import_handler uses spec_from_file_location (path-based). + # For built-in handlers: pass "" as base_import so importlib resolves from sys.path. + if is_community: + integration_controller.import_handler(handler_name) + else: + integration_controller.import_handler(handler_name, "") + + def setup_community_handler(self, handler_name: str): + """ + Register and import a community handler for testing. + + Requires MINDSDB_COMMUNITY_HANDLERS=true β€” this mirrors the production + gate and also exercises the fetch mechanism when the env var is set. + Skips the test if the env var is not set or the handler cannot be fetched. + """ + + if not community_handlers_enabled(): + pytest.skip( + f"Community handler '{handler_name}' test skipped: set MINDSDB_COMMUNITY_HANDLERS=true to enable" + ) + + storage_root = Path(Config()["paths"]["root"]) + storage_dir = get_community_handlers_storage_dir(storage_root) + handler_dir_name = f"{handler_name}_handler" + handler_dir = storage_dir / handler_dir_name + + if not (handler_dir / "__init__.py").exists(): + try: + handler_dir = fetch_handler(handler_dir_name, storage_dir) + except Exception as e: + pytest.skip(f"Could not fetch community handler '{handler_name}': {e}") + + if handler_dir is None or not (handler_dir / "__init__.py").exists(): + pytest.skip(f"Community handler '{handler_name}' not available") + + self._import_handler(integration_controller, handler_name, handler_dir, is_community=True) def set_executor( self, @@ -339,11 +392,10 @@ def set_handler(self, mock_handler, name, tables, engine="postgres", schema=None self.db.session.add(r) self.db.session.commit() - from mindsdb.integrations.libs.response import RESPONSE_TYPE - from mindsdb.integrations.libs.response import HandlerResponse as Response + from mindsdb.integrations.libs.response import TableResponse def handler_response(df, affected_rows: None | int = None): - response = Response(RESPONSE_TYPE.TABLE, df, affected_rows=affected_rows) + response = TableResponse(data=df, affected_rows=affected_rows) return response def get_tables_f(): diff --git a/tests/unit/handlers/base_handler_test.py b/tests/unit/handlers/base_handler_test.py index 85e4133fbfc..be54f494402 100644 --- a/tests/unit/handlers/base_handler_test.py +++ b/tests/unit/handlers/base_handler_test.py @@ -2,7 +2,7 @@ from unittest.mock import MagicMock, Mock from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, + DataHandlerResponse as Response, HandlerStatusResponse as StatusResponse, ) @@ -167,22 +167,6 @@ def get_columns_query(self): """ pass - def test_native_query(self): - """ - Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a Response object. - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - query_str = f"SELECT * FROM {self.mock_table}" - data = self.handler.native_query(query_str) - - assert isinstance(data, Response) - self.assertFalse(data.error_code) - def test_get_columns(self): """ Tests if the `get_tables` method calls `native_query` with the correct SQL query. diff --git a/tests/unit/handlers/test_access_handler.py b/tests/unit/handlers/community_handlers/test_access_handler.py similarity index 96% rename from tests/unit/handlers/test_access_handler.py rename to tests/unit/handlers/community_handlers/test_access_handler.py index 212832091fb..b360d3ef602 100644 --- a/tests/unit/handlers/test_access_handler.py +++ b/tests/unit/handlers/community_handlers/test_access_handler.py @@ -3,6 +3,8 @@ import pandas as pd import sys +import pytest + from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, RESPONSE_TYPE # Mock pyodbc and sqlalchemy_access before importing the handler @@ -13,7 +15,10 @@ sys.modules["sqlalchemy_access"] = MagicMock() sys.modules["sqlalchemy_access.base"] = MagicMock() -from mindsdb.integrations.handlers.access_handler.access_handler import AccessHandler +try: + from mindsdb.integrations.handlers.access_handler.access_handler import AccessHandler +except ImportError: + pytestmark = pytest.mark.skip("access_handler not installed (community handler)") class BaseAccessHandlerTest(unittest.TestCase): diff --git a/tests/unit/handlers/test_clickhouse.py b/tests/unit/handlers/community_handlers/test_clickhouse.py similarity index 79% rename from tests/unit/handlers/test_clickhouse.py rename to tests/unit/handlers/community_handlers/test_clickhouse.py index 404c888a4d7..68ec1d895fd 100644 --- a/tests/unit/handlers/test_clickhouse.py +++ b/tests/unit/handlers/community_handlers/test_clickhouse.py @@ -6,7 +6,8 @@ from sqlalchemy.exc import SQLAlchemyError from mindsdb_sql_parser import parse_sql -from base_handler_test import BaseDatabaseHandlerTest +from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager +from mindsdb.integrations.libs.response import TableResponse try: from mindsdb.integrations.handlers.clickhouse_handler.clickhouse_handler import ClickHouseHandler @@ -67,6 +68,21 @@ def test_connect_success(self): f"clickhouse+{self.dummy_connection_data['protocol']}://{self.dummy_connection_data['user']}:{self.dummy_connection_data['password']}@{self.dummy_connection_data['host']}:{self.dummy_connection_data['port']}/{self.dummy_connection_data['database']}" ) + def test_native_query(self): + """ + Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a Response object. + """ + mock_conn = MagicMock() + mock_cursor = MockCursorContextManager() + + self.handler.connect = MagicMock(return_value=mock_conn) + mock_conn.cursor = MagicMock(return_value=mock_cursor) + + query_str = f"SELECT * FROM {self.mock_table}" + data = self.handler.native_query(query_str) + + assert isinstance(data, TableResponse) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/test_confluence.py b/tests/unit/handlers/community_handlers/test_confluence.py similarity index 94% rename from tests/unit/handlers/test_confluence.py rename to tests/unit/handlers/community_handlers/test_confluence.py index f5af306caff..0a56a9a68a5 100644 --- a/tests/unit/handlers/test_confluence.py +++ b/tests/unit/handlers/community_handlers/test_confluence.py @@ -2,29 +2,28 @@ import unittest from unittest.mock import MagicMock, call, patch +import pytest import pandas as pd from base_handler_test import BaseHandlerTestSetup, BaseAPIResourceTestSetup -from mindsdb.integrations.handlers.confluence_handler.confluence_api_client import ConfluenceAPIClient -from mindsdb.integrations.handlers.confluence_handler.confluence_handler import ConfluenceHandler -from mindsdb.integrations.handlers.confluence_handler.confluence_tables import ( - ConfluenceBlogPostsTable, - ConfluenceDatabasesTable, - ConfluencePagesTable, - ConfluenceSpacesTable, - ConfluenceWhiteboardsTable, - ConfluenceTasksTable, -) -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) -from mindsdb.integrations.utilities.sql_utils import ( - FilterCondition, - FilterOperator, - SortColumn, -) + +try: + from mindsdb.integrations.handlers.confluence_handler.confluence_api_client import ConfluenceAPIClient + from mindsdb.integrations.handlers.confluence_handler.confluence_handler import ConfluenceHandler + from mindsdb.integrations.handlers.confluence_handler.confluence_tables import ( + ConfluenceBlogPostsTable, + ConfluenceDatabasesTable, + ConfluencePagesTable, + ConfluenceSpacesTable, + ConfluenceWhiteboardsTable, + ConfluenceTasksTable, + ) + +except ImportError: + pytestmark = pytest.mark.skip("Confluence handler not installed") + +from mindsdb.integrations.libs.response import TableResponse, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE +from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, SortColumn class TestConfluenceHandler(BaseHandlerTestSetup, unittest.TestCase): @@ -103,21 +102,21 @@ def test_check_connection_failure(self): def test_get_tables(self): """ - Test that the `get_tables` method returns a list of table names. + Test that the `get_tables` method returns a TableResponse with a list of table names. """ response = self.handler.get_tables() - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) self.assertEqual(response.data_frame.columns.tolist(), ["table_name", "table_type"]) def test_get_columns(self): """ - Test that the `get_columns` method returns a list of columns for a table. + Test that the `get_columns` method returns a TableResponse with a list of columns for a table. """ response = self.handler.get_columns("spaces") - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) self.assertEqual(response.data_frame.columns.tolist(), ["Field", "Type"]) diff --git a/tests/unit/handlers/test_databricks.py b/tests/unit/handlers/community_handlers/test_databricks.py similarity index 89% rename from tests/unit/handlers/test_databricks.py rename to tests/unit/handlers/community_handlers/test_databricks.py index df976cc4ce6..3b968ada2de 100644 --- a/tests/unit/handlers/test_databricks.py +++ b/tests/unit/handlers/community_handlers/test_databricks.py @@ -13,13 +13,14 @@ DatabricksHandler, ) - DATABRICKS_AVAILABLE = True except ImportError: pytestmark = pytest.mark.skip("Databricks handler not installed") - DATABRICKS_AVAILABLE = False from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, + TableResponse, + ErrorResponse, + OkResponse, + DataHandlerResponse, RESPONSE_TYPE, HandlerStatusResponse as StatusResponse, ) @@ -52,7 +53,6 @@ def set_results(self, results: List[tuple], columns: List[str]): CONNECT_PATCH_PATH = "mindsdb.integrations.handlers.databricks_handler.databricks_handler.connect" -@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed") class TestInstallationCheck(unittest.TestCase): """Test handler installation and information schema.""" @@ -77,7 +77,6 @@ def test_connection_args_validation(self): self.assertIn("access_token", str(ctx.exception)) -@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed") class TestDatabricksHandler(unittest.TestCase): dummy_connection_data = OrderedDict( server_hostname="adb-1234567890123456.7.azuredatabricks.net", @@ -146,7 +145,6 @@ def test_check_connection_failure(self): self.assertTrue(response.error_message) -@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed") class TestTableOperations(unittest.TestCase): """Test table operations (DDL & DML).""" @@ -171,7 +169,7 @@ def tearDown(self): def test_native_query(self): """ - Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a Response object. + Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a TableResponse object. """ self.mock_cursor.set_results([], []) @@ -179,8 +177,17 @@ def test_native_query(self): data = self.handler.native_query(query_str) self.mock_cursor.execute.assert_called_once_with(query_str) - self.assertIsInstance(data, Response) - self.assertFalse(data.error_code) + self.assertIsInstance(data, DataHandlerResponse) + self.assertNotIsInstance(data, ErrorResponse) + + def test_native_query_empty_select_returns_table(self): + self.mock_cursor.set_results([], ["id", "name"]) + + response = self.handler.native_query("SELECT id, name FROM table WHERE 1 = 0") + + self.assertEqual(response.type, RESPONSE_TYPE.TABLE) + self.assertEqual(list(response.data_frame.columns), ["id", "name"]) + self.assertEqual(len(response.data_frame), 0) def test_get_tables(self): """ @@ -202,6 +209,14 @@ def test_get_tables(self): """ self.handler.native_query.assert_called_once_with(expected_query) + def test_get_tables_returns_non_table_response_without_transform(self): + expected = ErrorResponse(error_message="boom") + self.handler.native_query = MagicMock(return_value=expected) + + result = self.handler.get_tables() + + self.assertIs(result, expected) + def test_get_columns(self): """ Tests if the `get_columns` method correctly constructs the SQL query and if it calls `native_query` with the correct query. @@ -241,14 +256,12 @@ def test_native_query_server_error(self): result = self.handler.native_query("SELECT * FROM test_table") - self.assertEqual(result.type, RESPONSE_TYPE.ERROR) + self.assertIsInstance(result, ErrorResponse) self.assertIn("Server error", result.error_message) def test_get_tables_all_schemas(self): """Test get_tables with all=True.""" - self.handler.native_query = MagicMock( - return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([{"table_name": "t1"}])) - ) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame([{"table_name": "t1"}]))) self.handler.get_tables(all=True) @@ -276,7 +289,7 @@ def test_get_columns_with_schema(self): ] ) - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=mock_df)) + self.handler.native_query = MagicMock(return_value=TableResponse(data=mock_df)) self.handler.get_columns("test_table", schema_name="my_schema") @@ -284,7 +297,6 @@ def test_get_columns_with_schema(self): self.assertIn("'my_schema'", query) -@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed") class TestAdvancedQueries(unittest.TestCase): dummy_connection_data = OrderedDict( server_hostname="test.azuredatabricks.net", @@ -357,7 +369,6 @@ def test_join_query(self): self.assertEqual(result.type, RESPONSE_TYPE.TABLE) -@pytest.mark.skipif(not DATABRICKS_AVAILABLE, reason="Databricks not installed") class TestDateTimeFunctions(unittest.TestCase): """Test date/time functions and INTERVAL transformations.""" @@ -415,7 +426,7 @@ def test_query_transforms_date_add_day_interval(self): """Test DATE_ADD with INTERVAL DAY is transformed to integer argument.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '30' DAY) AS due_date FROM orders LIMIT 1") # breakpoint() - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -426,7 +437,7 @@ def test_query_transforms_date_add_day_interval(self): def test_query_transforms_date_add_days_plural(self): """Test DATE_ADD with INTERVAL DAYS (plural) is transformed correctly.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL 7 DAYS) AS due_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -437,7 +448,7 @@ def test_query_transforms_date_add_days_plural(self): def test_query_transforms_date_sub_day_interval(self): """Test DATE_SUB with INTERVAL DAY is transformed to integer argument.""" query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '5' DAY) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -448,7 +459,7 @@ def test_query_transforms_date_sub_day_interval(self): def test_query_transforms_date_add_week_interval(self): """Test DATE_ADD with INTERVAL WEEK is converted to days.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' WEEK) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -459,7 +470,7 @@ def test_query_transforms_date_add_week_interval(self): def test_query_transforms_date_sub_week_interval(self): """Test DATE_SUB with INTERVAL WEEK is converted to days.""" query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '2' WEEK) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -470,7 +481,7 @@ def test_query_transforms_date_sub_week_interval(self): def test_query_transforms_date_add_month_interval(self): """Test DATE_ADD with INTERVAL MONTH uses ADD_MONTHS function.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' MONTH) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -481,7 +492,7 @@ def test_query_transforms_date_add_month_interval(self): def test_query_transforms_date_sub_month_interval(self): """Test DATE_SUB with INTERVAL MONTH uses ADD_MONTHS with negative value.""" query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '3' MONTH) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -492,7 +503,7 @@ def test_query_transforms_date_sub_month_interval(self): def test_query_transforms_date_add_year_interval(self): """Test DATE_ADD with INTERVAL YEAR uses ADD_MONTHS with 12x multiplier.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '1' YEAR) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -503,7 +514,7 @@ def test_query_transforms_date_add_year_interval(self): def test_query_transforms_date_sub_year_interval(self): """Test DATE_SUB with INTERVAL YEAR uses ADD_MONTHS with negative 12x value.""" query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '2' YEAR) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -514,7 +525,7 @@ def test_query_transforms_date_sub_year_interval(self): def test_query_transforms_date_add_hour_interval(self): """Test DATE_ADD with INTERVAL HOUR uses TIMESTAMPADD function.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '6' HOUR) AS future_time FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -525,7 +536,7 @@ def test_query_transforms_date_add_hour_interval(self): def test_query_transforms_date_sub_hour_interval(self): """Test DATE_SUB with INTERVAL HOUR uses TIMESTAMPADD with negative value.""" query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '3' HOUR) AS past_time FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -536,7 +547,7 @@ def test_query_transforms_date_sub_hour_interval(self): def test_query_transforms_date_add_minute_interval(self): """Test DATE_ADD with INTERVAL MINUTE uses TIMESTAMPADD function.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '30' MINUTE) AS future_time FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -547,7 +558,7 @@ def test_query_transforms_date_add_minute_interval(self): def test_query_transforms_date_add_second_interval(self): """Test DATE_ADD with INTERVAL SECOND uses TIMESTAMPADD function.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '45' SECOND) AS future_time FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -558,7 +569,7 @@ def test_query_transforms_date_add_second_interval(self): def test_query_without_interval_unchanged(self): """Test that queries without INTERVAL pass through unchanged.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, 10) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -569,7 +580,7 @@ def test_query_without_interval_unchanged(self): def test_query_transforms_date_add_quarter_interval(self): """Test DATE_ADD with INTERVAL QUARTER uses ADD_MONTHS with 3x multiplier.""" query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' QUARTER) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -580,7 +591,7 @@ def test_query_transforms_date_add_quarter_interval(self): def test_query_transforms_date_sub_quarter_interval(self): """Test DATE_SUB with INTERVAL QUARTER uses ADD_MONTHS with negative 3x value.""" query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '1' QUARTER) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -591,7 +602,7 @@ def test_query_transforms_date_sub_quarter_interval(self): def test_query_transforms_date_sub_minute_interval(self): """Test DATE_SUB with INTERVAL MINUTE uses TIMESTAMPADD with negative value.""" query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '15' MINUTE) AS past_time FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) @@ -602,7 +613,7 @@ def test_query_transforms_date_sub_minute_interval(self): def test_query_transforms_date_sub_second_interval(self): """Test DATE_SUB with INTERVAL SECOND uses TIMESTAMPADD with negative value.""" query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '30' SECOND) AS past_time FROM orders") - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.OK)) + self.handler.native_query = MagicMock(return_value=OkResponse()) self.handler.query(query) diff --git a/tests/unit/handlers/test_dynamodb.py b/tests/unit/handlers/community_handlers/test_dynamodb.py similarity index 64% rename from tests/unit/handlers/test_dynamodb.py rename to tests/unit/handlers/community_handlers/test_dynamodb.py index f1aef2481b1..535d11548ab 100644 --- a/tests/unit/handlers/test_dynamodb.py +++ b/tests/unit/handlers/community_handlers/test_dynamodb.py @@ -3,34 +3,36 @@ from botocore.client import ClientError from unittest.mock import patch, MagicMock, Mock +import pytest + from mindsdb_sql_parser import ast from mindsdb_sql_parser.ast.select.star import Star from mindsdb_sql_parser.ast.select.identifier import Identifier from base_handler_test import BaseHandlerTestSetup -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE -) -from mindsdb.integrations.handlers.dynamodb_handler.dynamodb_handler import DynamoDBHandler +from mindsdb.integrations.libs.response import TableResponse, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE +try: + from mindsdb.integrations.handlers.dynamodb_handler.dynamodb_handler import DynamoDBHandler + +except ImportError: + pytestmark = pytest.mark.skip("DynamoDB handler not installed") -class TestDynamoDBHandler(BaseHandlerTestSetup, unittest.TestCase): +class TestDynamoDBHandler(BaseHandlerTestSetup, unittest.TestCase): @property def dummy_connection_data(self): return OrderedDict( - aws_access_key_id='AQAXEQK89OX07YS34OP', - aws_secret_access_key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY', - region_name='us-east-2', + aws_access_key_id="AQAXEQK89OX07YS34OP", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + region_name="us-east-2", ) def create_handler(self): - return DynamoDBHandler('dynamodb', connection_data=self.dummy_connection_data) + return DynamoDBHandler("dynamodb", connection_data=self.dummy_connection_data) def create_patcher(self): - return patch('boto3.client') + return patch("boto3.client") def test_connect_failure_with_missing_connection_data(self): """ @@ -58,8 +60,8 @@ def test_check_connection_failure_with_incorrect_credentials(self): Test if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on failed connection due to incorrect credentials. """ self.mock_connect.return_value.list_tables.side_effect = ClientError( - error_response={'Error': {'Code': 'AccessDeniedException', 'Message': 'Access Denied'}}, - operation_name='list_tables' + error_response={"Error": {"Code": "AccessDeniedException", "Message": "Access Denied"}}, + operation_name="list_tables", ) response = self.handler.check_connection() @@ -72,7 +74,7 @@ def test_check_connection_success(self): """ Test if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection. """ - self.mock_connect.return_value.list_tables.return_value = {'TableNames': ['table1', 'table2']} + self.mock_connect.return_value.list_tables.return_value = {"TableNames": ["table1", "table2"]} response = self.handler.check_connection() self.assertTrue(response.success) @@ -81,15 +83,12 @@ def test_check_connection_success(self): def test_query_select_success(self): """ - Test if the `query` method returns a response object with a data frame containing the query result. + Test if the `query` method returns a TableResponse object with a data frame containing the query result. `native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method. """ mock_boto3_client = Mock() mock_boto3_client.execute_statement.return_value = { - 'Items': [ - {'id': {'N': '1'}, 'name': {'S': 'Alice'}}, - {'id': {'N': '2'}, 'name': {'S': 'Bob'}} - ] + "Items": [{"id": {"N": "1"}, "name": {"S": "Alice"}}, {"id": {"N": "2"}, "name": {"S": "Bob"}}] } self.handler.connect = MagicMock(return_value=mock_boto3_client) @@ -97,18 +96,18 @@ def test_query_select_success(self): targets=[ Star(), ], - from_table=ast.Identifier('table1') + from_table=ast.Identifier("table1"), ) response = self.handler.query(query) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame self.assertEqual(len(df), 2) - self.assertEqual(df.columns.tolist(), ['id', 'name']) - self.assertEqual(df['id'].tolist(), [1, 2]) - self.assertEqual(df['name'].tolist(), ['Alice', 'Bob']) + self.assertEqual(df.columns.tolist(), ["id", "name"]) + self.assertEqual(df["id"].tolist(), [1, 2]) + self.assertEqual(df["name"].tolist(), ["Alice", "Bob"]) def test_query_select_failure_with_unsupported_clause(self): """ @@ -118,8 +117,8 @@ def test_query_select_failure_with_unsupported_clause(self): targets=[ Star(), ], - from_table=ast.Identifier('table1'), - limit=10 + from_table=ast.Identifier("table1"), + limit=10, ) with self.assertRaises(ValueError): self.handler.query(query) @@ -132,62 +131,58 @@ def test_query_insert_failure(self): mock_boto3_client.execute_statement.return_value = {} self.handler.connect = MagicMock(return_value=mock_boto3_client) - query = ast.Insert( - table=Identifier('table1'), - columns=['id', 'name'], - values=[[1, 'Alice']] - ) + query = ast.Insert(table=Identifier("table1"), columns=["id", "name"], values=[[1, "Alice"]]) with self.assertRaises(ValueError): self.handler.query(query) def test_get_tables(self): """ - Test if the `get_tables` method returns a response object with a list of tables. + Test if the `get_tables` method returns a TableResponse object with a list of tables. """ mock_boto3_client = Mock() - mock_boto3_client.list_tables.return_value = {'TableNames': ['table1', 'table2']} + mock_boto3_client.list_tables.return_value = {"TableNames": ["table1", "table2"]} self.handler.connection = mock_boto3_client response = self.handler.get_tables() - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame self.assertEqual(len(df), 2) - self.assertEqual(df.columns.tolist(), ['table_name']) - self.assertEqual(df['table_name'].tolist(), ['table1', 'table2']) + self.assertEqual(df.columns.tolist(), ["table_name"]) + self.assertEqual(df["table_name"].tolist(), ["table1", "table2"]) def test_get_columns(self): """ - Test if the `get_columns` method returns a response object with a list of columns for a given table. + Test if the `get_columns` method returns a TableResponse object with a list of columns for a given table. """ mock_boto3_client = Mock() mock_boto3_client.describe_table.return_value = { - 'Table': { - 'KeySchema': [ - {'AttributeName': 'id', 'KeyType': 'HASH'}, - {'AttributeName': 'name', 'KeyType': 'RANGE'} + "Table": { + "KeySchema": [ + {"AttributeName": "id", "KeyType": "HASH"}, + {"AttributeName": "name", "KeyType": "RANGE"}, + ], + "AttributeDefinitions": [ + {"AttributeName": "id", "AttributeType": "N"}, + {"AttributeName": "name", "AttributeType": "S"}, ], - 'AttributeDefinitions': [ - {'AttributeName': 'id', 'AttributeType': 'N'}, - {'AttributeName': 'name', 'AttributeType': 'S'} - ] } } self.handler.connection = mock_boto3_client - response = self.handler.get_columns('table1') + response = self.handler.get_columns("table1") - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame self.assertEqual(len(df), 2) - self.assertEqual(df.columns.tolist(), ['column_name', 'data_type']) - self.assertEqual(df['column_name'].tolist(), ['id', 'name']) - self.assertEqual(df['data_type'].tolist(), ['N', 'S']) + self.assertEqual(df.columns.tolist(), ["column_name", "data_type"]) + self.assertEqual(df["column_name"].tolist(), ["id", "name"]) + self.assertEqual(df["data_type"].tolist(), ["N", "S"]) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_jira.py b/tests/unit/handlers/community_handlers/test_jira.py new file mode 100644 index 00000000000..dd1e4d8f6a0 --- /dev/null +++ b/tests/unit/handlers/community_handlers/test_jira.py @@ -0,0 +1,397 @@ +import pytest +import unittest + +from unittest.mock import patch, MagicMock +from requests.exceptions import HTTPError + +import pandas as pd + + +from base_handler_test import BaseHandlerTestSetup +from mindsdb.integrations.libs.response import ( + HandlerResponse as Response, + HandlerStatusResponse as StatusResponse, + RESPONSE_TYPE, +) + +try: + from mindsdb.integrations.handlers.jira_handler.jira_handler import JiraHandler + from mindsdb.integrations.handlers.jira_handler.jira_tables import ( + JiraAttachmentsTable, + JiraCommentsTable, + JiraIssuesTable, + JiraUsersTable, + JiraProjectsTable, + JiraGroupsTable, + SERVER_COLUMNS, + ) +except ImportError: + pytestmark = pytest.mark.skip("Jira handler not installed") + + +class TestJiraHandler(BaseHandlerTestSetup, unittest.TestCase): + @property + def dummy_connection_data(self): + return { + "jira_url": "https://your-domain.atlassian.net", + "jira_username": "username", + "jira_api_token": "your_api_token", + "is_cloud": False, + } + + @property + def err_to_raise_on_connect_failure(self): + return HTTPError("Failed to connect to Jira") + + def create_handler(self): + return JiraHandler("jira", self.dummy_connection_data) + + def create_patcher(self): + return patch("mindsdb.integrations.handlers.jira_handler.jira_handler.Jira") + + def test_connect_cloud_success(self): + """Ensure cloud connections normalize credentials and reuse Jira constructor correctly.""" + mock_client = MagicMock() + self.mock_connect.return_value = mock_client + + connection = self.handler.connect() + + self.assertIs(connection, mock_client) + self.assertTrue(self.handler.is_connected) + self.mock_connect.assert_called_once_with( + username=self.dummy_connection_data["jira_username"], + password=self.dummy_connection_data["jira_api_token"], + url=self.dummy_connection_data["jira_url"], + cloud=True, + ) + + def test_connect_reuse_existing_connection(self): + """If already connected, connect should reuse the existing client.""" + cached_connection = MagicMock() + self.handler.connection = cached_connection + self.handler.is_connected = True + + connection = self.handler.connect() + + self.assertIs(connection, cached_connection) + self.mock_connect.assert_not_called() + + def test_connect_runtime_error_on_missing_cached_connection(self): + """Marking the handler as connected without a cached client should raise.""" + self.handler.is_connected = True + self.handler.connection = None + + with self.assertRaises(RuntimeError): + self.handler.connect() + + def test_check_connection_http_error(self): + """check_connection should surface HTTP errors from the Jira client.""" + mock_client = MagicMock() + mock_client.myself.side_effect = HTTPError("Unauthorized") + self.mock_connect.return_value = mock_client + + response = self.handler.check_connection() + + assert isinstance(response, StatusResponse) + self.assertFalse(response.success) + self.assertIn("Unauthorized", response.error_message) + self.assertFalse(self.handler.is_connected) + + def test_native_query_http_error(self): + """native_query should return an error response when Jira raises HTTPError.""" + mock_client = MagicMock() + mock_client.jql.side_effect = HTTPError("Bad JQL") + self.mock_connect.return_value = mock_client + + response = self.handler.native_query("project = TEST") + + assert isinstance(response, Response) + self.assertEqual(response.type, RESPONSE_TYPE.ERROR) + self.assertIn("Bad JQL", response.error_message) + + def test_native_query_returns_empty_dataframe_when_no_issues(self): + """Ensure native_query returns an empty dataframe with expected columns.""" + mock_client = MagicMock() + mock_client.jql.return_value = {} + self.mock_connect.return_value = mock_client + + response = self.handler.native_query("project = TEST") + + assert isinstance(response, Response) + self.assertEqual(response.type, RESPONSE_TYPE.TABLE) + self.assertTrue(response.data_frame.empty) + issues_columns = JiraIssuesTable(self.handler).get_columns() + self.assertListEqual(list(response.data_frame.columns), issues_columns) + + def test_attachments_table_fetches_missing_fields(self): + """Attachments table should refresh issues to retrieve missing attachment fields.""" + mock_client = MagicMock() + self.mock_connect.return_value = mock_client + + issue_without_attachments = {"id": "1", "key": "ISSUE-1", "fields": {}} + mock_client.get_all_projects.return_value = [{"id": "100"}] + mock_client.get_all_project_issues.return_value = [issue_without_attachments] + mock_client.get_issue.return_value = { + "fields": {"attachment": [{"id": "att-1", "filename": "log.txt", "size": 10, "mimeType": "text/plain"}]} + } + + attachments_table = JiraAttachmentsTable(self.handler) + result_df = attachments_table.list(limit=1) + + self.assertEqual(len(result_df), 1) + self.assertEqual(result_df.loc[0, "attachment_id"], "att-1") + self.assertEqual(result_df.loc[0, "issue_key"], "ISSUE-1") + self.assertEqual(result_df.loc[0, "filename"], "log.txt") + + def test_issues_table_missing_assignee(self): + """Test that issues without assignee are handled correctly.""" + mock_client = MagicMock() + self.mock_connect.return_value = mock_client + + mock_issues = [ + { + "id": "1", + "key": "TEST-1", + "fields": { + "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, + "summary": "Issue with assignee", + "priority": {"name": "High"}, + "creator": {"displayName": "John Doe"}, + "assignee": {"displayName": "Jane Smith"}, + "status": {"name": "In Progress"}, + }, + }, + { + "id": "2", + "key": "TEST-2", + "fields": { + "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, + "summary": "Unassigned issue", + "priority": {"name": "Medium"}, + "creator": {"displayName": "John Doe"}, + "status": {"name": "Open"}, + }, + }, + { + "id": "3", + "key": "TEST-3", + "fields": { + "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, + "summary": "Issue without priority", + "creator": {"displayName": "John Doe"}, + "status": {"name": "Done"}, + }, + }, + ] + + mock_client.get_all_projects.return_value = [{"id": "10001"}] + mock_client.get_all_project_issues.return_value = mock_issues + + issues_table = JiraIssuesTable(self.handler) + result_df = issues_table.list(conditions=[]) + + self.assertEqual(len(result_df), 3) + self.assertIsNotNone(result_df) + + expected_columns = issues_table.get_columns() + for col in expected_columns: + self.assertIn(col, result_df.columns) + + self.assertEqual(result_df.loc[0, "assignee"], "Jane Smith") + self.assertTrue(pd.isna(result_df.loc[1, "assignee"])) + self.assertTrue(pd.isna(result_df.loc[2, "assignee"])) + + self.assertEqual(result_df.loc[0, "priority"], "High") + self.assertEqual(result_df.loc[1, "priority"], "Medium") + self.assertTrue(pd.isna(result_df.loc[2, "priority"])) + + def test_users_table_missing_timezone(self): + """Test that users without timeZone field are handled correctly.""" + mock_client = MagicMock() + self.mock_connect.return_value = mock_client + + mock_users = [ + { + "accountId": "user1", + "accountType": "atlassian", + "emailAddress": "user1@example.com", + "displayName": "User One", + "active": True, + "timeZone": "America/New_York", + "locale": "en_US", + }, + { + "accountId": "user2", + "accountType": "atlassian", + "emailAddress": "user2@example.com", + "displayName": "User Two", + "active": True, + "locale": "en_US", + }, + { + "accountId": "user3", + "accountType": "atlassian", + "displayName": "User Three", + "active": False, + }, + ] + + mock_client.users_get_all.return_value = mock_users + + users_table = JiraUsersTable(self.handler) + result_df = users_table.list(conditions=[]) + + self.assertEqual(len(result_df), 3) + self.assertIsNotNone(result_df) + + expected_columns = users_table.get_columns() + for col in expected_columns: + self.assertIn(col, result_df.columns) + + self.assertEqual(result_df.loc[0, "timeZone"], "America/New_York") + self.assertTrue(pd.isna(result_df.loc[1, "timeZone"])) + self.assertTrue(pd.isna(result_df.loc[2, "timeZone"])) + + self.assertEqual(result_df.loc[0, "emailAddress"], "user1@example.com") + self.assertEqual(result_df.loc[1, "emailAddress"], "user2@example.com") + self.assertTrue(pd.isna(result_df.loc[2, "emailAddress"])) + + def test_projects_table_missing_optional_fields(self): + """Test that projects with missing optional fields are handled correctly.""" + mock_client = MagicMock() + self.mock_connect.return_value = mock_client + + mock_projects = [ + { + "id": "10001", + "key": "PROJ1", + "name": "Project One", + "projectTypeKey": "software", + "simplified": True, + "style": "classic", + "isPrivate": False, + "entityId": "entity1", + "uuid": "uuid1", + }, + { + "id": "10002", + "key": "PROJ2", + "name": "Project Two", + }, + ] + + mock_client.get_all_projects.return_value = mock_projects + + projects_table = JiraProjectsTable(self.handler) + result_df = projects_table.list(conditions=[]) + + self.assertEqual(len(result_df), 2) + self.assertIsNotNone(result_df) + + expected_columns = projects_table.get_columns() + for col in expected_columns: + self.assertIn(col, result_df.columns) + + self.assertEqual(result_df.loc[0, "projectTypeKey"], "software") + self.assertTrue(pd.isna(result_df.loc[1, "projectTypeKey"])) + + def test_groups_table_missing_fields(self): + """Test that groups with missing fields are handled correctly.""" + mock_client = MagicMock() + self.mock_connect.return_value = mock_client + + mock_groups = { + "groups": [ + { + "groupId": "group1", + "name": "Developers", + "html": "Developers", + }, + { + "groupId": "group2", + "name": "Managers", + }, + ] + } + + mock_client.get_groups.return_value = mock_groups + + groups_table = JiraGroupsTable(self.handler) + result_df = groups_table.list(conditions=[]) + + self.assertEqual(len(result_df), 2) + self.assertIsNotNone(result_df) + + expected_columns = groups_table.get_columns() + for col in expected_columns: + self.assertIn(col, result_df.columns) + + self.assertEqual(result_df.loc[0, "html"], "Developers") + self.assertTrue(pd.isna(result_df.loc[1, "html"])) + + def test_comments_table_fetches_missing_fields(self): + """Comments table should refresh issues to retrieve missing comment fields.""" + mock_client = MagicMock() + self.mock_connect.return_value = mock_client + + issue_without_comments = {"id": "1", "key": "ISSUE-1", "fields": {}} + mock_client.get_all_projects.return_value = [{"id": "100"}] + mock_client.get_all_project_issues.return_value = [issue_without_comments] + mock_client.get_issue.return_value = { + "fields": { + "comment": { + "comments": [ + { + "id": "c-1", + "body": "First comment", + "created": "2024-01-01", + "updated": "2024-01-02", + "author": { + "displayName": "Commenter", + "accountId": "acc-1", + }, + "visibility": { + "type": "role", + "value": "admin", + }, + } + ] + } + } + } + + comments_table = JiraCommentsTable(self.handler) + result_df = comments_table.list(limit=1) + + self.assertEqual(len(result_df), 1) + self.assertEqual(result_df.loc[0, "comment_id"], "c-1") + self.assertEqual(result_df.loc[0, "issue_key"], "ISSUE-1") + self.assertEqual(result_df.loc[0, "body"], "First comment") + self.assertEqual(result_df.loc[0, "author"], "Commenter") + self.assertEqual(result_df.loc[0, "visibility_type"], "role") + self.assertEqual(result_df.loc[0, "visibility_value"], "admin") + + def test_users_table_server_mode_columns(self): + """Users table should switch to server columns when client.cloud is False.""" + mock_client = MagicMock() + mock_client.cloud = False + self.mock_connect.return_value = mock_client + + mock_client.user.return_value = { + "name": "serveruser", + "displayName": "Server User", + "emailAddress": "server@example.com", + } + + users_table = JiraUsersTable(self.handler) + result_df = users_table.list() + + self.assertEqual(len(result_df), 1) + self.assertListEqual(list(result_df.columns), SERVER_COLUMNS) + self.assertEqual(result_df.loc[0, "name"], "serveruser") + self.assertEqual(result_df.loc[0, "displayName"], "Server User") + self.assertEqual(result_df.loc[0, "emailAddress"], "server@example.com") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/handlers/test_jira_handler.py b/tests/unit/handlers/community_handlers/test_jira_handler.py similarity index 100% rename from tests/unit/handlers/test_jira_handler.py rename to tests/unit/handlers/community_handlers/test_jira_handler.py diff --git a/tests/unit/handlers/test_mongodb.py b/tests/unit/handlers/community_handlers/test_mongodb.py similarity index 93% rename from tests/unit/handlers/test_mongodb.py rename to tests/unit/handlers/community_handlers/test_mongodb.py index 36ae12c8479..81232adea01 100644 --- a/tests/unit/handlers/test_mongodb.py +++ b/tests/unit/handlers/community_handlers/test_mongodb.py @@ -2,6 +2,8 @@ from collections import OrderedDict from unittest.mock import patch, MagicMock +import pytest + from bson import ObjectId from mindsdb_sql_parser import ast from mindsdb_sql_parser.ast.select.star import Star @@ -11,11 +13,17 @@ from base_handler_test import BaseHandlerTestSetup from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, + TableResponse, + OkResponse, + ErrorResponse, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE, ) -from mindsdb.integrations.handlers.mongodb_handler.mongodb_handler import MongoDBHandler + +try: + from mindsdb.integrations.handlers.mongodb_handler.mongodb_handler import MongoDBHandler +except ImportError: + pytestmark = pytest.mark.skip("mongodb_handler not installed (community handler)") class TestMongoDBHandler(BaseHandlerTestSetup, unittest.TestCase): @@ -88,7 +96,7 @@ def test_check_connection_success(self): def test_query_failure_with_non_existent_collection(self): """ - Test if the `query` method returns a response object with an error message on failed query due to non-existent collection. + Test if the `query` method returns an ErrorResponse object with an error message on failed query due to non-existent collection. """ self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ "movies" @@ -103,7 +111,7 @@ def test_query_failure_with_non_existent_collection(self): response = self.handler.query(query) - assert isinstance(response, Response) + assert isinstance(response, ErrorResponse) self.assertEqual(response.type, RESPONSE_TYPE.ERROR) self.assertTrue(response.error_message) @@ -139,7 +147,7 @@ def test_query_failure_with_unsupported_operation(self): def test_query_select_success(self): """ - Test if the `query` method returns a response object with a data frame containing the query result. + Test if the `query` method returns a TableResponse object with a data frame containing the query result. `native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method. """ self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ @@ -164,7 +172,7 @@ def test_query_select_success(self): response = self.handler.query(query) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -174,7 +182,7 @@ def test_query_select_success(self): def test_query_update_success(self): """ - Test if the `query` method returns a response object with a 'OK' status. + Test if the `query` method returns an OkResponse object with a 'OK' status. `native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method. """ self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ @@ -201,12 +209,12 @@ def test_query_update_success(self): response = self.handler.query(query) - assert isinstance(response, Response) + assert isinstance(response, OkResponse) self.assertEqual(response.type, RESPONSE_TYPE.OK) def test_get_tables(self): """ - Tests the `get_tables` method returns a response object with a list of tables (collections) in the database. + Tests the `get_tables` method returns a TableResponse object with a list of tables (collections) in the database. """ self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ "theaters", @@ -219,7 +227,7 @@ def test_get_tables(self): response = self.handler.get_tables() - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -232,7 +240,7 @@ def test_get_tables(self): def test_get_columns(self): """ - Tests the `get_columns` method returns a response object with a list of columns (fields) for a given table (collection). + Tests the `get_columns` method returns a TableResponse object with a list of columns (fields) for a given table (collection). """ self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].find_one.return_value = { "_id": ObjectId("5f5b3f3b3f3b3f3b3f3b3f3b"), @@ -243,7 +251,7 @@ def test_get_columns(self): response = self.handler.get_columns("movies") - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -287,7 +295,7 @@ def test_query_select_with_subquery_success(self): response = self.handler.query(main_query) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -353,7 +361,7 @@ def test_query_select_with_complex_subquery_success(self): response = self.handler.query(main_query) - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -388,7 +396,7 @@ def test_query_select_with_where_operators(self): response = self.handler.query(query) - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -431,7 +439,7 @@ def test_query_select_with_and_or_conditions(self): response = self.handler.query(query) - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -494,7 +502,7 @@ def test_select_with_match_and_projection(self): response = self.handler.query(query) - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -525,7 +533,7 @@ def test_select_constant_with_alias(self): response = self.handler.query(query) - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -557,7 +565,7 @@ def test_select_with_constant_no_alias(self): response = self.handler.query(query) - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -604,7 +612,7 @@ def test_query_select_with_subquery_and_where(self): response = self.handler.query(main_query) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -643,7 +651,7 @@ def test_query_select_nested_field_projection(self): response = self.handler.query(query) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -689,7 +697,7 @@ def test_query_select_nested_field_with_where(self): response = self.handler.query(query) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -725,7 +733,7 @@ def test_query_aggregation_on_nested_field(self): response = self.handler.query(query) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -772,7 +780,7 @@ def test_query_group_by_with_nested_aggregation(self): response = self.handler.query(query) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame diff --git a/tests/unit/handlers/test_ms_teams.py b/tests/unit/handlers/community_handlers/test_ms_teams.py similarity index 100% rename from tests/unit/handlers/test_ms_teams.py rename to tests/unit/handlers/community_handlers/test_ms_teams.py diff --git a/tests/unit/handlers/community_handlers/test_openbb_tables.py b/tests/unit/handlers/community_handlers/test_openbb_tables.py new file mode 100644 index 00000000000..50f0d270a5a --- /dev/null +++ b/tests/unit/handlers/community_handlers/test_openbb_tables.py @@ -0,0 +1,101 @@ +from types import SimpleNamespace +from unittest.mock import patch + +import pandas as pd +import pytest + +try: + from mindsdb.integrations.handlers.openbb_handler.openbb_tables import OpenBBtable +except ImportError: + OpenBBtable = None + +pytestmark = pytest.mark.skipif(OpenBBtable is None, reason="openbb_handler not installed") + + +class _DummyOpenBBResponse: + def __init__(self, payload): + self.payload = payload + + def to_df(self): + return pd.DataFrame([self.payload]) + + +class _DummyPrice: + def historical(self, **kwargs): + return _DummyOpenBBResponse(kwargs) + + +class _DummyEquity: + def __init__(self): + self.price = _DummyPrice() + + +class _DummyCoverage: + def __init__(self): + self.commands = {".equity.price.historical": {}} + + +class _DummyObb: + def __init__(self): + self.equity = _DummyEquity() + self.coverage = _DummyCoverage() + + +class _DummyHandler: + def __init__(self): + self.obb = _DummyObb() + + +def test_openbb_command_resolution_returns_callable(): + table = OpenBBtable(_DummyHandler()) + + function = table._resolve_openbb_command("obb.equity.price.historical") + result = function(symbol="AAPL").to_df() + + assert result.iloc[0]["symbol"] == "AAPL" + + +def test_openbb_select_treats_params_as_data(): + table = OpenBBtable(_DummyHandler()) + malicious_value = "__import__('os').system('echo hacked')" + query = SimpleNamespace(where=object()) + + with patch( + "mindsdb.integrations.handlers.openbb_handler.openbb_tables.extract_comparison_conditions", + return_value=[["=", "cmd", "obb.equity.price.historical"], ["=", "symbol", malicious_value]], + ): + result = table.select(query) + + assert result.iloc[0]["symbol"] == malicious_value + + +def test_openbb_command_resolution_rejects_private_segments(): + table = OpenBBtable(_DummyHandler()) + + with pytest.raises(ValueError, match="Invalid OpenBB command segment"): + table._resolve_openbb_command("obb.__class__") + + +def test_openbb_select_coerces_literal_string_params(): + table = OpenBBtable(_DummyHandler()) + query = SimpleNamespace(where=object()) + + with patch( + "mindsdb.integrations.handlers.openbb_handler.openbb_tables.extract_comparison_conditions", + return_value=[ + ["=", "cmd", "obb.equity.price.historical"], + ["=", "limit", "123"], + ["=", "adjusted", "true"], + ["=", "symbol", "'AAPL'"], + ["=", "ids", "[1, 2]"], + ["=", "raw_symbol", "AAPL"], + ], + ): + result = table.select(query) + + row = result.iloc[0] + assert row["limit"] == 123 + assert bool(row["adjusted"]) is True + assert row["symbol"] == "AAPL" + assert row["ids"] == [1, 2] + assert row["raw_symbol"] == "AAPL" diff --git a/tests/unit/handlers/test_s3.py b/tests/unit/handlers/community_handlers/test_s3.py similarity index 71% rename from tests/unit/handlers/test_s3.py rename to tests/unit/handlers/community_handlers/test_s3.py index 16f3e7e64b2..443da7fc776 100644 --- a/tests/unit/handlers/test_s3.py +++ b/tests/unit/handlers/community_handlers/test_s3.py @@ -2,6 +2,8 @@ import unittest from unittest.mock import patch, MagicMock +import pytest + from botocore.client import ClientError from mindsdb_sql_parser import ast from mindsdb_sql_parser.ast import Select, Identifier, Star, Constant @@ -9,34 +11,39 @@ import pandas as pd from base_handler_test import BaseHandlerTestSetup -from mindsdb.integrations.handlers.s3_handler.s3_handler import S3Handler from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, + OkResponse, + TableResponse, + DataHandlerResponse as Response, HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE + RESPONSE_TYPE, ) +try: + from mindsdb.integrations.handlers.s3_handler.s3_handler import S3Handler +except ImportError: + pytestmark = pytest.mark.skip("s3_handler not installed (community handler)") -class TestS3Handler(BaseHandlerTestSetup, unittest.TestCase): +class TestS3Handler(BaseHandlerTestSetup, unittest.TestCase): @property def object_name(self): - return '`my-bucket/my-file.csv`' + return "`my-bucket/my-file.csv`" @property def dummy_connection_data(self): return OrderedDict( - aws_access_key_id='AQAXEQK89OX07YS34OP', - aws_secret_access_key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY', - bucket='mindsdb-bucket', - region_name='us-east-2', + aws_access_key_id="AQAXEQK89OX07YS34OP", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + bucket="mindsdb-bucket", + region_name="us-east-2", ) def create_handler(self): - return S3Handler('s3', connection_data=self.dummy_connection_data) + return S3Handler("s3", connection_data=self.dummy_connection_data) def create_patcher(self): - return patch('boto3.client') + return patch("boto3.client") def test_connect(self): """ @@ -51,7 +58,7 @@ def test_connect(self): self.assertTrue(self.handler.is_connected) self.mock_connect.assert_called_once() - @patch('boto3.client') + @patch("boto3.client") def test_check_connection_success(self, mock_boto3_client): """ Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection. @@ -66,7 +73,7 @@ def test_check_connection_success(self, mock_boto3_client): assert isinstance(response, StatusResponse) self.assertFalse(response.error_message) - @patch('boto3.client') + @patch("boto3.client") def test_check_connection_failure_invalid_bucket_or_no_access(self, mock_boto3_client): """ Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on failed connection due to invalid bucket or lack of access permissions. @@ -76,12 +83,12 @@ def test_check_connection_failure_invalid_bucket_or_no_access(self, mock_boto3_c mock_boto3_client.return_value = mock_boto3_client_instance mock_boto3_client_instance.head_bucket.side_effect = ClientError( error_response={ - 'Error': { - 'Code': '404', - 'Message': 'Not Found', + "Error": { + "Code": "404", + "Message": "Not Found", } }, - operation_name='HeadBucket' + operation_name="HeadBucket", ) response = self.handler.check_connection() @@ -90,7 +97,7 @@ def test_check_connection_failure_invalid_bucket_or_no_access(self, mock_boto3_c assert isinstance(response, StatusResponse) self.assertTrue(response.error_message) - @patch('boto3.client') + @patch("boto3.client") def test_query_select(self, mock_boto3_client): """ Tests the `query` method to ensure it executes a SELECT SQL query using a mock cursor and returns a Response object. @@ -104,18 +111,11 @@ def test_query_select(self, mock_boto3_client): duckdb_connect = MagicMock() self.handler._connect_duckdb = duckdb_connect duckdb_execute = duckdb_connect().__enter__().execute - duckdb_execute().fetchdf.return_value = pd.DataFrame([], columns=['col_2']) + duckdb_execute().fetchdf.return_value = pd.DataFrame([], columns=["col_2"]) # Craft the SELECT query and execute it. - object_name = 'my-bucket/my-file.csv' - select = ast.Select( - targets=[ - Star() - ], - from_table=Identifier( - parts=[object_name] - ) - ) + object_name = "my-bucket/my-file.csv" + select = ast.Select(targets=[Star()], from_table=Identifier(parts=[object_name])) duckdb_execute.reset_mock() response = self.handler.query(select) @@ -124,10 +124,9 @@ def test_query_select(self, mock_boto3_client): f"SELECT * FROM 's3://{self.dummy_connection_data['bucket']}/{object_name.replace('`', '')}'" ) - assert isinstance(response, Response) - self.assertFalse(response.error_code) + assert isinstance(response, TableResponse) - @patch('boto3.client') + @patch("boto3.client") def test_query_insert(self, mock_boto3_client): """ Tests the `query` method to ensure it executes a INSERT SQL query using a mock cursor and returns a Response object. @@ -145,29 +144,25 @@ def test_query_insert(self, mock_boto3_client): duckdb_execute().fetchdf.return_value = None # Craft the INSERT query and execute it. - columns = ['col_1', 'col_2'] - values = [('val_1', 'val_2')] - insert = ast.Insert( - table=Identifier( - parts=[self.object_name] - ), - columns=columns, - values=values - ) + columns = ["col_1", "col_2"] + values = [("val_1", "val_2")] + insert = ast.Insert(table=Identifier(parts=[self.object_name]), columns=columns, values=values) duckdb_execute.reset_mock() response = self.handler.query(insert) sqls = [i[0][0] for i in duckdb_execute.call_args_list] - assert sqls[0] == f"CREATE TABLE tmp_table AS SELECT * FROM 's3://{self.dummy_connection_data['bucket']}/{self.object_name}'" + assert ( + sqls[0] + == f"CREATE TABLE tmp_table AS SELECT * FROM 's3://{self.dummy_connection_data['bucket']}/{self.object_name}'" + ) assert sqls[1] == "INSERT INTO tmp_table BY NAME SELECT * FROM df" assert sqls[2] == f"COPY tmp_table TO 's3://{self.dummy_connection_data['bucket']}/{self.object_name}'" - assert isinstance(response, Response) - self.assertFalse(response.error_code) + assert isinstance(response, OkResponse) - @patch('boto3.client') + @patch("boto3.client") def test_get_tables(self, mock_boto3_client): """ Test that the `get_tables` method correctly calls the `list_objects_v2` method and returns a Response object with the supported objects (files). @@ -176,12 +171,12 @@ def test_get_tables(self, mock_boto3_client): mock_boto3_client_instance = MagicMock() mock_boto3_client.return_value = mock_boto3_client_instance mock_boto3_client_instance.list_objects_v2.return_value = { - 'Contents': [ - {'Key': 'file1.csv'}, - {'Key': 'file2.tsv'}, - {'Key': 'file3.json'}, - {'Key': 'file4.parquet'}, - {'Key': 'file5.xlsx'}, + "Contents": [ + {"Key": "file1.csv"}, + {"Key": "file2.tsv"}, + {"Key": "file3.json"}, + {"Key": "file4.parquet"}, + {"Key": "file5.xlsx"}, ] } @@ -192,37 +187,32 @@ def test_get_tables(self, mock_boto3_client): df = response.data_frame self.assertEqual(len(df), 5) # +1 table is 'files' - self.assertNotIn('file5.xlsx', df['table_name'].values) + self.assertNotIn("file5.xlsx", df["table_name"].values) - @patch('mindsdb.integrations.handlers.s3_handler.s3_handler.S3Handler.query') + @patch("mindsdb.integrations.handlers.s3_handler.s3_handler.S3Handler.query") def test_get_columns(self, mock_query): """ Test that the `get_columns` method correctly constructs the SQL query and calls `native_query` with the correct query. """ - mock_query.return_value = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( + mock_query.return_value = TableResponse( + data=pd.DataFrame( data={ - 'col_1': ['row_1', 'row_2', 'row_3'], - 'col_2': [1, 2, 3], + "col_1": ["row_1", "row_2", "row_3"], + "col_2": [1, 2, 3], }, - ) + ), ) - table_name = 'mock_table' + table_name = "mock_table" response = self.handler.get_columns(table_name) - expected_query = Select( - targets=[Star()], - from_table=Identifier(parts=[table_name]), - limit=Constant(1) - ) + expected_query = Select(targets=[Star()], from_table=Identifier(parts=[table_name]), limit=Constant(1)) self.handler.query.assert_called_once_with(expected_query) df = response.data_frame - self.assertEqual(df.columns.tolist(), ['column_name', 'data_type']) - self.assertEqual(df['data_type'].values.tolist(), ['string', 'int64']) + self.assertEqual(df.columns.tolist(), ["column_name", "data_type"]) + self.assertEqual(df["data_type"].values.tolist(), ["string", "int64"]) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/test_slack.py b/tests/unit/handlers/community_handlers/test_slack.py similarity index 99% rename from tests/unit/handlers/test_slack.py rename to tests/unit/handlers/community_handlers/test_slack.py index 59c64de18b0..62a9ada8bc8 100644 --- a/tests/unit/handlers/test_slack.py +++ b/tests/unit/handlers/community_handlers/test_slack.py @@ -12,7 +12,7 @@ import pandas as pd from base_handler_test import BaseAPIChatHandlerTest, BaseAPIResourceTestSetup -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, HandlerResponse as Response +from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, TableResponse from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator try: @@ -431,7 +431,7 @@ def test_native_query(self): response = self.handler.native_query(query) self.mock_connect.return_value.conversations_info.assert_called_once_with(channel="C1234567890") - assert isinstance(response, Response) + assert isinstance(response, TableResponse) expected_df = pd.DataFrame([MOCK_RESPONSE_CONV_INFO_1["channel"]]) pd.testing.assert_frame_equal(response.data_frame, expected_df) @@ -451,7 +451,7 @@ def test_native_query_with_pagination(self): self.mock_connect.return_value.conversations_list.assert_any_call() self.mock_connect.return_value.conversations_list.assert_any_call(cursor="dGVhbTpDMDYxRkE1UEI=") - assert isinstance(response, Response) + assert isinstance(response, TableResponse) expected_df = pd.DataFrame(MOCK_RESPONSE_CONV_LIST_1["channels"] + MOCK_RESPONSE_CONV_LIST_2["channels"]) pd.testing.assert_frame_equal(response.data_frame, expected_df) diff --git a/tests/unit/handlers/test_bigquery.py b/tests/unit/handlers/test_bigquery.py index 1bb69de1a11..448af57d609 100644 --- a/tests/unit/handlers/test_bigquery.py +++ b/tests/unit/handlers/test_bigquery.py @@ -6,9 +6,10 @@ from google.api_core.exceptions import BadRequest from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE, + TableResponse, + ErrorResponse, ) try: @@ -76,7 +77,7 @@ def test_native_query(self): self.handler.connect = MagicMock(return_value=mock_conn) mock_query = MagicMock() - mock_query.to_dataframe.return_value = None + mock_query.to_dataframe.return_value = pd.DataFrame({"col": [1, 2, 3]}) mock_conn.query.return_value = mock_query query_str = "SELECT * FROM table" @@ -87,8 +88,35 @@ def test_native_query(self): mock_query_job_config_instance = mock_query_job_config.return_value data = self.handler.native_query(query_str) mock_conn.query.assert_called_once_with(query_str, job_config=mock_query_job_config_instance) - assert isinstance(data, Response) - self.assertFalse(data.error_code) + assert isinstance(data, TableResponse) + + def test_native_query_empty_select_returns_table(self): + mock_conn = MagicMock() + self.handler.connect = MagicMock(return_value=mock_conn) + + mock_query = MagicMock() + mock_query.to_dataframe.return_value = pd.DataFrame(columns=["id"]) + mock_conn.query.return_value = mock_query + + with patch("mindsdb.integrations.handlers.bigquery_handler.bigquery_handler.QueryJobConfig"): + response = self.handler.native_query("SELECT id FROM table WHERE 1 = 0") + + self.assertEqual(response.type, RESPONSE_TYPE.TABLE) + self.assertEqual(list(response.data_frame.columns), ["id"]) + self.assertTrue(response.data_frame.empty) + + def test_native_query_empty_dataframe_without_columns_returns_ok(self): + mock_conn = MagicMock() + self.handler.connect = MagicMock(return_value=mock_conn) + + mock_query = MagicMock() + mock_query.to_dataframe.return_value = pd.DataFrame() + mock_conn.query.return_value = mock_query + + with patch("mindsdb.integrations.handlers.bigquery_handler.bigquery_handler.QueryJobConfig"): + response = self.handler.native_query("UPDATE table SET col = 1") + + self.assertEqual(response.type, RESPONSE_TYPE.OK) def test_get_tables(self): """ @@ -124,7 +152,7 @@ def test_get_columns(self): self.handler.native_query.assert_called_once_with(expected_query) def test_meta_get_tables_filters(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.meta_get_tables(table_names=["orders"]) @@ -132,7 +160,7 @@ def test_meta_get_tables_filters(self): self.assertIn("AND t.table_name IN ('orders')", query) def test_meta_get_columns_filters(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.meta_get_columns(table_names=["orders"]) @@ -176,9 +204,9 @@ def test_meta_get_column_statistics_batches_results(self): self.handler.native_query = MagicMock( side_effect=[ - Response(RESPONSE_TYPE.TABLE, data_frame=column_types_result), - Response(RESPONSE_TYPE.TABLE, data_frame=first_batch_result), - Response(RESPONSE_TYPE.TABLE, data_frame=second_batch_result), + TableResponse(data=column_types_result), + TableResponse(data=first_batch_result), + TableResponse(data=second_batch_result), ] ) @@ -189,20 +217,21 @@ def test_meta_get_column_statistics_batches_results(self): self.assertEqual(self.handler.native_query.call_count, 3) # 1 for column types + 2 for batches def test_meta_get_column_statistics_returns_error_when_empty(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.ERROR, error_message="boom")) + self.handler.native_query = MagicMock(return_value=ErrorResponse(error_message="boom")) response = self.handler.meta_get_column_statistics_for_table("table", ["col"]) self.assertEqual(response.resp_type, RESPONSE_TYPE.ERROR) def test_meta_get_primary_keys_filters(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.meta_get_primary_keys(table_names=["orders"]) query = self.handler.native_query.call_args[0][0] self.assertIn("AND tc.table_name IN ('orders')", query) + self.assertNotIn("tc.constraint_name,", query) def test_meta_get_foreign_keys_filters(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.meta_get_foreign_keys(table_names=["orders"]) query = self.handler.native_query.call_args[0][0] self.assertIn("AND tc.table_name IN ('orders')", query) diff --git a/tests/unit/handlers/test_file.py b/tests/unit/handlers/test_file.py index 9df2ee28415..7c54c8cbbc7 100644 --- a/tests/unit/handlers/test_file.py +++ b/tests/unit/handlers/test_file.py @@ -17,8 +17,8 @@ ) from mindsdb.integrations.handlers.file_handler.file_handler import FileHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE - +from mindsdb.integrations.libs.response import RESPONSE_TYPE, INF_SCHEMA_COLUMNS_NAMES_SET, INF_SCHEMA_COLUMNS_NAMES +from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE from mindsdb.integrations.utilities.files.file_reader import ( FileReader, FileProcessingError, @@ -406,8 +406,25 @@ def test_get_columns(): file_handler = FileHandler(file_controller=MockFileController()) response = file_handler.get_columns("mock") - assert response.type == RESPONSE_TYPE.TABLE - - expected_df = pandas.DataFrame([{"Field": x, "Type": "str"} for x in file_records[0][2]]) - - assert response.data_frame.equals(expected_df) + assert response.type == RESPONSE_TYPE.COLUMNS_TABLE + + data = [] + for name in file_records[0][2]: + row = {} + for key_name in INF_SCHEMA_COLUMNS_NAMES_SET: + if key_name == INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: + row[key_name] = name + elif key_name == INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE: + row[key_name] = "str" + elif key_name == INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE: + row[key_name] = MYSQL_DATA_TYPE.TEXT + else: + row[key_name] = None + data.append(row) + + expected_df = pandas.DataFrame(data) + assert set(response.data_frame.columns) == set(expected_df.columns) + expected_df = expected_df[response.data_frame.columns] + + # Use 'compare' to ignore dtypes (object != string) + assert response.data_frame.compare(expected_df).empty diff --git a/tests/unit/handlers/test_hubspot.py b/tests/unit/handlers/test_hubspot.py index df7ee1cdd04..edbb3732ecc 100644 --- a/tests/unit/handlers/test_hubspot.py +++ b/tests/unit/handlers/test_hubspot.py @@ -2,6 +2,7 @@ import pytest import unittest from unittest.mock import patch, MagicMock +import pandas as pd try: from hubspot.crm.objects import SimplePublicObject @@ -9,6 +10,8 @@ HubspotHandler, ) from mindsdb.integrations.handlers.hubspot_handler.hubspot_tables import ( + CompaniesTable, + ContactsTable, DealsTable, canonical_op, to_hubspot_property, @@ -17,6 +20,7 @@ _normalize_filter_conditions, ) from mindsdb_sql_parser.ast import Select, Identifier, Function + from mindsdb_sql_parser import parse_sql from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator except ImportError: pytestmark = pytest.mark.skip("HubSpot handler not installed") @@ -24,8 +28,8 @@ from base_handler_test import BaseHandlerTestSetup from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, HandlerStatusResponse as StatusResponse, + DataHandlerResponse, RESPONSE_TYPE, ) @@ -154,31 +158,10 @@ def test_native_query(self): query = "SELECT * FROM companies LIMIT 1" response = self.handler.native_query(query) - assert isinstance(response, Response) + assert isinstance(response, DataHandlerResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) self.assertIsNotNone(response.data_frame) - def test_get_tables(self): - """Test get_tables method returns registered tables.""" - response = self.handler.get_tables() - - assert isinstance(response, Response) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - - self.assertEqual(len(df), len(self.EXPECTED_TABLES)) - self.assertIn("TABLE_NAME", df.columns) - self.assertIn("TABLE_TYPE", df.columns) - - table_names = df["TABLE_NAME"].tolist() - for table_name in self.EXPECTED_TABLES: - self.assertIn(table_name, table_names) - - # All should be BASE TABLE type - table_types = df["TABLE_TYPE"].unique().tolist() - self.assertEqual(table_types, ["BASE TABLE"]) - def test_get_columns_companies(self): """Test get_columns method for companies table.""" mock_hubspot_client = MagicMock() @@ -203,7 +186,7 @@ def test_get_columns_companies(self): response = self.handler.get_columns("companies") - assert isinstance(response, Response) + assert isinstance(response, DataHandlerResponse) self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) df = response.data_frame @@ -251,7 +234,7 @@ def test_get_columns_contacts(self): response = self.handler.get_columns("contacts") - assert isinstance(response, Response) + assert isinstance(response, DataHandlerResponse) self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) df = response.data_frame @@ -298,7 +281,7 @@ def test_get_columns_deals(self): response = self.handler.get_columns("deals") - assert isinstance(response, Response) + assert isinstance(response, DataHandlerResponse) self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) df = response.data_frame @@ -338,7 +321,7 @@ def test_get_columns_invalid_table(self): """Test get_columns method with invalid table name.""" response = self.handler.get_columns("nonexistent_table") - assert isinstance(response, Response) + assert isinstance(response, DataHandlerResponse) self.assertEqual(response.type, RESPONSE_TYPE.ERROR) self.assertIsNotNone(response.error_message) @@ -354,7 +337,7 @@ def test_native_query_with_insert(self): insert_query = "INSERT INTO companies (name, city) VALUES ('New Company', 'Boston')" response = self.handler.native_query(insert_query) - assert isinstance(response, Response) + assert isinstance(response, DataHandlerResponse) self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR) @@ -386,7 +369,7 @@ def test_native_query_with_update(self): update_query = "UPDATE companies SET city='Boston' WHERE name='Test Company'" response = self.handler.native_query(update_query) - assert isinstance(response, Response) + assert isinstance(response, DataHandlerResponse) self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR) def test_native_query_with_delete(self): @@ -414,7 +397,7 @@ def test_native_query_with_delete(self): delete_query = "DELETE FROM companies WHERE name='Test Company'" response = self.handler.native_query(delete_query) - assert isinstance(response, Response) + assert isinstance(response, DataHandlerResponse) self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR) def test_handler_name(self): @@ -468,41 +451,6 @@ def test_native_query_invalid_sql(self): self.assertEqual(response.type, RESPONSE_TYPE.ERROR) self.assertIn("Query execution failed", response.error_message) - def test_get_tables_success(self): - """Test get_tables method returns table metadata.""" - mock_hubspot_client = MagicMock() - mock_companies_data = [ - SimplePublicObject( - id="123", - properties={ - "name": "Test Company", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ) - ] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.get_all.return_value = mock_companies_data - mock_hubspot_client.crm.contacts.get_all.return_value = [] - mock_hubspot_client.crm.deals.get_all.return_value = [] - - response = self.handler.get_tables() - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsNotNone(response.data_frame) - - df = response.data_frame - - self.assertEqual(len(df), len(self.EXPECTED_TABLES)) - self.assertIn("TABLE_NAME", df.columns) - self.assertIn("TABLE_TYPE", df.columns) - self.assertIn("TABLE_SCHEMA", df.columns) - - table_names = df["TABLE_NAME"].tolist() - for table_name in self.EXPECTED_TABLES: - self.assertIn(table_name, table_names) - def test_get_tables_connection_failure(self): """Test get_tables method with connection failure.""" self.mock_connect.side_effect = Exception("Connection failed") @@ -615,15 +563,22 @@ def test_oauth_connection(self): handler = HubspotHandler("hubspot", connection_data=oauth_data) mock_hubspot_client = MagicMock() - - with patch("mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpot") as mock_hubspot: + mock_access_token = "oauth_access_token_123" + + with ( + patch("mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpot") as mock_hubspot, + patch( + "mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpotOAuth2Manager" + ) as mock_oauth_manager_cls, + ): mock_hubspot.return_value = mock_hubspot_client + mock_oauth_manager_cls.return_value.get_access_token.return_value = mock_access_token connection = handler.connect() self.assertIsNotNone(connection) self.assertTrue(handler.is_connected) - mock_hubspot.assert_called_with(client_id="test_client_id", client_secret="test_client_secret") + mock_hubspot.assert_called_with(access_token=mock_access_token) def test_comprehensive_error_handling(self): """Test comprehensive error handling in various scenarios.""" @@ -744,40 +699,6 @@ def test_get_columns_with_standard_schema(self): self.assertEqual(id_row.iloc[0]["ORDINAL_POSITION"], 1) self.assertEqual(id_row.iloc[0]["IS_NULLABLE"], "NO") - def test_comprehensive_table_metadata(self): - """Test that get_tables returns comprehensive metadata.""" - mock_hubspot_client = MagicMock() - - mock_companies_search_result = MagicMock() - mock_companies_search_result.total = 1250 - - mock_contacts_search_result = MagicMock() - mock_contacts_search_result.total = 850 - - mock_deals_search_result = MagicMock() - mock_deals_search_result.total = 320 - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_companies_search_result - mock_hubspot_client.crm.contacts.search_api.do_search.return_value = mock_contacts_search_result - mock_hubspot_client.crm.deals.search_api.do_search.return_value = mock_deals_search_result - - response = self.handler.get_tables() - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - df = response.data_frame - - # Check only the 3 required metadata columns (following postgres handler pattern) - required_columns = ["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"] - for col in required_columns: - self.assertIn(col, df.columns) - - # Verify all three tables are present - table_names = df["TABLE_NAME"].tolist() - self.assertEqual(len(table_names), len(self.EXPECTED_TABLES)) - for table_name in self.EXPECTED_TABLES: - self.assertIn(table_name, table_names) - def test_estimate_table_rows_with_search_api(self): """Test that _estimate_table_rows uses search API for accurate counts.""" mock_hubspot_client = MagicMock() @@ -930,7 +851,11 @@ def test_meta_get_columns_all_tables(self): self.assertIn("deals", tables_present) def test_select_companies_with_in_clause_uses_search_api(self): - """Test that SELECT with IN clause uses HubSpot Search API.""" + """ + MindsDB calls table.select(query_ast) directly β€” not native_query. + Verify that a WHERE city IN (...) query routes to the HubSpot Search API. + """ + mock_hubspot_client = MagicMock() mock_search_result = MagicMock() mock_search_result.results = [ @@ -943,33 +868,26 @@ def test_select_companies_with_in_clause_uses_search_api(self): "hs_lastmodifieddate": "2023-01-01T00:00:00Z", }, ), - SimplePublicObject( - id="2", - properties={ - "name": "Austin Company", - "city": "Austin", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ), ] mock_search_result.paging = None - self.mock_connect.return_value = mock_hubspot_client + handler = MagicMock() + handler.connect.return_value = mock_hubspot_client mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - query = "SELECT * FROM companies WHERE city IN ('New York', 'Austin')" - response = self.handler.native_query(query) + table = CompaniesTable(handler) + query = parse_sql("SELECT * FROM companies WHERE city IN ('New York', 'Austin')", dialect="mindsdb") + result = table.select(query) - # Verify search API was called (not get_all) mock_hubspot_client.crm.companies.search_api.do_search.assert_called() mock_hubspot_client.crm.companies.get_all.assert_not_called() - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsNotNone(response.data_frame) + self.assertIsNotNone(result) def test_select_contacts_with_in_clause(self): - """Test SELECT contacts with IN clause.""" + """ + MindsDB calls ContactsTable.select(query_ast) directly. + Verify city IN (...) routes to HubSpot Search API. + """ mock_hubspot_client = MagicMock() mock_search_result = MagicMock() mock_search_result.results = [ @@ -985,17 +903,22 @@ def test_select_contacts_with_in_clause(self): ] mock_search_result.paging = None - self.mock_connect.return_value = mock_hubspot_client + handler = MagicMock() + handler.connect.return_value = mock_hubspot_client mock_hubspot_client.crm.contacts.search_api.do_search.return_value = mock_search_result - query = "SELECT * FROM contacts WHERE city IN ('Boston', 'Chicago')" - response = self.handler.native_query(query) + table = ContactsTable(handler) + query = parse_sql("SELECT * FROM contacts WHERE city IN ('Boston', 'Chicago')", dialect="mindsdb") + result = table.select(query) mock_hubspot_client.crm.contacts.search_api.do_search.assert_called() - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) + self.assertIsNotNone(result) def test_select_deals_with_in_clause(self): - """Test SELECT deals with IN clause on dealstage.""" + """ + MindsDB calls DealsTable.select(query_ast) directly. + Verify dealstage IN (...) routes to HubSpot Search API. + """ mock_hubspot_client = MagicMock() mock_search_result = MagicMock() mock_search_result.results = [ @@ -1010,33 +933,43 @@ def test_select_deals_with_in_clause(self): ] mock_search_result.paging = None - self.mock_connect.return_value = mock_hubspot_client + handler = MagicMock() + handler.connect.return_value = mock_hubspot_client + handler._hubspot_deal_stage_map_cache = ({}, {}) + handler._hubspot_deal_stage_rows_cache = [] + handler._hubspot_owner_rows_cache = [] + handler._hubspot_owner_map_cache = {} mock_hubspot_client.crm.deals.search_api.do_search.return_value = mock_search_result + mock_hubspot_client.crm.pipelines.pipelines_api.get_all.return_value = MagicMock(results=[]) - query = "SELECT * FROM deals WHERE dealstage IN ('closedwon', 'closedlost')" - response = self.handler.native_query(query) + table = DealsTable(handler) + query = parse_sql("SELECT * FROM deals WHERE dealstage IN ('closedwon', 'closedlost')", dialect="mindsdb") + result = table.select(query) mock_hubspot_client.crm.deals.search_api.do_search.assert_called() - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) + self.assertIsNotNone(result) def test_select_with_in_clause_verifies_filter_structure(self): - """Test that IN clause generates correct HubSpot filter structure.""" + """ + Verify that city IN (...) generates the correct HubSpot Search API filter payload. + Tests via CompaniesTable.select() β€” the actual call path MindsDB uses. + """ mock_hubspot_client = MagicMock() mock_search_result = MagicMock() mock_search_result.results = [] mock_search_result.paging = None - self.mock_connect.return_value = mock_hubspot_client + handler = MagicMock() + handler.connect.return_value = mock_hubspot_client mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - query = "SELECT * FROM companies WHERE city IN ('NYC', 'LA', 'Chicago')" - self.handler.native_query(query) + table = CompaniesTable(handler) + query = parse_sql("SELECT * FROM companies WHERE city IN ('NYC', 'LA', 'Chicago')", dialect="mindsdb") + table.select(query) - # Capture the call arguments call_args = mock_hubspot_client.crm.companies.search_api.do_search.call_args search_request = call_args.kwargs.get("public_object_search_request", {}) - # Verify filter structure self.assertIn("filterGroups", search_request) filter_groups = search_request["filterGroups"] self.assertEqual(len(filter_groups), 1) @@ -1051,17 +984,19 @@ def test_select_with_in_clause_verifies_filter_structure(self): self.assertEqual(set(in_filter["values"]), {"NYC", "LA", "Chicago"}) def test_select_with_not_in_clause(self): - """Test SELECT with NOT IN clause.""" + """Verify industry NOT IN (...) generates NOT_IN operator in HubSpot filter.""" mock_hubspot_client = MagicMock() mock_search_result = MagicMock() mock_search_result.results = [] mock_search_result.paging = None - self.mock_connect.return_value = mock_hubspot_client + handler = MagicMock() + handler.connect.return_value = mock_hubspot_client mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - query = "SELECT * FROM companies WHERE industry NOT IN ('Retail', 'Healthcare')" - self.handler.native_query(query) + table = CompaniesTable(handler) + query = parse_sql("SELECT * FROM companies WHERE industry NOT IN ('Retail', 'Healthcare')", dialect="mindsdb") + table.select(query) call_args = mock_hubspot_client.crm.companies.search_api.do_search.call_args search_request = call_args.kwargs.get("public_object_search_request", {}) @@ -1070,17 +1005,22 @@ def test_select_with_not_in_clause(self): self.assertEqual(filters[0]["operator"], "NOT_IN") def test_select_with_in_and_equality_combined(self): - """Test SELECT combining IN clause with equality filter.""" + """Verify city IN (...) AND industry = '...' both push down to the Search API.""" mock_hubspot_client = MagicMock() mock_search_result = MagicMock() mock_search_result.results = [] mock_search_result.paging = None - self.mock_connect.return_value = mock_hubspot_client + handler = MagicMock() + handler.connect.return_value = mock_hubspot_client mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - query = "SELECT * FROM companies WHERE city IN ('NYC', 'LA') AND industry = 'Technology'" - self.handler.native_query(query) + table = CompaniesTable(handler) + query = parse_sql( + "SELECT * FROM companies WHERE city IN ('NYC', 'LA') AND industry = 'Technology'", + dialect="mindsdb", + ) + table.select(query) mock_hubspot_client.crm.companies.search_api.do_search.assert_called() @@ -1151,17 +1091,30 @@ def test_meta_get_column_statistics_multiple_tables(self): self.assertIn("contacts", table_names) def test_search_pushdown_builds_sorts_and_properties(self): - """Test search API payload includes sorts and properties when pushdown is used.""" + """ + Verify that ORDER BY and SELECT columns are pushed down to the HubSpot Search API. + Uses DealsTable.select() β€” the actual call path MindsDB uses. + """ mock_hubspot_client = MagicMock() mock_search_result = MagicMock() mock_search_result.results = [] mock_search_result.paging = None - self.mock_connect.return_value = mock_hubspot_client + handler = MagicMock() + handler.connect.return_value = mock_hubspot_client + handler._hubspot_deal_stage_map_cache = ({}, {}) + handler._hubspot_deal_stage_rows_cache = [] + handler._hubspot_owner_rows_cache = [] + handler._hubspot_owner_map_cache = {} mock_hubspot_client.crm.deals.search_api.do_search.return_value = mock_search_result + mock_hubspot_client.crm.pipelines.pipelines_api.get_all.return_value = MagicMock(results=[]) - query = "SELECT dealname FROM deals WHERE pipeline='default' ORDER BY closedate DESC LIMIT 5" - self.handler.native_query(query) + table = DealsTable(handler) + query = parse_sql( + "SELECT dealname FROM deals WHERE pipeline='default' ORDER BY closedate DESC LIMIT 5", + dialect="mindsdb", + ) + table.select(query) call_args = mock_hubspot_client.crm.deals.search_api.do_search.call_args search_request = call_args.kwargs.get("public_object_search_request", {}) @@ -1171,6 +1124,130 @@ def test_search_pushdown_builds_sorts_and_properties(self): self.assertEqual(search_request["sorts"][0]["direction"], "DESCENDING") self.assertEqual(search_request["properties"], ["dealname"]) + def test_three_table_join_on_clause_orientations(self): + """ + Verify CORE JOIN ASSOC JOIN CORE resolves left_assoc_col / right_assoc_col + correctly regardless of which side of the ON each table appears on. + """ + + company_df = pd.DataFrame({"id": ["1"], "name": ["Acme"]}) + assoc_df = pd.DataFrame({"company_id": ["1"], "contact_id": ["42"]}) + contact_df = pd.DataFrame({"id": ["42"], "firstname": ["Alice"]}) + + orientations = [ + ("c.id = cc.company_id", "cc.contact_id = ct.id"), # A + ("cc.company_id = c.id", "cc.contact_id = ct.id"), # B + ("c.id = cc.company_id", "ct.id = cc.contact_id"), # C + ("cc.company_id = c.id", "ct.id = cc.contact_id"), # D + ] + + handler: HubspotHandler = self.create_handler() + + for left_on, right_on in orientations: + with self.subTest(left_on=left_on, right_on=right_on): + companies_mock = MagicMock() + companies_mock.select.return_value = company_df.copy() + assoc_mock = MagicMock() + assoc_mock.list.return_value = assoc_df.copy() + contacts_mock = MagicMock() + contacts_mock.list.return_value = contact_df.copy() + + handler._tables["companies"] = companies_mock + handler._tables["company_contacts"] = assoc_mock + handler._tables["contacts"] = contacts_mock + + query = f""" + SELECT * + FROM companies c + JOIN company_contacts cc ON {left_on} + JOIN contacts ct ON {right_on} + """ + response = handler.native_query(query) + + self.assertEqual( + response.type, + RESPONSE_TYPE.TABLE, + msg=f"orientation ({left_on!r}, {right_on!r}) returned ERROR: " + f"{getattr(response, 'error_message', '')}", + ) + self.assertFalse(response.data_frame.empty) + + # The assoc table must be queried by company_id (left_assoc_col), not + # by some other column β€” this is the column the bug inverted. + assoc_conditions = assoc_mock.list.call_args.kwargs.get("conditions", []) + assoc_filter_cols = [fc.column for fc in assoc_conditions] + self.assertIn( + "company_id", + assoc_filter_cols, + msg=f"assoc.list not filtered on company_id for orientation " + f"({left_on!r}, {right_on!r}); got {assoc_filter_cols}", + ) + + def test_multijoin_query_handling(self): + """Test that multijoin queries return appropriate error since not supported.""" + query = """ + SELECT c.name, o.dealname + FROM companies c + JOIN deals o ON c.id = o.company_id + """ + response = self.handler.native_query(query) + + self.assertEqual(response.type, RESPONSE_TYPE.ERROR) + self.assertIn("not supported", response.error_message) + + +class TestHubspotPassthrough(unittest.TestCase): + """Exercise the PassthroughMixin retrofit (PAT path).""" + + def _mock_response(self, status_code=200): + resp = MagicMock() + resp.status_code = status_code + resp.headers = {"Content-Type": "application/json"} + resp.iter_content = MagicMock(return_value=iter([b'{"results":[]}'])) + resp.close = MagicMock() + return resp + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_passthrough_uses_bearer_and_hubspot_base_url(self, mock_request): + mock_request.return_value = self._mock_response() + handler = HubspotHandler( + "hubspot", + connection_data={"access_token": "pat-abc123xyz"}, + ) + from mindsdb.integrations.libs.passthrough_types import PassthroughRequest + + resp = handler.api_passthrough(PassthroughRequest("GET", "/crm/v3/owners")) + + self.assertEqual(resp.status_code, 200) + args, kwargs = mock_request.call_args + self.assertEqual(args[0], "GET") + self.assertEqual(args[1], "https://api.hubapi.com/crm/v3/owners") + self.assertEqual(kwargs["headers"]["Authorization"], "Bearer pat-abc123xyz") + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_test_passthrough_returns_ok_on_200(self, mock_request): + mock_request.return_value = self._mock_response(status_code=200) + handler = HubspotHandler("hubspot", connection_data={"access_token": "pat"}) + + result = handler.test_passthrough() + + self.assertTrue(result["ok"]) + self.assertEqual(result["status_code"], 200) + self.assertEqual(result["host"], "api.hubapi.com") + self.assertIsInstance(result["latency_ms"], int) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_test_passthrough_returns_auth_failed_on_401(self, mock_request): + mock_request.return_value = self._mock_response(status_code=401) + handler = HubspotHandler("hubspot", connection_data={"access_token": "pat"}) + + result = handler.test_passthrough() + + self.assertFalse(result["ok"]) + self.assertEqual(result["error_code"], "auth_failed") + self.assertEqual(result["status_code"], 401) + self.assertEqual(result["host"], "api.hubapi.com") + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/test_mariadb.py b/tests/unit/handlers/test_mariadb.py index be2cc4f6120..9d75a8dce72 100644 --- a/tests/unit/handlers/test_mariadb.py +++ b/tests/unit/handlers/test_mariadb.py @@ -6,19 +6,18 @@ from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager from mindsdb.integrations.handlers.mariadb_handler.mariadb_handler import MariaDBHandler -from mindsdb.integrations.libs.response import HandlerResponse as Response +from mindsdb.integrations.libs.response import TableResponse class TestMariaDBHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property def dummy_connection_data(self): return OrderedDict( - host='127.0.0.1', + host="127.0.0.1", port=3307, - user='example_user', - password='example_pass', - database='example_db', + user="example_user", + password="example_pass", + database="example_db", ) @property @@ -60,22 +59,21 @@ def get_columns_query(self): from information_schema.columns where - table_name = '{self.mock_table}'; + table_name = '{self.mock_table}' + and table_schema = DATABASE(); """ def create_handler(self): - return MariaDBHandler('mariadb', connection_data=self.dummy_connection_data) + return MariaDBHandler("mariadb", connection_data=self.dummy_connection_data) def create_patcher(self): - return patch('mysql.connector.connect') + return patch("mysql.connector.connect") def test_native_query(self): - """Test that native_query returns a Response object with no error - """ + """Test that native_query returns a TableResponse object with no error""" mock_conn = MagicMock() mock_cursor = MockCursorContextManager( - data=[{'id': 1}], - description=[('id', 3, None, None, None, None, 1, 0, 45)] + data=[{"id": 1}], description=[("id", 3, None, None, None, None, 1, 0, 45)] ) self.handler.connect = MagicMock(return_value=mock_conn) @@ -84,9 +82,8 @@ def test_native_query(self): query_str = f"SELECT * FROM {self.mock_table}" data = self.handler.native_query(query_str) - self.assertIsInstance(data, Response) - self.assertFalse(data.error_code) + self.assertIsInstance(data, TableResponse) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/test_mssql.py b/tests/unit/handlers/test_mssql.py index dbb097754f9..d7024d51359 100644 --- a/tests/unit/handlers/test_mssql.py +++ b/tests/unit/handlers/test_mssql.py @@ -17,7 +17,13 @@ from pandas import DataFrame from base_handler_test import BaseDatabaseHandlerTest -from mindsdb.integrations.libs.response import HandlerResponse as Response, INF_SCHEMA_COLUMNS_NAMES_SET, RESPONSE_TYPE +from mindsdb.integrations.libs.response import ( + OkResponse, + TableResponse, + ErrorResponse, + INF_SCHEMA_COLUMNS_NAMES_SET, + RESPONSE_TYPE, +) from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE @@ -91,8 +97,7 @@ def test_native_query_with_results(self): mock_conn.cursor.assert_called_once_with(as_dict=True) mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) - self.assertFalse(data.error_code) + assert isinstance(data, TableResponse) self.assertEqual(data.type, RESPONSE_TYPE.TABLE) self.assertIsInstance(data.data_frame, DataFrame) expected_columns = ["id", "name"] @@ -121,8 +126,7 @@ def test_native_query_no_results(self): mock_conn.cursor.assert_called_once_with(as_dict=True) mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) - self.assertFalse(data.error_code) + assert isinstance(data, OkResponse) self.assertEqual(data.type, RESPONSE_TYPE.OK) mock_conn.commit.assert_called_once() @@ -149,7 +153,7 @@ def test_native_query_error(self): mock_conn.cursor.assert_called_once_with(as_dict=True) mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) + assert isinstance(data, ErrorResponse) self.assertEqual(data.type, RESPONSE_TYPE.ERROR) self.assertEqual(data.error_message, str(error)) @@ -166,7 +170,7 @@ def test_query_method(self): try: self.handler.renderer = renderer_mock self.handler.native_query = MagicMock() - self.handler.native_query.return_value = Response(RESPONSE_TYPE.OK) + self.handler.native_query.return_value = OkResponse() mock_ast = MagicMock() result = self.handler.query(mock_ast) @@ -180,7 +184,7 @@ def test_get_tables(self): """ Tests that get_tables calls native_query with the correct SQL """ - expected_response = Response(RESPONSE_TYPE.OK) + expected_response = OkResponse() self.handler.native_query = MagicMock(return_value=expected_response) response = self.handler.get_tables() @@ -199,9 +203,7 @@ def test_get_columns(self): """ Tests that get_columns calls native_query with the correct SQL """ - expected_response = Response( - RESPONSE_TYPE.TABLE, data_frame=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET)) - ) + expected_response = TableResponse(data=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))) self.handler.native_query = MagicMock(return_value=expected_response) table_name = "test_table" @@ -259,7 +261,7 @@ def test_meta_get_tables_returns_response(self): }, ] ) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + expected_response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=expected_response) # without filter @@ -271,7 +273,7 @@ def test_meta_get_tables_returns_response(self): self.handler.native_query.reset_mock() tables = ["customers", "orders"] filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True) - filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df) + filtered_response = TableResponse(data=filtered_df) self.handler.native_query = MagicMock(return_value=filtered_response) response = self.handler.meta_get_tables(table_names=tables) self.handler.native_query.assert_called_once() @@ -307,7 +309,7 @@ def test_meta_get_columns_returns_response(self): }, ] ) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + expected_response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=expected_response) # without filter @@ -319,7 +321,7 @@ def test_meta_get_columns_returns_response(self): self.handler.native_query.reset_mock() tables = ["customers"] filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True) - filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df) + filtered_response = TableResponse(data=filtered_df) self.handler.native_query = MagicMock(return_value=filtered_response) response = self.handler.meta_get_columns(table_names=tables) self.handler.native_query.assert_called_once() @@ -351,7 +353,7 @@ def test_meta_get_column_statistics_returns_response(self): }, ] ) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + expected_response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=expected_response) # without filter @@ -363,7 +365,7 @@ def test_meta_get_column_statistics_returns_response(self): self.handler.native_query.reset_mock() tables = ["customers"] filtered_df = df[df["TABLE_NAME"].isin(tables)].reset_index(drop=True) - filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df) + filtered_response = TableResponse(data=filtered_df) self.handler.native_query = MagicMock(return_value=filtered_response) response = self.handler.meta_get_column_statistics(table_names=tables) self.handler.native_query.assert_called_once() @@ -382,7 +384,7 @@ def test_meta_get_primary_keys_returns_response(self): {"table_name": "orders", "column_name": "id", "ordinal_position": 1, "constraint_name": "pk_orders"}, ] ) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + expected_response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=expected_response) # without filter @@ -394,7 +396,7 @@ def test_meta_get_primary_keys_returns_response(self): self.handler.native_query.reset_mock() tables = ["customers"] filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True) - filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df) + filtered_response = TableResponse(data=filtered_df) self.handler.native_query = MagicMock(return_value=filtered_response) response = self.handler.meta_get_primary_keys(table_names=tables) self.handler.native_query.assert_called_once() @@ -420,7 +422,7 @@ def test_meta_get_foreign_keys_returns_response(self): }, ] ) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + expected_response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=expected_response) # without filter @@ -432,7 +434,7 @@ def test_meta_get_foreign_keys_returns_response(self): self.handler.native_query.reset_mock() tables = ["orders"] filtered_df = df[df["child_table_name"].isin(tables)].reset_index(drop=True) - filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df) + filtered_response = TableResponse(data=filtered_df) self.handler.native_query = MagicMock(return_value=filtered_response) response = self.handler.meta_get_foreign_keys(table_names=tables) self.handler.native_query.assert_called_once() @@ -521,7 +523,7 @@ def test_meta_methods_result_shape_and_exceptions(self): for name, df_factory, method in methods: with self.subTest(method=name, case="no_filter"): df = df_factory() - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + expected_response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=expected_response) res = method() self.handler.native_query.assert_called_once() @@ -533,7 +535,7 @@ def test_meta_methods_result_shape_and_exceptions(self): with self.subTest(method=name, case="with_filter"): df = df_factory() - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + expected_response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=expected_response) res = ( method(table_names=["A", "B"]) @@ -639,6 +641,13 @@ def test_check_connection(self): self.assertFalse(response.success) self.assertEqual(response.error_message, "Connection error") + self.handler.connect.side_effect = ValueError("Invalid connection args") + + response = self.handler.check_connection() + + self.assertFalse(response.success) + self.assertEqual(response.error_message, "Invalid connection args") + def test_types_casting(self): """Test that types are casted correctly""" query_str = "SELECT * FROM test_table" @@ -726,7 +735,7 @@ def test_types_casting(self): ("n_real", 3, None, None, None, None, None), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str) excepted_mysql_types = [ MYSQL_DATA_TYPE.TINYINT, MYSQL_DATA_TYPE.INT, @@ -741,7 +750,7 @@ def test_types_casting(self): MYSQL_DATA_TYPE.FLOAT, MYSQL_DATA_TYPE.FLOAT, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + self.assertEqual([col.type for col in response.columns], excepted_mysql_types) for columns_name, input_value in input_row.items(): result_value = response.data_frame[columns_name][0] self.assertEqual(result_value, input_value) @@ -818,7 +827,7 @@ def test_types_casting(self): ("t_uniqueidentifier", 2, None, None, None, None, None), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str) excepted_mysql_types = [ MYSQL_DATA_TYPE.TEXT, MYSQL_DATA_TYPE.TEXT, @@ -832,7 +841,7 @@ def test_types_casting(self): MYSQL_DATA_TYPE.TEXT, MYSQL_DATA_TYPE.BINARY, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + self.assertEqual([col.type for col in response.columns], excepted_mysql_types) for columns_name, input_value in input_row.items(): result_value = response.data_frame[columns_name][0] self.assertEqual(result_value, input_value) @@ -901,7 +910,7 @@ def test_types_casting(self): ("d_datetimeoffset_p", 2, None, None, None, None, None), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str) excepted_mysql_types = [ # DATE and TIME is not possible to infer, so they are BINARY MYSQL_DATA_TYPE.BINARY, @@ -914,7 +923,7 @@ def test_types_casting(self): MYSQL_DATA_TYPE.DATETIME, MYSQL_DATA_TYPE.DATETIME, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + self.assertEqual([col.type for col in response.columns], excepted_mysql_types) for columns_name, input_value in input_row.items(): result_value = response.data_frame[columns_name][0] if columns_name == "d_datetimeoffset_p": @@ -1099,7 +1108,7 @@ def __getitem__(self, idx): mock_conn.cursor.assert_called_once_with() mock_cursor.execute.assert_called_once_with(query_str) - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) self.assertIsInstance(response.data_frame, DataFrame) self.assertEqual(list(response.data_frame.columns), ["id", "name"]) @@ -1168,10 +1177,10 @@ def __getitem__(self, idx): response = handler.native_query("SELECT * FROM test") - self.assertIsInstance(response, Response) + self.assertIsInstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsNotNone(response.mysql_types) - self.assertTrue(len(response.mysql_types) > 0) + self.assertIsNotNone(response.columns) + self.assertTrue(len(response.columns) > 0) finally: if "pyodbc" in sys.modules: del sys.modules["pyodbc"] diff --git a/tests/unit/handlers/test_mysql.py b/tests/unit/handlers/test_mysql.py index bbb3ab93e56..a506e0ba844 100644 --- a/tests/unit/handlers/test_mysql.py +++ b/tests/unit/handlers/test_mysql.py @@ -12,7 +12,13 @@ from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager from mindsdb.integrations.handlers.mysql_handler.mysql_handler import MySQLHandler -from mindsdb.integrations.libs.response import HandlerResponse as Response, INF_SCHEMA_COLUMNS_NAMES_SET, RESPONSE_TYPE +from mindsdb.integrations.libs.response import ( + OkResponse, + TableResponse, + DataHandlerResponse as Response, + INF_SCHEMA_COLUMNS_NAMES_SET, + RESPONSE_TYPE, +) from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE @@ -67,7 +73,8 @@ def get_columns_query(self): from information_schema.columns where - table_name = '{self.mock_table}'; + table_name = '{self.mock_table}' + and table_schema = DATABASE(); """ def create_handler(self): @@ -89,13 +96,12 @@ def test_native_query(self): query_str = f"SELECT * FROM {self.mock_table}" data = self.handler.native_query(query_str) - self.assertIsInstance(data, Response) - self.assertFalse(data.error_code) + self.assertIsInstance(data, TableResponse) def test_native_query_with_results(self): """ Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query returns a result set + where the query returns a result set, streaming data via fetchmany """ mock_conn = MagicMock() mock_cursor = MagicMock() @@ -106,7 +112,11 @@ def test_native_query_with_results(self): mock_conn.cursor = MagicMock(return_value=mock_cursor) mock_conn.is_connected = MagicMock(return_value=True) - mock_cursor.fetchall.return_value = [{"id": 1, "name": "test1"}, {"id": 2, "name": "test2"}] + # fetchmany returns tuples (non-dictionary cursor), then empty list to signal end + mock_cursor.fetchmany.side_effect = [ + [(1, "test1"), (2, "test2")], + [], + ] # MySQL cursor provides column info via description attribute mock_cursor.description = [ @@ -119,12 +129,10 @@ def test_native_query_with_results(self): query_str = "SELECT * FROM test_table" data = self.handler.native_query(query_str) - mock_conn.cursor.assert_called_once_with(dictionary=True, buffered=True) + mock_conn.cursor.assert_called_once_with(buffered=False) mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) - self.assertFalse(data.error_code) - self.assertEqual(data.type, RESPONSE_TYPE.TABLE) + assert isinstance(data, TableResponse) self.assertIsInstance(data.data_frame, DataFrame) expected_columns = ["id", "name"] @@ -150,12 +158,10 @@ def test_native_query_no_results(self): query_str = "INSERT INTO test_table VALUES (1, 'test')" data = self.handler.native_query(query_str) - mock_conn.cursor.assert_called_once_with(dictionary=True, buffered=True) + mock_conn.cursor.assert_called_once_with(buffered=False) mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) - self.assertFalse(data.error_code) - self.assertEqual(data.type, RESPONSE_TYPE.OK) + assert isinstance(data, OkResponse) self.assertEqual(data.affected_rows, 1) def test_native_query_error(self): @@ -178,7 +184,7 @@ def test_native_query_error(self): query_str = "INVALID SQL" data = self.handler.native_query(query_str) - mock_conn.cursor.assert_called_once_with(dictionary=True, buffered=True) + mock_conn.cursor.assert_called_once_with(buffered=False) mock_cursor.execute.assert_called_once_with(query_str) assert isinstance(data, Response) @@ -377,7 +383,7 @@ def test_query_method(self): mock_renderer_class.return_value = mock_renderer self.handler.native_query = MagicMock() - self.handler.native_query.return_value = Response(RESPONSE_TYPE.OK) + self.handler.native_query.return_value = OkResponse() mock_ast = MagicMock() @@ -406,7 +412,7 @@ def test_get_tables(self): """ Tests that get_tables calls native_query with the correct SQL """ - expected_response = Response(RESPONSE_TYPE.OK) + expected_response = OkResponse() self.handler.native_query = MagicMock(return_value=expected_response) response = self.handler.get_tables() @@ -425,9 +431,7 @@ def test_get_columns(self): """ Tests that get_columns calls native_query with the correct SQL """ - expected_response = Response( - RESPONSE_TYPE.TABLE, data_frame=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET)) - ) + expected_response = TableResponse(data=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))) self.handler.native_query = MagicMock(return_value=expected_response) table_name = "test_table" @@ -454,7 +458,8 @@ def test_get_columns(self): from information_schema.columns where - table_name = '{table_name}'; + table_name = '{table_name}' + and table_schema = DATABASE(); """ self.assertEqual(call_args, expected_sql) self.assertEqual(response, expected_response) @@ -473,19 +478,19 @@ def test_types_casting(self): mock_conn.is_connected = MagicMock(return_value=True) # region test TEXT/BLOB types and sub-types - input_row = { - "t_varchar": "v_varchar", - "t_tinytext": "v_tinytext", - "t_text": "v_text", - "t_mediumtext": "v_mediumtext", - "t_longtext": "v_longtext", - "t_tinyblon": "v_tinyblon", - "t_blob": "v_blob", - "t_mediumblob": "v_mediumblob", - "t_longblob": "v_longblob", - "t_json": '{"key": "value"}', - } - mock_cursor.fetchall.return_value = [input_row] + input_row = OrderedDict( + t_varchar="v_varchar", + t_tinytext="v_tinytext", + t_text="v_text", + t_mediumtext="v_mediumtext", + t_longtext="v_longtext", + t_tinyblon="v_tinyblon", + t_blob="v_blob", + t_mediumblob="v_mediumblob", + t_longblob="v_longblob", + t_json='{"key": "value"}', + ) + mock_cursor.fetchall.return_value = [list(input_row.values())] mock_cursor.description = [ ("t_varchar", 253, None, None, None, None, 1, 0, 45), @@ -500,7 +505,7 @@ def test_types_casting(self): ("t_json", 245, None, None, None, None, 1, 144, 63), ] - response: Response = self.handler.native_query(query_str) + response: Response = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [ MYSQL_DATA_TYPE.VARBINARY, MYSQL_DATA_TYPE.TEXT, @@ -513,7 +518,8 @@ def test_types_casting(self): MYSQL_DATA_TYPE.BLOB, MYSQL_DATA_TYPE.JSON, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) for key, input_value in input_row.items(): result_value = response.data_frame[key][0] self.assertEqual(type(result_value), type(input_value)) @@ -521,17 +527,18 @@ def test_types_casting(self): # endregion # region test TINYINT/BOOL/BOOLEAN types - input_row = {"t_tinyint": 1, "t_bool": 1, "t_boolean": 1} - mock_cursor.fetchall.return_value = [input_row] + input_row = OrderedDict(t_tinyint=1, t_bool=1, t_boolean=1) + mock_cursor.fetchall.return_value = [list(input_row.values())] mock_cursor.description = [ ("t_tinyint", 1, None, None, None, None, 1, 0, 63), ("t_bool", 1, None, None, None, None, 1, 0, 63), ("t_boolean", 1, None, None, None, None, 1, 0, 63), ] - response: Response = self.handler.native_query(query_str) + response: Response = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [MYSQL_DATA_TYPE.TINYINT, MYSQL_DATA_TYPE.TINYINT, MYSQL_DATA_TYPE.TINYINT] - self.assertEqual(response.mysql_types, excepted_mysql_types) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) for key, input_value in input_row.items(): result_value = response.data_frame[key][0] # without None values in result columns types will be one of pandas types @@ -540,19 +547,19 @@ def test_types_casting(self): # endregion # region test numeric types - input_row = { - "t_tinyint": 1, - "t_bool": 0, - "t_smallint": 2, - "t_year": 2025, - "t_mediumint": 3, - "t_int": 4, - "t_bigint": 5, - "t_float": 1.1, - "t_double": 2.2, - "t_decimal": Decimal("3.3"), - } - mock_cursor.fetchall.return_value = [input_row] + input_row = OrderedDict( + t_tinyint=1, + t_bool=0, + t_smallint=2, + t_year=2025, + t_mediumint=3, + t_int=4, + t_bigint=5, + t_float=1.1, + t_double=2.2, + t_decimal=Decimal("3.3"), + ) + mock_cursor.fetchall.return_value = [list(input_row.values())] mock_cursor.description = [ ("t_tinyint", 1, None, None, None, None, 1, 0, 63), ("t_bool", 1, None, None, None, None, 1, 0, 63), @@ -565,7 +572,7 @@ def test_types_casting(self): ("t_double", 5, None, None, None, None, 1, 0, 63), ("t_decimal", 246, None, None, None, None, 1, 0, 63), ] - response: Response = self.handler.native_query(query_str) + response: Response = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [ MYSQL_DATA_TYPE.TINYINT, MYSQL_DATA_TYPE.TINYINT, @@ -579,21 +586,22 @@ def test_types_casting(self): MYSQL_DATA_TYPE.DECIMAL, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) for key, input_value in input_row.items(): result_value = response.data_frame[key][0] self.assertEqual(result_value, input_value) # endregion # test date/time types - input_row = { - "t_date": datetime.date(2025, 4, 16), - "t_time": datetime.timedelta(seconds=45600), - "t_year": 2025, - "t_datetime": datetime.datetime(2025, 4, 16, 12, 30, 15), - "t_timestamp": datetime.datetime(2025, 4, 16, 12, 30, 15), - } - mock_cursor.fetchall.return_value = [input_row] + input_row = OrderedDict( + t_date=datetime.date(2025, 4, 16), + t_time=datetime.timedelta(seconds=45600), + t_year=2025, + t_datetime=datetime.datetime(2025, 4, 16, 12, 30, 15), + t_timestamp=datetime.datetime(2025, 4, 16, 12, 30, 15), + ) + mock_cursor.fetchall.return_value = [list(input_row.values())] mock_cursor.description = [ ("t_date", 10, None, None, None, None, 1, 128, 63), @@ -603,7 +611,7 @@ def test_types_casting(self): ("t_timestamp", 7, None, None, None, None, 1, 128, 63), ] - response: Response = self.handler.native_query(query_str) + response: Response = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [ MYSQL_DATA_TYPE.DATE, MYSQL_DATA_TYPE.TIME, @@ -611,7 +619,8 @@ def test_types_casting(self): MYSQL_DATA_TYPE.DATETIME, MYSQL_DATA_TYPE.TIMESTAMP, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) for key, input_value in input_row.items(): result_value = response.data_frame[key][0] self.assertEqual(result_value, input_value) @@ -619,14 +628,14 @@ def test_types_casting(self): # region test casting of nullable types bigint_val = 9223372036854775807 - input_rows = [{"t_bigint": bigint_val, "t_boolean": 1}, {"t_bigint": None, "t_boolean": None}] - mock_cursor.fetchall.return_value = input_rows + input_rows = [OrderedDict(t_bigint=bigint_val, t_boolean=1), OrderedDict(t_bigint=None, t_boolean=None)] + mock_cursor.fetchall.return_value = [list(row.values()) for row in input_rows] description = [ ("t_bigint", 8, None, None, None, None, 1, 0, 63), ("t_boolean", 1, None, None, None, None, 1, 0, 63), ] mock_cursor.description = description - response: Response = self.handler.native_query(query_str) + response: Response = self.handler.native_query(query_str, stream=False) self.assertEqual(response.data_frame.dtypes.iloc[0], "Int64") self.assertEqual(response.data_frame.dtypes.iloc[1], "Int64") self.assertEqual(response.data_frame.iloc[0, 0], bigint_val) @@ -636,16 +645,17 @@ def test_types_casting(self): # endregion # region test vector type - input_row = { - "t_vector": array("f", [1.1, 2.2, 3.3]), - } - mock_cursor.fetchall.return_value = [input_row] + input_row = OrderedDict( + t_vector=array("f", [1.1, 2.2, 3.3]), + ) + mock_cursor.fetchall.return_value = [list(input_row.values())] mock_cursor.description = [("t_vector", 242, None, None, None, None, 1, 144, 63)] - response: Response = self.handler.native_query(query_str) + response: Response = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [MYSQL_DATA_TYPE.VECTOR] - self.assertEqual(response.mysql_types, excepted_mysql_types) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) self.assertEqual(input_row["t_vector"], response.data_frame["t_vector"][0]) # endregion @@ -661,7 +671,7 @@ def _test_meta_method_with_filter(self, method, sample_data, filter_column, filt """ # Test without filter df = DataFrame(sample_data) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + expected_response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=expected_response) response = method() @@ -671,7 +681,7 @@ def _test_meta_method_with_filter(self, method, sample_data, filter_column, filt # Test with filter self.handler.native_query.reset_mock() filtered_df = df[df[filter_column].isin(filter_values)].reset_index(drop=True) - filtered_response = Response(RESPONSE_TYPE.TABLE, data_frame=filtered_df) + filtered_response = TableResponse(data=filtered_df) self.handler.native_query = MagicMock(return_value=filtered_response) response = method(table_names=filter_values) diff --git a/tests/unit/handlers/test_oracle.py b/tests/unit/handlers/test_oracle.py index cfd8dd7423f..fb18a57fcc6 100644 --- a/tests/unit/handlers/test_oracle.py +++ b/tests/unit/handlers/test_oracle.py @@ -18,9 +18,11 @@ import pandas as pd from pandas import DataFrame -from base_handler_test import BaseDatabaseHandlerTest +from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, + TableResponse, + OkResponse, + ErrorResponse, INF_SCHEMA_COLUMNS_NAMES_SET, RESPONSE_TYPE, ) @@ -165,9 +167,42 @@ def test_thick_mode_connection(self): handler.connect() mock_init.assert_called_once_with(lib_dir="/path/to/oracle/client/lib") - def test_native_query_with_results(self): + def test_native_query_with_results_streaming(self): """ - Tests the `native_query` method for a SELECT statement returning results. + Tests the `native_query` method for a SELECT statement returning results at server side execution. + """ + mock_conn = MagicMock() + mock_cursor = MockCursorContextManager() + + self.handler.connect = MagicMock(return_value=mock_conn) + mock_conn.cursor = MagicMock(return_value=mock_cursor) + + # Server-side execution uses fetchmany, not fetchall + mock_cursor.fetchmany = MagicMock(side_effect=[[(1, "test1"), (2, "test2")], []]) + mock_cursor.description = [ + ("ID", None, None, None, None, None, None), + ("NAME", None, None, None, None, None, None), + ] + + query_str = "SELECT ID, NAME FROM test_table" + data = self.handler.native_query(query_str, stream=True) + + mock_conn.cursor.assert_called_once() + mock_cursor.execute.assert_called_once_with(query_str) + + # Verify the response + self.assertIsInstance(data, TableResponse) + self.assertEqual(data.type, RESPONSE_TYPE.TABLE) + self.assertIsNone(data._data) + data.fetchall() + self.assertIsInstance(data._data, DataFrame) + expected_columns = ["ID", "NAME"] + self.assertListEqual(list(data.data_frame.columns), expected_columns) + self.assertEqual(len(data.data_frame), 2) + + def test_native_query_with_no_streaming(self): + """ + Tests the `native_query` method for a SELECT statement returning results at client side execution. """ mock_conn = MagicMock() mock_cursor = MagicMock() @@ -177,22 +212,21 @@ def test_native_query_with_results(self): self.handler.connect = MagicMock(return_value=mock_conn) mock_conn.cursor = MagicMock(return_value=mock_cursor) - mock_cursor.fetchall.return_value = [(1, "test1"), (2, "test2")] + mock_cursor.fetchall = MagicMock(return_value=[(1, "test1"), (2, "test2")]) mock_cursor.description = [ ("ID", None, None, None, None, None, None), ("NAME", None, None, None, None, None, None), ] query_str = "SELECT ID, NAME FROM test_table" - data = self.handler.native_query(query_str) + data = self.handler.native_query(query_str, stream=False) mock_conn.cursor.assert_called_once() mock_cursor.execute.assert_called_once_with(query_str) mock_cursor.fetchall.assert_called_once() mock_conn.commit.assert_called_once() - self.assertIsInstance(data, Response) - self.assertFalse(data.error_code) + self.assertIsInstance(data, TableResponse) self.assertEqual(data.type, RESPONSE_TYPE.TABLE) self.assertIsInstance(data.data_frame, DataFrame) expected_columns = ["ID", "NAME"] @@ -222,8 +256,7 @@ def test_native_query_no_results(self): mock_cursor.fetchall.assert_not_called() mock_conn.commit.assert_called_once() - self.assertIsInstance(data, Response) - self.assertFalse(data.error_code) + self.assertIsInstance(data, OkResponse) self.assertEqual(data.type, RESPONSE_TYPE.OK) self.assertEqual(data.affected_rows, 1) @@ -252,7 +285,7 @@ def test_native_query_error(self): mock_conn.rollback.assert_called_once() mock_conn.commit.assert_not_called() - self.assertIsInstance(data, Response) + self.assertIsInstance(data, ErrorResponse) self.assertEqual(data.type, RESPONSE_TYPE.ERROR) self.assertEqual(data.error_message, error_msg) @@ -265,7 +298,7 @@ def test_query_method(self): orig_renderer = self.handler.renderer self.handler.native_query = MagicMock() - expected_response = Response(RESPONSE_TYPE.TABLE) + expected_response = TableResponse() self.handler.native_query.return_value = expected_response mock_ast = MagicMock() @@ -299,7 +332,7 @@ def test_get_tables(self): ], columns=["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"], ) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + expected_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=expected_response) @@ -364,7 +397,7 @@ def test_get_tables_multiple_schemas(self): ], columns=["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"], ) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + expected_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=expected_response) @@ -448,7 +481,7 @@ def test_get_columns(self): ] expected_df = DataFrame(expected_df_data, columns=query_columns) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + expected_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=expected_response) table_name = "test_table" @@ -573,7 +606,7 @@ def test_types_casting(self): ("N_BINARY_DOUBLE", oracledb.DB_TYPE_NUMBER, 127, None, None, None, True), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [ MYSQL_DATA_TYPE.FLOAT, MYSQL_DATA_TYPE.DECIMAL, @@ -590,7 +623,7 @@ def test_types_casting(self): MYSQL_DATA_TYPE.FLOAT, MYSQL_DATA_TYPE.FLOAT, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + self.assertEqual([col.type for col in response.columns], excepted_mysql_types) for i, input_value in enumerate(input_row): result_value = response.data_frame[response.data_frame.columns[i]][0] self.assertEqual(result_value, input_value) @@ -612,9 +645,9 @@ def test_types_casting(self): ("T_BOOLEAN", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True), ("T_BOOL", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [MYSQL_DATA_TYPE.BOOLEAN, MYSQL_DATA_TYPE.BOOLEAN] - self.assertEqual(response.mysql_types, excepted_mysql_types) + self.assertEqual([col.type for col in response.columns], excepted_mysql_types) for i, input_value in enumerate(input_row): result_value = response.data_frame[response.data_frame.columns[i]][0] self.assertEqual(result_value, input_value) @@ -680,7 +713,7 @@ def test_types_casting(self): ("T_RAW", oracledb.DB_TYPE_RAW, 100, 100, None, None, True), ("T_BLOB", oracledb.DB_TYPE_LONG_RAW, None, None, None, None, True), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [ MYSQL_DATA_TYPE.TEXT, MYSQL_DATA_TYPE.TEXT, @@ -692,7 +725,7 @@ def test_types_casting(self): MYSQL_DATA_TYPE.BINARY, MYSQL_DATA_TYPE.BINARY, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + self.assertEqual([col.type for col in response.columns], excepted_mysql_types) for i, input_value in enumerate(input_row): result_value = response.data_frame[response.data_frame.columns[i]][0] self.assertEqual(result_value, input_value) @@ -739,13 +772,13 @@ def test_types_casting(self): ("D_TIMESTAMP", oracledb.DB_TYPE_TIMESTAMP, 23, None, 0, 6, True), ("D_TIMESTAMP_P", oracledb.DB_TYPE_TIMESTAMP, 23, None, 0, 9, True), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [ MYSQL_DATA_TYPE.DATE, MYSQL_DATA_TYPE.TIMESTAMP, MYSQL_DATA_TYPE.TIMESTAMP, ] - self.assertEqual(response.mysql_types, excepted_mysql_types) + self.assertEqual([col.type for col in response.columns], excepted_mysql_types) for i, input_value in enumerate(input_row): result_value = response.data_frame[response.data_frame.columns[i]][0] self.assertEqual(result_value, input_value) @@ -767,7 +800,7 @@ def test_types_casting(self): ), # set 17 just to force cast to Int64 ("T_BOOLEAN", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str, stream=False) self.assertEqual(response.data_frame.dtypes[0], "Int64") self.assertEqual(response.data_frame.dtypes[1], "boolean") self.assertEqual(response.data_frame.iloc[0, 0], bigint_val) @@ -800,12 +833,13 @@ def test_types_casting(self): ("T_EMBEDDING", oracledb.DB_TYPE_VECTOR, None, None, None, None, True), ("T_JSON", oracledb.DB_TYPE_JSON, None, None, None, None, True), ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str, stream=False) excepted_mysql_types = [MYSQL_DATA_TYPE.VECTOR, MYSQL_DATA_TYPE.JSON] + self.assertEqual([col.type for col in response.columns], excepted_mysql_types) for i, input_value in enumerate(input_row): result_value = response.data_frame[response.data_frame.columns[i]][0] self.assertEqual(result_value, input_value) - # endreion + # endregion def test_insert(self): """ @@ -813,9 +847,7 @@ def test_insert(self): using insertmany for batch inserts. """ mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) + mock_cursor = MockCursorContextManager() self.handler.connect = MagicMock(return_value=mock_conn) mock_conn.cursor = MagicMock(return_value=mock_cursor) @@ -837,9 +869,7 @@ def test_insert_error(self): Tests the insert method to ensure it correctly handles errors """ mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) + mock_cursor = MockCursorContextManager() self.handler.connect = MagicMock(return_value=mock_conn) mock_conn.cursor = MagicMock(return_value=mock_cursor) @@ -869,7 +899,7 @@ def test_meta_get_tables(self, table_names=None): "row_count", ], ) - mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + mock_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=mock_response) response = self.handler.meta_get_tables(table_names=table_names) @@ -900,7 +930,7 @@ def test_meta_get_columns(self, table_names=None): ], ) - mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + mock_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=mock_response) table_name = "TABLE1" @@ -934,7 +964,7 @@ def test_meta_get_column_statistics(self, table_names=None): ], ) - mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + mock_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=mock_response) table_names = ["STATS_TABLE"] response = self.handler.meta_get_column_statistics(table_names=table_names) @@ -975,7 +1005,7 @@ def test_meta_get_primary_keys(self): ], ) - mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + mock_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=mock_response) table_names = ["USERS", "ORDERS"] @@ -1024,7 +1054,7 @@ def test_meta_get_foreign_keys(self, table_names=None): ], ) - mock_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + mock_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=mock_response) table_names = ["ORDERS", "ORDER_ITEMS"] diff --git a/tests/unit/handlers/test_postgres.py b/tests/unit/handlers/test_postgres.py index 8ad5be6d414..a0e3adc1335 100644 --- a/tests/unit/handlers/test_postgres.py +++ b/tests/unit/handlers/test_postgres.py @@ -17,7 +17,12 @@ from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager from mindsdb.integrations.handlers.postgres_handler.postgres_handler import PostgresHandler, _map_type -from mindsdb.integrations.libs.response import HandlerResponse as Response, RESPONSE_TYPE +from mindsdb.integrations.libs.response import ( + RESPONSE_TYPE, + TableResponse, + OkResponse, + ErrorResponse, +) from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE @@ -96,35 +101,64 @@ def create_handler(self): def create_patcher(self): return patch("psycopg.connect") - def test_native_query_command_ok(self): + def test_native_query_command_ok_stream(self): """ Tests the `native_query` method to ensure it executes a SQL query and handles the case where the query doesn't return a result set (ExecStatus.COMMAND_OK) """ mock_conn = MagicMock() - # Use MockCursorContextManager for simplified mocking - mock_cursor = MockCursorContextManager() + mock_cursor_server = MockCursorContextManager() + mock_cursor_client = MockCursorContextManager() self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) + mock_conn.cursor = MagicMock(side_effect=[mock_cursor_server, mock_cursor_client]) - mock_cursor.execute.return_value = None + syntax_error = psycopg.errors.SyntaxError('syntax error at or near "insert"') + mock_cursor_server.execute.side_effect = syntax_error + mock_cursor_client.execute.return_value = None # Setup pgresult mock_pgresult = MagicMock() mock_pgresult.status = ExecStatus.COMMAND_OK - mock_cursor.pgresult = mock_pgresult - mock_cursor.rowcount = 1 + mock_cursor_client.pgresult = mock_pgresult + mock_cursor_client.rowcount = 1 query_str = "INSERT INTO table VALUES (1, 2, 3)" - data = self.handler.native_query(query_str) - mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) - self.assertFalse(data.error_code) - self.assertEqual(data.type, RESPONSE_TYPE.OK) + data = self.handler.native_query(query_str, stream=True) + mock_cursor_server.execute.assert_called_once_with(query_str) + mock_cursor_client.execute.assert_called_once_with(query_str) + assert isinstance(data, OkResponse) self.assertEqual(data.affected_rows, 1) - def test_native_query_with_results(self): + def test_native_query_command_ok_no_stream(self): + """ + Tests the `native_query` at client side execution + """ + mock_conn = MagicMock() + # mock_cursor_server = MockCursorContextManager() + mock_cursor_client = MockCursorContextManager() + + self.handler.connect = MagicMock(return_value=mock_conn) + mock_conn.cursor = MagicMock(side_effect=[mock_cursor_client]) + + # syntax_error = psycopg.errors.SyntaxError('syntax error at or near "insert"') + # mock_cursor_server.execute.side_effect = syntax_error + mock_cursor_client.execute.return_value = None + + # Setup pgresult + mock_pgresult = MagicMock() + mock_pgresult.status = ExecStatus.COMMAND_OK + mock_cursor_client.pgresult = mock_pgresult + mock_cursor_client.rowcount = 1 + + query_str = "INSERT INTO table VALUES (1, 2, 3)" + data = self.handler.native_query(query_str, stream=False) + # mock_cursor_server.execute.assert_called_once_with(query_str) + mock_cursor_client.execute.assert_called_once_with(query_str) + assert isinstance(data, OkResponse) + self.assertEqual(data.affected_rows, 1) + + def test_native_query_with_results_client_side(self): """ Tests the `native_query` method to ensure it executes a SQL query and handles the case where the query returns a result set @@ -135,7 +169,7 @@ def test_native_query_with_results(self): self.handler.connect = MagicMock(return_value=mock_conn) mock_conn.cursor = MagicMock(return_value=mock_cursor) - mock_cursor.fetchall = MagicMock(return_value=[[1, "name1"], [2, "name2"]]) + mock_cursor.fetchall = MagicMock(side_effect=[[[1, "name1"], [2, "name2"]], []]) # Create proper description objects with necessary type_code for _cast_dtypes mock_cursor.description = [ @@ -149,14 +183,51 @@ def test_native_query_with_results(self): mock_cursor.pgresult = mock_pgresult query_str = "SELECT * FROM table" - data = self.handler.native_query(query_str) + data = self.handler.native_query(query_str, stream=False) mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) - self.assertFalse(data.error_code) + assert isinstance(data, TableResponse) + assert getattr(data, "error_code", None) is None self.assertEqual(data.type, RESPONSE_TYPE.TABLE) self.assertIsInstance(data.data_frame, DataFrame) self.assertEqual(list(data.data_frame.columns), ["id", "name"]) + def test_native_query_with_results_stream(self): + """ + Tests the `native_query` method to ensure it executes a SQL query and handles the case + where the query returns a result set at server side execution + """ + mock_conn = MagicMock() + mock_cursor = MockCursorContextManager() + + self.handler.connect = MagicMock(return_value=mock_conn) + mock_conn.cursor = MagicMock(return_value=mock_cursor) + + # Server-side execution uses fetchmany, not fetchall + mock_cursor.fetchmany = MagicMock(side_effect=[[[1, "name1"], [2, "name2"]], []]) + + mock_cursor.description = [ + ColumnDescription(name="id", type_code=regtype_to_oid["integer"]), # int4 type code + ColumnDescription(name="name", type_code=regtype_to_oid["text"]), # text type code + ] + + query_str = "SELECT * FROM table" + data = self.handler.native_query(query_str, stream=True) + mock_cursor.execute.assert_called_once_with(query_str) + + # Verify the response + assert isinstance(data, TableResponse) + assert getattr(data, "error_code", None) is None + self.assertEqual(data.type, RESPONSE_TYPE.TABLE) + self.assertIsNone(data._data) + data.fetchall() + self.assertIsInstance(data._data, DataFrame) + self.assertEqual(list(data.data_frame.columns), ["id", "name"]) + + # Verify DataFrame contains all expected rows + self.assertEqual(len(data.data_frame), 2) + self.assertEqual(data.data_frame["id"].tolist(), [1, 2]) + self.assertEqual(data.data_frame["name"].tolist(), ["name1", "name2"]) + def test_native_query_with_params(self): """ Tests the `native_query` method with parameters to ensure executemany is called correctly @@ -175,8 +246,7 @@ def test_native_query_with_params(self): params = [(1, "a"), (2, "b")] data = self.handler.native_query(query_str, params=params) mock_cursor.executemany.assert_called_once_with(query_str, params) - assert isinstance(data, Response) - self.assertFalse(data.error_code) + assert isinstance(data, OkResponse) def test_native_query_error(self): """ @@ -198,8 +268,7 @@ def test_native_query_error(self): mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) - self.assertEqual(data.type, RESPONSE_TYPE.ERROR) + assert isinstance(data, ErrorResponse) # The handler implementation sets error_code to 0, check error_message instead self.assertEqual(data.error_code, 0) @@ -260,30 +329,7 @@ def test_query_method_uses_renderer_params(self): self.assertEqual(result, "ok") self.handler.renderer.get_exec_params.assert_called_once_with(query_node, with_failback=True) - self.handler.native_query.assert_called_once_with("SELECT 1", ["foo"]) - - def test_query_stream_yields_batches(self): - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - mock_cursor.pgresult = MagicMock(status=ExecStatus.TUPLES_OK) - mock_cursor.fetchmany = MagicMock(side_effect=[[(1, "name")], []]) - mock_cursor.description = [ - ColumnDescription(name="id", type_code=regtype_to_oid["integer"]), - ColumnDescription(name="name", type_code=regtype_to_oid["text"]), - ] - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - self.handler.renderer.get_exec_params = MagicMock(return_value=("SELECT * FROM table", None)) - self.handler.disconnect = MagicMock() - - batches = list(self.handler.query_stream(MagicMock(), fetch_size=1)) - - self.assertEqual(len(batches), 1) - self.assertListEqual(list(batches[0].columns), ["id", "name"]) - mock_conn.commit.assert_called_once() - mock_conn.rollback.assert_called_once() - self.handler.disconnect.assert_called_once() + self.handler.native_query.assert_called_once_with("SELECT 1", ["foo"], stream=False) def test_insert_respects_existing_column_case(self): if getattr(self.handler, "name", None) != "postgres": @@ -299,9 +345,8 @@ def test_insert_respects_existing_column_case(self): mock_conn.cursor = MagicMock(return_value=mock_cursor) self.handler.disconnect = MagicMock() self.handler.get_columns = MagicMock( - return_value=Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame({"COLUMN_NAME": ["Id", "Amount"]}), + return_value=TableResponse( + data=pd.DataFrame({"COLUMN_NAME": ["Id", "Amount"]}), ) ) @@ -318,6 +363,15 @@ def test_insert_respects_existing_column_case(self): self.assertIn('"Id"', executed_copy) self.assertIn('"Amount"', executed_copy) + def test_meta_get_column_statistics_returns_non_table_response(self): + error_response = ErrorResponse(error_message="boom") + self.handler.native_query = MagicMock(return_value=error_response) + + result = self.handler.meta_get_column_statistics() + + self.assertIs(result, error_response) + self.handler.native_query.assert_called_once() + def test_cast_dtypes(self): """ Tests the _cast_dtypes method to ensure it correctly converts PostgreSQL types to pandas types @@ -444,13 +498,13 @@ def test_insert(self): mock_pgresult.status = ExecStatus.TUPLES_OK mock_cursor.pgresult = mock_pgresult mock_cursor.rowcount = 1 - mock_cursor.fetchall = MagicMock( - return_value=[ - ["a", "int", 1, None, "YES", None, None, None, None, None, None, None], - ["b", "int", 2, None, "YES", None, None, None, None, None, None, None], - ["c", "int", 3, None, "YES", None, None, None, None, None, None, None], - ] - ) + + get_columns_result = [ + ["id", "int", 1, None, "YES", None, None, None, None, None, None, None], + ["name", "text", 2, None, "YES", None, None, None, None, None, None, None], + ] + mock_cursor.fetchmany = MagicMock(side_effect=[get_columns_result, []]) + information_schema_description = [ ColumnDescription(name="COLUMN_NAME", type_code=regtype_to_oid["text"]), ColumnDescription(name="DATA_TYPE", type_code=regtype_to_oid["text"]), @@ -474,19 +528,6 @@ def test_insert(self): copy_obj.__enter__ = MagicMock(return_value=copy_obj) copy_obj.__exit__ = MagicMock(return_value=None) - # region add result for 'get_columns' call - mock_pgresult = MagicMock() - mock_pgresult.status = ExecStatus.TUPLES_OK - mock_cursor.pgresult = mock_pgresult - mock_cursor.fetchall = MagicMock( - return_value=[ - ["id", "int", 1, None, "YES", None, None, None, None, None, None, None], - ["name", "text", 2, None, "YES", None, None, None, None, None, None, None], - ] - ) - mock_cursor.description = information_schema_description - # endregino - df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}) self.handler.insert("test_table", df) @@ -643,9 +684,11 @@ def test_types_casting(self): MYSQL_DATA_TYPE.VARCHAR, MYSQL_DATA_TYPE.VARCHAR, ] - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str, stream=False) + + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) - self.assertEqual(response.mysql_types, excepted_mysql_types) for i, input_value in enumerate(input_row): result_value = response.data_frame[description[i].name][0] self.assertEqual(type(result_value), type(input_value), f"type mismatch: {result_value} != {input_value}") @@ -657,8 +700,9 @@ def test_types_casting(self): mock_cursor.fetchall.return_value = input_rows mock_cursor.description = [ColumnDescription(name="t_boolean", type_code=16)] excepted_mysql_types = [MYSQL_DATA_TYPE.BOOL] - response: Response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + response: TableResponse = self.handler.native_query(query_str, stream=False) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) self.assertTrue(pd_types.is_bool_dtype(response.data_frame["t_boolean"][0])) self.assertTrue(bool(response.data_frame["t_boolean"][0]) is True) self.assertTrue(bool(response.data_frame["t_boolean"][1]) is False) @@ -774,8 +818,9 @@ def test_types_casting(self): MYSQL_DATA_TYPE.FLOAT, # n_float4 MYSQL_DATA_TYPE.DOUBLE, # n_float8 ] - response: Response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + response: TableResponse = self.handler.native_query(query_str, stream=False) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) for i, input_value in enumerate(input_row): result_value = response.data_frame[description[i].name][0] self.assertEqual(result_value, input_value, f"value mismatch: {result_value} != {input_value}") @@ -850,8 +895,9 @@ def test_types_casting(self): MYSQL_DATA_TYPE.TIME, # TIMETZ ] - response: Response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + response: TableResponse = self.handler.native_query(query_str, stream=False) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) for i, input_value in enumerate(input_row): result_value = response.data_frame[description[i].name][0] self.assertEqual(result_value, input_value, f"value mismatch: {result_value} != {input_value}") @@ -866,7 +912,7 @@ def test_types_casting(self): ColumnDescription(name="t_boolean", type_code=16), ] mock_cursor.description = description - response: Response = self.handler.native_query(query_str) + response: TableResponse = self.handler.native_query(query_str, stream=False) self.assertEqual(response.data_frame.dtypes[0], "Int64") self.assertEqual(response.data_frame.dtypes[1], "boolean") self.assertEqual(response.data_frame.iloc[0, 0], bigint_val) @@ -921,8 +967,9 @@ def test_types_casting(self): MYSQL_DATA_TYPE.VECTOR, ] - response: Response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + response: TableResponse = self.handler.native_query(query_str, stream=False) + for column, mysql_type in zip(response.columns, excepted_mysql_types): + self.assertEqual(column.type, mysql_type) for i, input_value in enumerate(input_row): result_value = response.data_frame[description[i].name][0] self.assertEqual(type(result_value), type(input_value), f"type mismatch: {result_value} != {input_value}") @@ -933,7 +980,7 @@ def test_types_casting(self): # endregion def test_get_tables_all_flag(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.get_tables(all=True) query = self.handler.native_query.call_args[0][0] self.assertNotIn("current_schema()", query.split("table_schema")[-1]) @@ -955,19 +1002,19 @@ def test_get_columns_with_schema_name(self): "COLLATION_NAME": [None], } ) - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df)) + self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) self.handler.get_columns("customers", schema_name="analytics") query = self.handler.native_query.call_args[0][0] self.assertIn("table_schema = 'analytics'", query) def test_meta_get_tables_filters_by_list(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.meta_get_tables(table_names=["orders"]) query = self.handler.native_query.call_args[0][0] self.assertIn("IN ('orders')", query) def test_meta_get_columns_filters_by_list(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.meta_get_columns(table_names=["orders"]) query = self.handler.native_query.call_args[0][0] self.assertIn("IN ('orders')", query) @@ -984,7 +1031,7 @@ def test_meta_get_column_statistics_transforms_histogram(self): "histogram_bounds": ["{1,5,10}"], } ) - response = Response(RESPONSE_TYPE.TABLE, data_frame=df) + response = TableResponse(data=df) self.handler.native_query = MagicMock(return_value=response) result = self.handler.meta_get_column_statistics(table_names=["orders"]) @@ -995,13 +1042,13 @@ def test_meta_get_column_statistics_transforms_histogram(self): self.assertEqual(result.data_frame.loc[0, "MOST_COMMON_VALUES"], ["A", "B"]) def test_meta_get_primary_keys_with_filter(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.meta_get_primary_keys(table_names=["orders"]) query = self.handler.native_query.call_args[0][0] self.assertIn("AND tc.table_name IN ('orders')", query) def test_meta_get_foreign_keys_with_filter(self): - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame())) + self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) self.handler.meta_get_foreign_keys(table_names=["orders"]) query = self.handler.native_query.call_args[0][0] self.assertIn("AND tc.table_name IN ('orders')", query) diff --git a/tests/unit/handlers/test_redshift.py b/tests/unit/handlers/test_redshift.py index 8ee9a4f7e27..1d40b93fb4d 100644 --- a/tests/unit/handlers/test_redshift.py +++ b/tests/unit/handlers/test_redshift.py @@ -5,18 +5,21 @@ import pandas as pd import psycopg -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - RESPONSE_TYPE -) +from mindsdb.integrations.libs.response import OkResponse, ErrorResponse, RESPONSE_TYPE from mindsdb.integrations.handlers.redshift_handler.redshift_handler import RedshiftHandler from test_postgres import TestPostgresHandler class TestRedshiftHandler(TestPostgresHandler): - def create_handler(self): - return RedshiftHandler('redshift', connection_data=self.dummy_connection_data) + return RedshiftHandler("redshift", connection_data=self.dummy_connection_data) + + def test_native_query(self): + """ + This test is overridden to avoid issues with the generic MockCursorContextManager not being compatible with Postgres/Redshift cursor behavior. + More specific tests (test_native_query_with_results, test_native_query_command_ok, test_native_query_error) cover this functionality. + """ + pass def test_insert(self): """ @@ -32,20 +35,17 @@ def test_insert(self): mock_cursor.executemany.return_value = None - df = pd.DataFrame({ - 'column1': [1, 2, 3, np.nan], - 'column2': ['a', 'b', 'c', None] - }) + df = pd.DataFrame({"column1": [1, 2, 3, np.nan], "column2": ["a", "b", "c", None]}) - table_name = 'mock_table' + table_name = "mock_table" response = self.handler.insert(table_name, df) - columns = ', '.join([f'"{col}"' if ' ' in col else col for col in df.columns]) - values = ', '.join(['%s' for _ in range(len(df.columns))]) - expected_query = f'INSERT INTO {table_name} ({columns}) VALUES ({values})' + columns = ", ".join([f'"{col}"' if " " in col else col for col in df.columns]) + values = ", ".join(["%s" for _ in range(len(df.columns))]) + expected_query = f"INSERT INTO {table_name} ({columns}) VALUES ({values})" mock_cursor.executemany.assert_called_once_with(expected_query, df.replace({np.nan: None}).values.tolist()) - assert isinstance(response, Response) + assert isinstance(response, OkResponse) self.assertEqual(response.type, RESPONSE_TYPE.OK) mock_conn.commit.assert_called_once() @@ -65,17 +65,14 @@ def test_insert_error(self): error = psycopg.Error(error_msg) mock_cursor.executemany.side_effect = error - df = pd.DataFrame({ - 'column1': [1, 2, 3, np.nan], - 'column2': ['a', 'b', 'c', None] - }) + df = pd.DataFrame({"column1": [1, 2, 3, np.nan], "column2": ["a", "b", "c", None]}) - response = self.handler.insert('nonexistent_table', df) + response = self.handler.insert("nonexistent_table", df) mock_cursor.executemany.assert_called_once() mock_conn.rollback.assert_called_once() - assert isinstance(response, Response) + assert isinstance(response, ErrorResponse) self.assertEqual(response.type, RESPONSE_TYPE.ERROR) self.assertEqual(response.error_message, error_msg) @@ -91,21 +88,21 @@ def test_insert_with_empty_dataframe(self): self.handler.connect = MagicMock(return_value=mock_conn) mock_conn.cursor = MagicMock(return_value=mock_cursor) - df = pd.DataFrame(columns=['column1', 'column2']) + df = pd.DataFrame(columns=["column1", "column2"]) - table_name = 'mock_table' + table_name = "mock_table" response = self.handler.insert(table_name, df) - columns = ', '.join([f'"{col}"' if ' ' in col else col for col in df.columns]) - values = ', '.join(['%s' for _ in range(len(df.columns))]) - expected_query = f'INSERT INTO {table_name} ({columns}) VALUES ({values})' + columns = ", ".join([f'"{col}"' if " " in col else col for col in df.columns]) + values = ", ".join(["%s" for _ in range(len(df.columns))]) + expected_query = f"INSERT INTO {table_name} ({columns}) VALUES ({values})" mock_cursor.executemany.assert_called_once() call_args, call_kwargs = mock_cursor.executemany.call_args self.assertEqual(call_args[0], expected_query) self.assertEqual(len(call_args[1]), 0) - assert isinstance(response, Response) + assert isinstance(response, OkResponse) self.assertEqual(response.type, RESPONSE_TYPE.OK) mock_conn.commit.assert_called_once() @@ -123,25 +120,27 @@ def test_insert_with_special_column_names(self): self.handler.connect = MagicMock(return_value=mock_conn) mock_conn.cursor = MagicMock(return_value=mock_cursor) - df = pd.DataFrame({ - 'normal_column': [1, 2], - 'column with spaces': ['a', 'b'], - 'column-with-hyphens': [True, False], - 'mixed@column#123': [3.14, 2.71] - }) + df = pd.DataFrame( + { + "normal_column": [1, 2], + "column with spaces": ["a", "b"], + "column-with-hyphens": [True, False], + "mixed@column#123": [3.14, 2.71], + } + ) - table_name = 'mock_table' + table_name = "mock_table" response = self.handler.insert(table_name, df) call_args = mock_cursor.executemany.call_args[0][0] for col in df.columns: - if ' ' in col: + if " " in col: self.assertIn(f'"{col}"', call_args) else: self.assertTrue(col in call_args or f'"{col}"' in call_args) - assert isinstance(response, Response) + assert isinstance(response, OkResponse) self.assertEqual(response.type, RESPONSE_TYPE.OK) def test_insert_disconnect_when_needed(self): @@ -159,15 +158,15 @@ def test_insert_disconnect_when_needed(self): self.handler.disconnect = MagicMock() mock_conn.cursor = MagicMock(return_value=mock_cursor) - df = pd.DataFrame({'column1': [1, 2, 3]}) - self.handler.insert('mock_table', df) + df = pd.DataFrame({"column1": [1, 2, 3]}) + self.handler.insert("mock_table", df) self.handler.disconnect.assert_called_once() self.handler.connect.reset_mock() self.handler.disconnect.reset_mock() self.handler.is_connected = True - self.handler.insert('mock_table', df) + self.handler.insert("mock_table", df) self.handler.disconnect.assert_not_called() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/test_rest_api.py b/tests/unit/handlers/test_rest_api.py new file mode 100644 index 00000000000..ff10b02d67f --- /dev/null +++ b/tests/unit/handlers/test_rest_api.py @@ -0,0 +1,165 @@ +"""Unit tests for the generic REST API passthrough handler.""" + +from unittest.mock import patch, MagicMock + +from mindsdb.integrations.handlers.rest_api_handler.rest_api_handler import RestApiHandler +from mindsdb.integrations.libs.passthrough import PassthroughProtocol +from mindsdb.integrations.libs.passthrough_types import PassthroughRequest, PassthroughResponse +from mindsdb.integrations.libs.response import ( + HandlerStatusResponse as StatusResponse, +) + + +VALID_DATA = { + "base_url": "https://api.example.com", + "bearer_token": "test-token-123", +} + + +def _make_handler(connection_data=None): + if connection_data is None: + connection_data = dict(VALID_DATA) + return RestApiHandler("test_rest", connection_data=connection_data) + + +class TestRestApiHandlerInit: + def test_satisfies_passthrough_protocol(self): + assert issubclass(RestApiHandler, PassthroughProtocol) + + def test_stores_connection_data(self): + data = {"base_url": "https://x.com", "bearer_token": "tok"} + handler = _make_handler(data) + assert handler.connection_data == data + + def test_default_test_request_path(self): + handler = _make_handler() + assert handler._test_request.method == "GET" + assert handler._test_request.path == "/" + + def test_custom_test_path(self): + handler = _make_handler( + { + "base_url": "https://api.example.com", + "bearer_token": "tok", + "test_path": "/health", + } + ) + assert handler._test_request.path == "/health" + + def test_custom_test_path_without_slash(self): + handler = _make_handler( + { + "base_url": "https://api.example.com", + "bearer_token": "tok", + "test_path": "status", + } + ) + assert handler._test_request.path == "/status" + + +class TestCheckConnection: + def test_success(self): + handler = _make_handler() + response = handler.check_connection() + assert isinstance(response, StatusResponse) + assert response.success is True + assert not response.error_message + + def test_missing_base_url(self): + handler = _make_handler({"bearer_token": "tok"}) + response = handler.check_connection() + assert response.success is False + assert "base_url" in response.error_message + + def test_missing_bearer_token(self): + handler = _make_handler({"base_url": "https://api.example.com"}) + response = handler.check_connection() + assert response.success is False + assert "bearer_token" in response.error_message + + def test_empty_connection_data(self): + handler = _make_handler({}) + response = handler.check_connection() + assert response.success is False + + +class TestPassthroughIntegration: + """Test that the mixin methods work correctly on RestApiHandler.""" + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_api_passthrough_injects_bearer(self, mock_request): + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"Content-Type": "application/json"} + mock_resp.iter_content.return_value = [b'{"ok": true}'] + mock_resp.close = MagicMock() + mock_request.return_value = mock_resp + + handler = _make_handler() + result = handler.api_passthrough(PassthroughRequest(method="GET", path="/v1/users")) + + assert isinstance(result, PassthroughResponse) + assert result.status_code == 200 + headers = mock_request.call_args.kwargs["headers"] + assert headers["Authorization"] == "Bearer test-token-123" + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_api_passthrough_uses_base_url(self, mock_request): + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {} + mock_resp.iter_content.return_value = [b""] + mock_resp.close = MagicMock() + mock_request.return_value = mock_resp + + handler = _make_handler() + handler.api_passthrough(PassthroughRequest(method="GET", path="/foo")) + + called_url = mock_request.call_args.args[1] + assert called_url == "https://api.example.com/foo" + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_api_passthrough_includes_default_headers(self, mock_request): + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {} + mock_resp.iter_content.return_value = [b""] + mock_resp.close = MagicMock() + mock_request.return_value = mock_resp + + handler = _make_handler( + { + "base_url": "https://api.example.com", + "bearer_token": "tok", + "default_headers": {"Accept": "application/json", "X-Team": "data"}, + } + ) + handler.api_passthrough(PassthroughRequest(method="GET", path="/")) + + headers = mock_request.call_args.kwargs["headers"] + assert headers["Accept"] == "application/json" + assert headers["X-Team"] == "data" + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_test_passthrough_success(self, mock_request): + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"Content-Type": "application/json"} + mock_resp.iter_content.return_value = [b'{"ok": true}'] + mock_resp.close = MagicMock() + mock_request.return_value = mock_resp + + handler = _make_handler() + result = handler.test_passthrough() + + assert isinstance(result, dict) + assert result["ok"] is True + assert result["status_code"] == 200 + + def test_test_passthrough_with_no_network(self): + """test_passthrough catches connection errors gracefully.""" + handler = _make_handler() + result = handler.test_passthrough() + assert isinstance(result, dict) + assert result["ok"] is False + assert result["error_code"] in ("network", "unknown") diff --git a/tests/unit/handlers/test_salesforce.py b/tests/unit/handlers/test_salesforce.py index 54253f3eef8..6df5580ba27 100644 --- a/tests/unit/handlers/test_salesforce.py +++ b/tests/unit/handlers/test_salesforce.py @@ -16,7 +16,7 @@ from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, + TableResponse, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE, ) @@ -157,7 +157,7 @@ def test_check_connection_failure(self): def test_get_tables(self): """ - Test that the `get_tables` method returns a list of tables mapped from the Salesforce API. + Test that the `get_tables` method returns a TableResponse with a list of tables mapped from the Salesforce API. """ mock_tables = ["Account", "Contact"] self.mock_connect.return_value = MagicMock( @@ -168,7 +168,7 @@ def test_get_tables(self): self.handler.connect() response = self.handler.get_tables() - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -177,7 +177,7 @@ def test_get_tables(self): def test_get_columns(self): """ - Test that the `get_columns` method returns a list of columns for a given table. + Test that the `get_columns` method returns a TableResponse with a list of columns for a given table. """ mock_columns = ["Id", "Name", "Email"] mock_table = "Contact" @@ -203,7 +203,7 @@ def test_get_columns(self): self.handler.connect() response = self.handler.get_columns(mock_table) - assert isinstance(response, Response) + assert isinstance(response, TableResponse) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) df = response.data_frame @@ -435,7 +435,7 @@ def test_meta_get_tables_filters_requested_tables(self): with patch( "mindsdb.integrations.handlers.salesforce_handler.salesforce_handler.MetaAPIHandler.meta_get_tables", - return_value=Response(RESPONSE_TYPE.TABLE, None), + return_value=TableResponse(), ) as mock_meta: response = self.handler.meta_get_tables(table_names=["contact"]) @@ -680,5 +680,64 @@ def test_meta_get_columns_builds_schema(self): self.assertEqual(columns[0]["data_type"], "string") +class TestSalesforcePassthrough(unittest.TestCase): + """Exercise the PassthroughMixin retrofit (per-instance base URL).""" + + CONNECTION_DATA = { + "username": "u", + "password": "p", + "client_id": "cid", + "client_secret": "csec", + "access_token": "sf_access_tok", + "instance_url": "https://my-org.my.salesforce.com", + } + + def _mock_response(self, status_code=200): + resp = MagicMock() + resp.status_code = status_code + resp.headers = {"Content-Type": "application/json"} + resp.iter_content = MagicMock(return_value=iter([b'{"sobjects":[]}'])) + resp.close = MagicMock() + return resp + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_passthrough_uses_bearer_and_instance_url(self, mock_request): + mock_request.return_value = self._mock_response() + handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA) + from mindsdb.integrations.libs.passthrough_types import PassthroughRequest + + resp = handler.api_passthrough(PassthroughRequest("GET", "/services/data/v60.0/")) + + self.assertEqual(resp.status_code, 200) + args, kwargs = mock_request.call_args + self.assertEqual(args[0], "GET") + self.assertEqual(args[1], "https://my-org.my.salesforce.com/services/data/v60.0/") + self.assertEqual(kwargs["headers"]["Authorization"], "Bearer sf_access_tok") + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_test_passthrough_returns_ok_on_200(self, mock_request): + mock_request.return_value = self._mock_response(status_code=200) + handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA) + + result = handler.test_passthrough() + + self.assertTrue(result["ok"]) + self.assertEqual(result["status_code"], 200) + self.assertEqual(result["host"], "my-org.my.salesforce.com") + self.assertIsInstance(result["latency_ms"], int) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_test_passthrough_returns_auth_failed_on_401(self, mock_request): + mock_request.return_value = self._mock_response(status_code=401) + handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA) + + result = handler.test_passthrough() + + self.assertFalse(result["ok"]) + self.assertEqual(result["error_code"], "auth_failed") + self.assertEqual(result["status_code"], 401) + self.assertEqual(result["host"], "my-org.my.salesforce.com") + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/test_shopify_handler.py b/tests/unit/handlers/test_shopify_handler.py index 277ec09a638..b1ecc6e8811 100644 --- a/tests/unit/handlers/test_shopify_handler.py +++ b/tests/unit/handlers/test_shopify_handler.py @@ -817,5 +817,67 @@ def test_limit_large_than_max_page_limit(self, mock_shopify_query): self.assertEqual(len(result), 300) +class TestShopifyPassthrough(unittest.TestCase): + """Exercise the PassthroughMixin retrofit (X-Shopify-Access-Token auth).""" + + CONNECTION_DATA = { + "shop_url": "test-shop.myshopify.com", + "client_id": "cid", + "client_secret": "csec", + "access_token": "shpat_tokenvalue", + } + + def _mock_response(self, status_code=200): + resp = MagicMock() + resp.status_code = status_code + resp.headers = {"Content-Type": "application/json"} + resp.iter_content = MagicMock(return_value=iter([b'{"shop":{"id":1}}'])) + resp.close = MagicMock() + return resp + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_passthrough_uses_shopify_header_and_per_shop_base_url(self, mock_request): + mock_request.return_value = self._mock_response() + handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA) + from mindsdb.integrations.libs.passthrough_types import PassthroughRequest + + resp = handler.api_passthrough(PassthroughRequest("GET", "/admin/api/2024-01/shop.json")) + + self.assertEqual(resp.status_code, 200) + args, kwargs = mock_request.call_args + self.assertEqual(args[0], "GET") + self.assertEqual(args[1], "https://test-shop.myshopify.com/admin/api/2024-01/shop.json") + # Custom Shopify auth header; no bearer Authorization. + self.assertEqual(kwargs["headers"]["X-Shopify-Access-Token"], "shpat_tokenvalue") + self.assertNotIn("Authorization", kwargs["headers"]) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_test_passthrough_returns_ok_on_200(self, mock_request): + mock_request.return_value = self._mock_response(status_code=200) + handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA) + + result = handler.test_passthrough() + + self.assertTrue(result["ok"]) + self.assertEqual(result["status_code"], 200) + self.assertEqual(result["host"], "test-shop.myshopify.com") + self.assertIsInstance(result["latency_ms"], int) + # The probe should hit the version-less endpoint so it survives + # Shopify's quarterly Admin API version retirements. + self.assertEqual(mock_request.call_args[0][1], "https://test-shop.myshopify.com/admin/shop.json") + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_test_passthrough_returns_auth_failed_on_401(self, mock_request): + mock_request.return_value = self._mock_response(status_code=401) + handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA) + + result = handler.test_passthrough() + + self.assertFalse(result["ok"]) + self.assertEqual(result["error_code"], "auth_failed") + self.assertEqual(result["status_code"], 401) + self.assertEqual(result["host"], "test-shop.myshopify.com") + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/test_snowflake.py b/tests/unit/handlers/test_snowflake.py index e43aec5eac5..07c0c87b040 100644 --- a/tests/unit/handlers/test_snowflake.py +++ b/tests/unit/handlers/test_snowflake.py @@ -16,11 +16,16 @@ import numpy as np import pandas as pd from pandas import DataFrame -from types import SimpleNamespace from base_handler_test import BaseDatabaseHandlerTest -from mindsdb.integrations.libs.response import HandlerResponse as Response, INF_SCHEMA_COLUMNS_NAMES_SET, RESPONSE_TYPE +from mindsdb.integrations.libs.response import ( + OkResponse, + TableResponse, + ErrorResponse, + INF_SCHEMA_COLUMNS_NAMES_SET, + RESPONSE_TYPE, +) from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE @@ -246,8 +251,7 @@ def test_native_query_with_results(self): mock_cursor.fetch_pandas_batches.assert_called_once() mock_cursor.fetchall.assert_not_called() - self.assertIsInstance(data, Response) - self.assertFalse(data.error_code) + self.assertIsInstance(data, TableResponse) self.assertEqual(data.type, RESPONSE_TYPE.TABLE) self.assertIsInstance(data.data_frame, DataFrame) self.assertListEqual(list(data.data_frame.columns), expected_columns) @@ -285,8 +289,7 @@ def test_native_query_no_results(self): mock_cursor.execute.assert_called_once_with(query_str) mock_cursor.fetch_pandas_batches.assert_called_once() - self.assertIsInstance(data, Response) - self.assertFalse(data.error_code) + self.assertIsInstance(data, OkResponse) self.assertEqual(data.type, RESPONSE_TYPE.OK) self.assertEqual(data.affected_rows, 1) @@ -350,7 +353,7 @@ def test_native_query_error(self): mock_conn.cursor.assert_called_once() mock_cursor.execute.assert_called_once_with(query_str) - self.assertIsInstance(data, Response) + self.assertIsInstance(data, ErrorResponse) self.assertEqual(data.type, RESPONSE_TYPE.ERROR) self.assertIn(error_msg, data.error_message) @@ -376,33 +379,11 @@ def test_native_query_releases_memory_pool_when_jemalloc(self): mock_pool.backend_name = "jemalloc" mock_pool.release_unused = MagicMock() - response = self.handler.native_query("SELECT 1") + response = self.handler.native_query("SELECT 1", stream=False) self.assertEqual(response.type, RESPONSE_TYPE.TABLE) mock_pool.release_unused.assert_called_once() - def test_native_query_memory_estimation_error(self): - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__.return_value = mock_cursor - mock_cursor.__exit__.return_value = None - large_df = DataFrame({"ID": range(1500)}) - mock_cursor.fetch_pandas_batches.return_value = iter([large_df]) - mock_cursor.description = [ColumnDescription(name="ID", type_code=0, scale=0)] - mock_cursor.rowcount = 10000 - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor.return_value = mock_cursor - - with patch( - "mindsdb.integrations.handlers.snowflake_handler.snowflake_handler.psutil.virtual_memory", - return_value=SimpleNamespace(available=512), - ): - response = self.handler.native_query("SELECT * FROM big_table") - - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("query result is too large", response.error_message) - def test_key_pair_authentication_success(self): """ Tests successful connection using key pair authentication @@ -645,7 +626,7 @@ def test_query_method(self): renderer_mock.get_string.return_value = "SELECT * FROM test_table_rendered" self.handler.native_query = MagicMock() - expected_response = Response(RESPONSE_TYPE.TABLE) + expected_response = TableResponse(data=DataFrame()) self.handler.native_query.return_value = expected_response try: @@ -673,11 +654,8 @@ def test_get_tables(self): """ Tests that get_tables calls native_query with the correct SQL for Snowflake """ - expected_response = Response( - RESPONSE_TYPE.TABLE, - data_frame=DataFrame( - [("table1", "SCHEMA1", "BASE TABLE")], columns=["TABLE_NAME", "TABLE_SCHEMA", "TABLE_TYPE"] - ), + expected_response = TableResponse( + data=DataFrame([("table1", "SCHEMA1", "BASE TABLE")], columns=["TABLE_NAME", "TABLE_SCHEMA", "TABLE_TYPE"]) ) self.handler.native_query = MagicMock(return_value=expected_response) @@ -751,7 +729,7 @@ def test_get_columns(self): ] expected_df = DataFrame(expected_df_data, columns=query_columns) - expected_response = Response(RESPONSE_TYPE.TABLE, data_frame=expected_df) + expected_response = TableResponse(data=expected_df) self.handler.native_query = MagicMock(return_value=expected_response) table_name = "test_table" @@ -794,7 +772,7 @@ def test_meta_get_tables_casts_rowcount(self): } ] ) - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df)) + self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) result = self.handler.meta_get_tables(table_names=["orders"]) @@ -815,7 +793,7 @@ def test_meta_get_columns_filters(self): } ] ) - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df)) + self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) result = self.handler.meta_get_columns(table_names=["orders"]) @@ -849,8 +827,8 @@ def test_meta_get_column_statistics_success(self): ) self.handler.native_query = MagicMock( side_effect=[ - Response(RESPONSE_TYPE.TABLE, data_frame=columns_df), - Response(RESPONSE_TYPE.TABLE, data_frame=stats_df), + TableResponse(data=columns_df), + TableResponse(data=stats_df), ] ) @@ -864,9 +842,7 @@ def test_meta_get_column_statistics_success(self): self.assertEqual(id_stats["maximum_value"], 10) def test_meta_get_column_statistics_handles_error_response(self): - self.handler.native_query = MagicMock( - return_value=Response(RESPONSE_TYPE.ERROR, error_message="boom", data_frame=None) - ) + self.handler.native_query = MagicMock(return_value=ErrorResponse(error_message="boom")) result = self.handler.meta_get_column_statistics(table_names=["orders"]) self.assertEqual(result.type, RESPONSE_TYPE.ERROR) @@ -877,7 +853,7 @@ def test_meta_get_primary_keys_filters(self): {"table_name": "CUSTOMERS", "column_name": "ID", "key_sequence": 1, "constraint_name": "PK_CUSTOMERS"}, ] ) - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df)) + self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) result = self.handler.meta_get_primary_keys(table_names=["ORDERS"]) @@ -909,12 +885,17 @@ def test_meta_get_foreign_keys_filters(self): }, ] ) - self.handler.native_query = MagicMock(return_value=Response(RESPONSE_TYPE.TABLE, data_frame=df)) + self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) result = self.handler.meta_get_foreign_keys(table_names=["ORDERS", "CUSTOMERS"]) self.assertEqual(len(result.data_frame), 1) self.assertIn("child_table_name", result.data_frame.columns) + row = result.data_frame.iloc[0] + self.assertEqual(row["parent_table_name"], "ORDERS") + self.assertEqual(row["parent_column_name"], "CUSTOMER_ID") + self.assertEqual(row["child_table_name"], "CUSTOMERS") + self.assertEqual(row["child_column_name"], "ID") def test_meta_get_foreign_keys_handles_exception(self): self.handler.native_query = MagicMock(side_effect=Exception("boom")) @@ -1195,7 +1176,8 @@ def test_types_casting(self): ] response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + actual_mysql_types = [col.type for col in response.columns] + self.assertEqual(actual_mysql_types, excepted_mysql_types) for column_name in input_data.columns: result_value = response.data_frame[column_name][0] self.assertEqual(result_value, input_data[column_name][0]) @@ -1346,7 +1328,8 @@ def test_types_casting(self): ] response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + actual_mysql_types = [col.type for col in response.columns] + self.assertEqual(actual_mysql_types, excepted_mysql_types) for column_name in input_data.columns: result_value = response.data_frame[column_name][0] self.assertEqual(result_value, input_data[column_name][0]) @@ -1380,7 +1363,8 @@ def test_types_casting(self): excepted_mysql_types = [MYSQL_DATA_TYPE.BOOLEAN] response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + actual_mysql_types = [col.type for col in response.columns] + self.assertEqual(actual_mysql_types, excepted_mysql_types) for column_name in input_data.columns: result_value = response.data_frame[column_name][0] self.assertEqual(result_value, input_data[column_name][0]) @@ -1616,7 +1600,8 @@ def test_types_casting(self): } ) response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + actual_mysql_types = [col.type for col in response.columns] + self.assertEqual(actual_mysql_types, excepted_mysql_types) self.assertTrue(response.data_frame.equals(expected_result_df)) # endregion @@ -1679,7 +1664,8 @@ def test_types_casting(self): } ) response = self.handler.native_query(query_str) - self.assertEqual(response.mysql_types, excepted_mysql_types) + actual_mysql_types = [col.type for col in response.columns] + self.assertEqual(actual_mysql_types, excepted_mysql_types) self.assertTrue(response.data_frame.equals(expected_result_df)) # endregion diff --git a/tests/unit/handlers/test_timescaledb.py b/tests/unit/handlers/test_timescaledb.py index 32c3efb46de..52cbd771908 100644 --- a/tests/unit/handlers/test_timescaledb.py +++ b/tests/unit/handlers/test_timescaledb.py @@ -7,22 +7,19 @@ from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager from mindsdb.integrations.handlers.timescaledb_handler.timescaledb_handler import TimeScaleDBHandler -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response -) +from mindsdb.integrations.libs.response import DataHandlerResponse as Response class TestTimescaleHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property def dummy_connection_data(self): return OrderedDict( - host='127.0.0.1', + host="127.0.0.1", port=5432, - user='example_user', - schema='public', - password='example_pass', - database='example_db' + user="example_user", + schema="public", + password="example_pass", + database="example_db", ) @property @@ -69,10 +66,10 @@ def get_columns_query(self): """ def create_handler(self): - return TimeScaleDBHandler('timescaledb', connection_data=self.dummy_connection_data) + return TimeScaleDBHandler("timescaledb", connection_data=self.dummy_connection_data) def create_patcher(self): - return patch('psycopg.connect') + return patch("psycopg.connect") def test_native_query(self): """ @@ -99,5 +96,5 @@ def test_native_query(self): self.assertFalse(data.error_code) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit/handlers/test_web.py b/tests/unit/handlers/test_web.py index 78c1a9e2617..cb34256e15d 100644 --- a/tests/unit/handlers/test_web.py +++ b/tests/unit/handlers/test_web.py @@ -9,22 +9,27 @@ from bs4 import BeautifulSoup from mindsdb.integrations.libs.api_handler_exceptions import TableAlreadyExists -from mindsdb.integrations.handlers.web_handler.web_handler import WebHandler -from mindsdb.integrations.handlers.web_handler.web_handler import CrawlerTable -from mindsdb.integrations.handlers.web_handler import urlcrawl_helpers as helpers +try: + from mindsdb.integrations.handlers.web_handler.web_handler import WebHandler + from mindsdb.integrations.handlers.web_handler.web_handler import CrawlerTable + from mindsdb.integrations.handlers.web_handler import urlcrawl_helpers as helpers -from mindsdb.integrations.utilities.sql_utils import (FilterCondition, FilterOperator) + WEB_HANDLER_AVAILABLE = True +except ImportError: + WEB_HANDLER_AVAILABLE = False +from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator -class TestWebsHandler(unittest.TestCase): +@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)") +class TestWebsHandler(unittest.TestCase): def setUp(self) -> None: - self.handler = WebHandler(name='test_web_handler') + self.handler = WebHandler(name="test_web_handler") def test_crawler_already_registered(self): with self.assertRaises(TableAlreadyExists): - self.handler._register_table('crawler', CrawlerTable) + self.handler._register_table("crawler", CrawlerTable) PDF_CONTENT = ( @@ -43,6 +48,7 @@ def test_crawler_already_registered(self): MARKDOWN_SAMPLE_1 = "# Heading One \n\n ## Heading Two" +@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)") class TestWebHelpers(unittest.TestCase): @patch("requests.Response") def test_pdf_to_markdown(self, mock_response) -> None: @@ -60,14 +66,15 @@ def test_broken_pdf_to_markdown(self, mock_response) -> None: helpers.pdf_to_markdown(response) def test_url_validation(self): - assert helpers.is_valid('https://google.com') is True - assert helpers.is_valid('google.com') is False + assert helpers.is_valid("https://google.com") is True + assert helpers.is_valid("google.com") is False def test_get_readable_text_from_soup(self) -> None: soup = BeautifulSoup(HTML_SAMPLE_1, "html.parser") import re - expected = re.sub(r'\s+', ' ', MARKDOWN_SAMPLE_1).strip() - actual = re.sub(r'\s+', ' ', helpers.get_readable_text_from_soup(soup)).strip() + + expected = re.sub(r"\s+", " ", MARKDOWN_SAMPLE_1).strip() + actual = re.sub(r"\s+", " ", helpers.get_readable_text_from_soup(soup)).strip() assert expected == actual @@ -104,12 +111,9 @@ def test_parallel_get_all_website_links(self, mock_executor, mock_get_links): def html_get(url, **kwargs): # generate html page with 10 sub-links in the same domain - if not url.endswith('/'): - url = url + '/' - links = [ - f"link {i}\n" - for i in range(10) - ] + if not url.endswith("/"): + url = url + "/" + links = [f"link {i}\n" for i in range(10)] html = f""" @@ -128,24 +132,23 @@ def html_get(url, **kwargs): return resp +@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)") class TestWebHandler(unittest.TestCase): - - @patch('requests.Session.get') + @patch("requests.Session.get") def test_web_cases(self, mock_get): - mock_get.side_effect = html_get crawler_table = CrawlerTable(handler=MagicMock()) # filters - single_url = FilterCondition('url', FilterOperator.EQUAL, 'https://docs.mindsdb.com/') - two_urls = FilterCondition('url', FilterOperator.IN, ('https://docs.mindsdb.com/', 'https://docs.python.org/')) + single_url = FilterCondition("url", FilterOperator.EQUAL, "https://docs.mindsdb.com/") + two_urls = FilterCondition("url", FilterOperator.IN, ("https://docs.mindsdb.com/", "https://docs.python.org/")) - depth_0 = FilterCondition('crawl_depth', FilterOperator.EQUAL, 0) - depth_1 = FilterCondition('crawl_depth', FilterOperator.EQUAL, 1) - depth_2 = FilterCondition('crawl_depth', FilterOperator.EQUAL, 2) + depth_0 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 0) + depth_1 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 1) + depth_2 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 2) - per_url_2 = FilterCondition('per_url_limit', FilterOperator.EQUAL, 2) + per_url_2 = FilterCondition("per_url_limit", FilterOperator.EQUAL, 2) # ---- single url ----- diff --git a/mindsdb/integrations/handlers/altibase_handler/tests/__init__.py b/tests/unit/integrations/__init__.py similarity index 100% rename from mindsdb/integrations/handlers/altibase_handler/tests/__init__.py rename to tests/unit/integrations/__init__.py diff --git a/mindsdb/integrations/handlers/aqicn_handler/tests/__init__.py b/tests/unit/integrations/libs/__init__.py similarity index 100% rename from mindsdb/integrations/handlers/aqicn_handler/tests/__init__.py rename to tests/unit/integrations/libs/__init__.py diff --git a/tests/unit/integrations/libs/test_response.py b/tests/unit/integrations/libs/test_response.py new file mode 100644 index 00000000000..18aa870d939 --- /dev/null +++ b/tests/unit/integrations/libs/test_response.py @@ -0,0 +1,671 @@ +"""Unit tests for response classes in mindsdb.integrations.libs.response module. + +This module tests all response types used by handlers: +- TableResponse: for queries that return data (SELECT, SHOW, etc.) +- OkResponse: for successful operations without data (CREATE, DROP, etc.) +- ErrorResponse: for error cases +- HandlerStatusResponse: for connection status checks +- normalize_response: for converting legacy HandlerResponse to new types +- _safe_pandas_concat: memory-safe DataFrame concatenation +""" + +from unittest.mock import patch, MagicMock + +import pandas as pd +import pytest + +from mindsdb.integrations.libs.response import ( + TableResponse, + OkResponse, + ErrorResponse, + HandlerStatusResponse, + HandlerResponse, + normalize_response, + _safe_pandas_concat, + RESPONSE_TYPE, + DataHandlerResponse, +) +from mindsdb.utilities.types.column import Column +from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE + + +def _mock_virtual_memory(available_kb: int): + """Create a mock for psutil.virtual_memory() with given available memory in KB.""" + mock_mem = MagicMock() + mock_mem.available = available_kb << 10 # convert KB back to bytes + return mock_mem + + +class TestHandlerStatusResponse: + """Tests for HandlerStatusResponse class.""" + + def test_init_success(self): + """Test initialization with success status.""" + redirect_url = "https://example.com/auth" + copy_storage = "s3://bucket/path" + response = HandlerStatusResponse(success=True, redirect_url=redirect_url, copy_storage=copy_storage) + + assert response.success is True + assert response.error_message is None + assert response.redirect_url == redirect_url + assert response.copy_storage == copy_storage + + json_data = response.to_json() + assert json_data["success"] is True + assert json_data["error"] is None + assert json_data["redirect_url"] == redirect_url + assert json_data["copy_storage"] == copy_storage + + def test_init_failure(self): + """Test initialization with failure status.""" + error_msg = "Connection failed" + response = HandlerStatusResponse(success=False, error_message=error_msg) + + assert response.success is False + assert response.error_message == error_msg + assert response.redirect_url is None + assert response.copy_storage is None + + json_data = response.to_json() + assert json_data["success"] is False + assert json_data["error"] == error_msg + assert "redirect_url" not in json_data + assert "copy_storage" not in json_data + + +class TestErrorResponse: + """Unit tests for ErrorResponse class.""" + + def test_init_basic(self): + """Test basic initialization.""" + response = ErrorResponse(error_code=1, error_message="Test error", is_expected_error=True) + + assert response.type == RESPONSE_TYPE.ERROR + assert response.resp_type == RESPONSE_TYPE.ERROR + assert response.error_code == 1 + assert response.error_message == "Test error" + assert response.is_expected_error is True + assert response.exception is None + assert isinstance(response, DataHandlerResponse) + + def test_exception_capture(self): + """Test that exception is captured from current context.""" + try: + raise ValueError("Test exception") + except ValueError: + response = ErrorResponse(error_message="Caught exception") + assert response.exception is not None + assert isinstance(response.exception, ValueError) + + +class TestOkResponse: + """Unit tests for OkResponse class.""" + + def test_init(self): + """Test initialization with affected rows count.""" + response = OkResponse(affected_rows=5) + + assert response.type == RESPONSE_TYPE.OK + assert response.resp_type == RESPONSE_TYPE.OK + assert response.affected_rows == 5 + assert isinstance(response, DataHandlerResponse) + + def test_init_without_affected_rows(self): + """Test initialization without affected rows.""" + response = OkResponse() + + assert response.affected_rows is None + + +class TestTableResponse: + """Unit tests for TableResponse class.""" + + def test_init_with_data(self): + """Test initialization with DataFrame.""" + df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}) + response = TableResponse(data=df) + + assert response.type == RESPONSE_TYPE.TABLE + assert response.resp_type == RESPONSE_TYPE.TABLE + assert response._fetched is True + pd.testing.assert_frame_equal(response._data, df) + # 'columns' was not provided as attr, so should be as in df + assert [c.name for c in response.columns] == ["id", "name"] + + def test_complex_init_with_generator(self): + """Test initialization with data generator.""" + column1 = Column(name="id", type=MYSQL_DATA_TYPE.INT) + column2 = Column(name="name", type=MYSQL_DATA_TYPE.VARCHAR) + columns = [column1, column2] + df = pd.DataFrame({"id": [0, 1], "name": ["a", "b"]}) + df1 = pd.DataFrame({"id": [2, 3], "name": ["d", "e"]}) + df2 = pd.DataFrame({"id": [4, 5], "name": ["f", "g"]}) + + def data_gen(): + yield df1 + yield df2 + + response = TableResponse(data=df, data_generator=data_gen(), columns=columns) + + assert response.columns[0] is column1 + assert response.columns[1] is column2 + assert response.data_generator is not None + pd.testing.assert_frame_equal(response._data, df) + assert response._fetched is False + pieces = [] + while isinstance(el := response.fetchmany(), pd.DataFrame): + pieces.append(el) + pd.testing.assert_frame_equal(pieces[0], df1) + pd.testing.assert_frame_equal(pieces[1], df2) + pd.testing.assert_frame_equal(response._data, pd.concat([df, df1, df2])) + assert response._fetched is True + assert response.data_generator is None + + def test_data_frame_property(self): + """Test initialization with explicit columns.""" + columns = [Column(name="id", type=MYSQL_DATA_TYPE.INT), Column(name="name", type=MYSQL_DATA_TYPE.VARCHAR)] + df = pd.DataFrame({"id": [0, 1], "name": ["a", "b"]}) + df1 = pd.DataFrame({"id": [2, 3], "name": ["d", "e"]}) + df2 = pd.DataFrame({"id": [4, 5], "name": ["f", "g"]}) + + def data_gen(): + yield df1 + yield df2 + + response = TableResponse(data=df, data_generator=data_gen(), columns=columns) + assert response._fetched is False + pd.testing.assert_frame_equal(response._data, df) + pd.testing.assert_frame_equal(response.data_frame, pd.concat([df, df1, df2])) + assert response._fetched is True + + # should not change result + response.fetchall() + pd.testing.assert_frame_equal(response.data_frame, pd.concat([df, df1, df2])) + + def test_init_with_affected_rows(self): + """Test initialization with affected_rows.""" + df = pd.DataFrame({"id": [1, 2, 3]}) + response = TableResponse(data=df, affected_rows=100) + + assert response.affected_rows == 100 + + def test_iterate_no_save_no_generator(self): + """Test iterate_no_save yields existing data.""" + df = pd.DataFrame({"id": [1, 2, 3]}) + # Need to provide a generator (even empty) to avoid TypeError + response = TableResponse(data=df, data_generator=iter([])) + + chunks = list(response.iterate_no_save()) + + assert len(chunks) == 1 + pd.testing.assert_frame_equal(chunks[0], df) + + # after `iterate_no_save` result should be invalid + with pytest.raises(ValueError): + pd.testing.assert_frame_equal(response.data_frame, df) + + def test_iterate_no_save_with_generator(self): + """Test iterate_no_save yields all chunks without saving.""" + df1 = pd.DataFrame({"id": [4, 5]}) + df2 = pd.DataFrame({"id": [6, 7]}) + + def data_gen(): + yield df1 + yield df2 + + df = pd.DataFrame({"id": [1, 2, 3]}) + response = TableResponse(data=df, data_generator=data_gen()) + chunks = list(response.iterate_no_save()) + + assert len(chunks) == 3 + pd.testing.assert_frame_equal(chunks[0], df) + pd.testing.assert_frame_equal(chunks[1], df1) + pd.testing.assert_frame_equal(chunks[2], df2) + + # after `iterate_no_save` result should be invalid + with pytest.raises(ValueError): + pd.testing.assert_frame_equal(response.data_frame, df) + + +class TestNormalizeResponse: + """Unit tests for normalize_response function.""" + + def test_normalize_table_response(self): + """Test that TableResponse is returned as-is.""" + original = TableResponse(data=pd.DataFrame({"id": [1, 2]})) + result = normalize_response(original) + + assert result is original + + def test_normalize_ok_response(self): + """Test that OkResponse is returned as-is.""" + original = OkResponse(affected_rows=5) + result = normalize_response(original) + + assert result is original + + def test_normalize_error_response(self): + """Test that ErrorResponse is returned as-is.""" + original = ErrorResponse(error_message="Test error") + result = normalize_response(original) + + assert result is original + + def test_normalize_legacy_error_response(self): + """Test conversion of legacy HandlerResponse with ERROR type.""" + legacy = HandlerResponse(resp_type=RESPONSE_TYPE.ERROR, error_code=1, error_message="Legacy error") + result = normalize_response(legacy) + + assert isinstance(result, ErrorResponse) + assert result.error_code == 1 + assert result.error_message == "Legacy error" + + def test_normalize_legacy_ok_response(self): + """Test conversion of legacy HandlerResponse with OK type.""" + legacy = HandlerResponse(resp_type=RESPONSE_TYPE.OK, affected_rows=10) + result = normalize_response(legacy) + + assert isinstance(result, OkResponse) + assert result.affected_rows == 10 + + def test_normalize_legacy_table_response(self): + """Test conversion of legacy HandlerResponse with TABLE type.""" + df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]}) + legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df) + result = normalize_response(legacy) + + assert isinstance(result, TableResponse) + pd.testing.assert_frame_equal(result.data_frame, df) + + def test_normalize_legacy_table_response_with_mysql_types(self): + """Test conversion preserves mysql_types as column types.""" + df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]}) + mysql_types = [MYSQL_DATA_TYPE.INT, MYSQL_DATA_TYPE.VARCHAR] + legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df, mysql_types=mysql_types) + result = normalize_response(legacy) + + assert isinstance(result, TableResponse) + assert len(result.columns) == 2 + assert result.columns[0].type == MYSQL_DATA_TYPE.INT + assert result.columns[1].type == MYSQL_DATA_TYPE.VARCHAR + + def test_normalize_legacy_table_response_empty_dataframe(self): + """Test conversion with empty DataFrame.""" + df = pd.DataFrame() + legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df) + result = normalize_response(legacy) + + assert isinstance(result, TableResponse) + assert len(result.columns) == 0 + + +class TestSafePandasConcat: + """Unit tests for _safe_pandas_concat function.""" + + @patch("mindsdb.integrations.libs.response.psutil") + def test_concat_with_enough_memory(self, mock_psutil): + """Test successful concatenation when sufficient memory is available.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + df1 = pd.DataFrame({"id": [1, 2]}) + df2 = pd.DataFrame({"id": [3, 4]}) + result = _safe_pandas_concat([df1, df2]) + + pd.testing.assert_frame_equal(result, pd.concat([df1, df2])) + + @patch("mindsdb.integrations.libs.response.psutil") + def test_concat_raises_memory_error_when_not_enough_memory(self, mock_psutil): + """Test MemoryError is raised when available memory is too low.""" + # Set available memory to essentially 0 + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=10) + + df1 = pd.DataFrame({"x": list(range(1000))}) + df2 = pd.DataFrame({"x": list(range(1000))}) + + with pytest.raises(MemoryError): + _safe_pandas_concat([df1, df2]) + + @patch("mindsdb.integrations.libs.response.psutil") + def test_concat_single_piece(self, mock_psutil): + """Test concatenation with a single DataFrame.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + df = pd.DataFrame({"id": [1, 2, 3]}) + result = _safe_pandas_concat([df]) + + pd.testing.assert_frame_equal(result, df) + + +class TestRaiseIfLowMemory: + """Unit tests for TableResponse._raise_if_low_memory method.""" + + @patch("mindsdb.integrations.libs.response.psutil") + def test_with_known_affected_rows_enough_memory(self, mock_psutil): + """Test no error when affected_rows is known and memory is sufficient.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + response = TableResponse(data=pd.DataFrame({"id": [1, 2]}), affected_rows=100) + response._last_data_piece = pd.DataFrame({"id": list(range(10))}) + response.rows_fetched = 10 + + # Should not raise + response._raise_if_low_memory() + + @patch("mindsdb.integrations.libs.response.psutil") + def test_with_known_affected_rows_not_enough_memory(self, mock_psutil): + """Test MemoryError when affected_rows is known and memory is insufficient.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1) + + # Use strings to ensure DataFrame memory > 1KB after >> 10 + large_piece = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]}) + response = TableResponse(data=pd.DataFrame({"text": ["a"]}), affected_rows=1000) + response._last_data_piece = large_piece + response.rows_fetched = 100 + + with pytest.raises(MemoryError, match="Not enough memory"): + response._raise_if_low_memory() + + @patch("mindsdb.integrations.libs.response.psutil") + def test_with_unknown_affected_rows_enough_memory(self, mock_psutil): + """Test no error when affected_rows is None and memory is sufficient.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + response = TableResponse(data=pd.DataFrame({"id": [1, 2]})) + response._last_data_piece = pd.DataFrame({"id": list(range(10))}) + + # Should not raise + response._raise_if_low_memory() + + @patch("mindsdb.integrations.libs.response.psutil") + def test_with_unknown_affected_rows_not_enough_memory(self, mock_psutil): + """Test MemoryError when affected_rows is None and memory is insufficient.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1) + + # Use strings to ensure DataFrame memory > 1KB after >> 10 + large_piece = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]}) + response = TableResponse(data=pd.DataFrame({"text": ["a"]})) + response._last_data_piece = large_piece + + with pytest.raises(MemoryError, match="Not enough memory"): + response._raise_if_low_memory() + + @patch("mindsdb.integrations.libs.response.psutil") + def test_all_rows_already_fetched(self, mock_psutil): + """Test no error when all rows have been fetched (rows_expected = 0).""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=0) + + response = TableResponse(data=pd.DataFrame({"id": [1, 2]}), affected_rows=10) + response._last_data_piece = pd.DataFrame({"id": list(range(10))}) + response.rows_fetched = 10 # all rows fetched + + # rows_expected = min(10 - 10, 10) = 0, should not raise + response._raise_if_low_memory() + + +class TestIterateWithMemoryCheck: + """Unit tests for TableResponse._iterate_with_memory_check method.""" + + def test_none_generator_yields_nothing(self): + """Test that no chunks are yielded when data_generator is None.""" + response = TableResponse(data=pd.DataFrame({"id": [1]})) + assert response._data_generator is None + + chunks = list(response._iterate_with_memory_check()) + assert chunks == [] + + @patch("mindsdb.integrations.libs.response.psutil") + def test_normal_iteration(self, mock_psutil): + """Test that all chunks are yielded during normal iteration.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + df1 = pd.DataFrame({"id": [1, 2]}) + df2 = pd.DataFrame({"id": [3, 4]}) + + def data_gen(): + yield df1 + yield df2 + + columns = [Column(name="id")] + response = TableResponse(data_generator=data_gen(), columns=columns) + + chunks = list(response._iterate_with_memory_check()) + + assert len(chunks) == 2 + pd.testing.assert_frame_equal(chunks[0], df1) + pd.testing.assert_frame_equal(chunks[1], df2) + + @patch("mindsdb.integrations.libs.response.psutil") + def test_memory_error_stops_iteration_after_first_chunk(self, mock_psutil): + """Test that MemoryError is raised after the first chunk when memory runs out. + + The pre-loop _raise_if_low_memory() is a no-op (since _last_data_piece is None), + so the first real psutil.virtual_memory() call happens at the post-yield check. + """ + # Use strings to ensure DataFrame memory > 1KB after >> 10 + df1 = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]}) + df2 = pd.DataFrame({"text": ["y" * 200 for _ in range(100)]}) + + def data_gen(): + yield df1 + yield df2 + + columns = [Column(name="text")] + response = TableResponse(data_generator=data_gen(), columns=columns) + + gen = response._iterate_with_memory_check() + + # First chunk succeeds β€” post-yield check will be the first real psutil call + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1) + first = next(gen) + pd.testing.assert_frame_equal(first, df1) + + # Resuming the generator triggers _raise_if_low_memory with 0 available memory + with pytest.raises(MemoryError): + next(gen) + + @patch("mindsdb.integrations.libs.response.psutil") + def test_updates_last_data_piece_and_rows_fetched(self, mock_psutil): + """Test that _last_data_piece and rows_fetched are updated during iteration.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + df1 = pd.DataFrame({"id": [1, 2, 3]}) + df2 = pd.DataFrame({"id": [4, 5]}) + + def data_gen(): + yield df1 + yield df2 + + columns = [Column(name="id")] + response = TableResponse(data_generator=data_gen(), columns=columns) + assert response.rows_fetched == 0 + + list(response._iterate_with_memory_check()) + + pd.testing.assert_frame_equal(response._last_data_piece, df2) + assert response.rows_fetched == 5 + + +class TestTableResponseFetchallEdgeCases: + """Additional edge-case tests for TableResponse.fetchall.""" + + def test_fetchall_no_generator_returns_existing_data(self): + """Test fetchall returns existing data when no generator is set.""" + df = pd.DataFrame({"id": [1, 2, 3]}) + response = TableResponse(data=df) + + result = response.fetchall() + pd.testing.assert_frame_equal(result, df) + + @patch("mindsdb.integrations.libs.response.psutil") + def test_fetchall_generator_only_no_initial_data(self, mock_psutil): + """Test fetchall with generator but no initial data.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + df1 = pd.DataFrame({"id": [1, 2]}) + df2 = pd.DataFrame({"id": [3, 4]}) + + def data_gen(): + yield df1 + yield df2 + + columns = [Column(name="id")] + response = TableResponse(data_generator=data_gen(), columns=columns) + + result = response.fetchall() + pd.testing.assert_frame_equal(result, pd.concat([df1, df2])) + assert response._fetched is True + assert response._data_generator is None + + @patch("mindsdb.integrations.libs.response.psutil") + def test_fetchall_empty_generator_creates_empty_df(self, mock_psutil): + """Test fetchall with empty generator creates DataFrame with column names.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + columns = [Column(name="id"), Column(name="name")] + response = TableResponse(data_generator=iter([]), columns=columns) + + result = response.fetchall() + assert list(result.columns) == ["id", "name"] + assert len(result) == 0 + + def test_fetchall_raises_if_invalid(self): + """Test fetchall raises ValueError if data was already consumed by iterate_no_save.""" + df = pd.DataFrame({"id": [1]}) + response = TableResponse(data=df, data_generator=iter([])) + list(response.iterate_no_save()) + + with pytest.raises(ValueError, match="Data has already been fetched"): + response.fetchall() + + +class TestTableResponseFetchmanyEdgeCases: + """Additional edge-case tests for TableResponse.fetchmany.""" + + @patch("mindsdb.integrations.libs.response.psutil") + def test_fetchmany_first_piece_with_no_initial_data(self, mock_psutil): + """Test fetchmany sets _data directly when no initial data exists.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + df1 = pd.DataFrame({"id": [1, 2]}) + columns = [Column(name="id")] + response = TableResponse(data_generator=iter([df1]), columns=columns) + + piece = response.fetchmany() + pd.testing.assert_frame_equal(piece, df1) + pd.testing.assert_frame_equal(response._data, df1) + + @patch("mindsdb.integrations.libs.response.psutil") + def test_fetchmany_accumulates_data(self, mock_psutil): + """Test fetchmany accumulates pieces in _data.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + df = pd.DataFrame({"id": [0]}) + df1 = pd.DataFrame({"id": [1]}) + df2 = pd.DataFrame({"id": [2]}) + + def data_gen(): + yield df1 + yield df2 + + columns = [Column(name="id")] + response = TableResponse(data=df, data_generator=data_gen(), columns=columns) + + response.fetchmany() # df1 + response.fetchmany() # df2 + + pd.testing.assert_frame_equal(response._data, pd.concat([df, df1, df2])) + + @patch("mindsdb.integrations.libs.response.psutil") + def test_fetchmany_returns_none_when_exhausted(self, mock_psutil): + """Test fetchmany returns None and marks response as fetched when generator is empty.""" + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + + df1 = pd.DataFrame({"id": [1]}) + columns = [Column(name="id")] + response = TableResponse(data_generator=iter([df1]), columns=columns) + + piece1 = response.fetchmany() + assert isinstance(piece1, pd.DataFrame) + + piece2 = response.fetchmany() + assert piece2 is None + assert response._fetched is True + assert response._data_generator is None + + def test_fetchmany_raises_if_invalid(self): + """Test fetchmany raises ValueError after iterate_no_save.""" + df = pd.DataFrame({"id": [1]}) + response = TableResponse(data=df, data_generator=iter([])) + list(response.iterate_no_save()) + + with pytest.raises(ValueError, match="Data has already been fetched"): + response.fetchmany() + + +class TestMemoryErrorPropagation: + """Tests for MemoryError propagation through fetchall, fetchmany, and iterate_no_save.""" + + @patch("mindsdb.integrations.libs.response.psutil") + def test_fetchall_raises_memory_error(self, mock_psutil): + """Test MemoryError propagates through fetchall.""" + # Enough memory for first chunk, then out of memory + mock_psutil.virtual_memory.side_effect = [ + _mock_virtual_memory(available_kb=1_000_000), # pre-loop check + _mock_virtual_memory(available_kb=0), # post-yield check + ] + + df1 = pd.DataFrame({"x": list(range(1000))}) + df2 = pd.DataFrame({"x": list(range(1000))}) + + def data_gen(): + yield df1 + yield df2 + + columns = [Column(name="x")] + response = TableResponse(data_generator=data_gen(), columns=columns) + + with pytest.raises(MemoryError): + response.fetchall() + + @patch("mindsdb.integrations.libs.response.psutil") + def test_fetchmany_raises_memory_error(self, mock_psutil): + """Test MemoryError propagates through fetchmany on second call.""" + df1 = pd.DataFrame({"x": list(range(1000))}) + df2 = pd.DataFrame({"x": list(range(1000))}) + + def data_gen(): + yield df1 + yield df2 + + columns = [Column(name="x")] + response = TableResponse(data_generator=data_gen(), columns=columns) + + # First fetchmany: enough memory (pre-loop check is no-op since _last_data_piece is None) + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) + response.fetchmany() + + # Second fetchmany: pre-loop check fails because we now have _last_data_piece set + mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=0) + with pytest.raises(MemoryError): + response.fetchmany() + + @patch("mindsdb.integrations.libs.response.psutil") + def test_iterate_no_save_raises_memory_error(self, mock_psutil): + """Test MemoryError propagates through iterate_no_save.""" + mock_psutil.virtual_memory.side_effect = [ + _mock_virtual_memory(available_kb=1_000_000), # pre-loop check + _mock_virtual_memory(available_kb=0), # post-yield check after first chunk + ] + + df1 = pd.DataFrame({"x": list(range(1000))}) + df2 = pd.DataFrame({"x": list(range(1000))}) + + def data_gen(): + yield df1 + yield df2 + + columns = [Column(name="x")] + response = TableResponse(data_generator=data_gen(), columns=columns) + + with pytest.raises(MemoryError): + list(response.iterate_no_save()) diff --git a/tests/unit/interfaces/agents/test_generic_api_key.py b/tests/unit/interfaces/agents/test_generic_api_key.py index 8198b763a08..3473aa05c70 100644 --- a/tests/unit/interfaces/agents/test_generic_api_key.py +++ b/tests/unit/interfaces/agents/test_generic_api_key.py @@ -1,9 +1,8 @@ import os import unittest -from unittest.mock import patch, MagicMock +from unittest.mock import patch from mindsdb.integrations.utilities.handler_utils import get_api_key -from mindsdb.interfaces.agents.agents_controller import AgentsController class TestGenericApiKeyHandling(unittest.TestCase): @@ -71,100 +70,6 @@ def test_get_generic_api_key_for_google_provider(self): ) self.assertEqual(api_key, "test-specific-google-api-key") - @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.check_model_provider") - @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.get_agent") - @patch("mindsdb.interfaces.agents.agents_controller.ProjectController") - @patch("mindsdb.interfaces.storage.db.session") - def test_add_agent_with_generic_api_key( - self, mock_session, mock_project_controller, mock_get_agent, mock_check_model_provider - ): - """Test adding an agent with a generic API key in params.""" - # Mock project controller - mock_project = MagicMock() - mock_project_controller.return_value.get.return_value = mock_project - - # Mock get_agent to return None (agent doesn't exist yet) - mock_get_agent.return_value = None - - # Mock check_model_provider to return a provider - mock_check_model_provider.return_value = (None, "openai") - - # Create an instance of AgentsController - agent_controller = AgentsController() - - # Test adding an agent with a generic API key in params - params = {"api_key": "test-generic-agent-api-key", "other_param": "value"} - - # Create a mock agent with proper params - mock_agent = MagicMock() - mock_agent.params = params.copy() # Set params directly - - # Mock db.Agents to return our prepared mock agent - with patch("mindsdb.interfaces.storage.db.Agents", return_value=mock_agent): - # Add the agent - agent = agent_controller.add_agent( - name="test_agent", - project_name="mindsdb", - model_name="gpt-4", - provider="openai", - params=params, - ) - - # Verify that the generic API key was preserved in the params - self.assertEqual(agent.params["api_key"], "test-generic-agent-api-key") - - @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.check_model_provider") - @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.get_agent") - @patch("mindsdb.interfaces.agents.agents_controller.ProjectController") - @patch("mindsdb.interfaces.storage.db.session") - def test_add_agent_with_both_api_keys( - self, mock_session, mock_project_controller, mock_get_agent, mock_check_model_provider - ): - """Test adding an agent with both generic and provider-specific API keys.""" - # Mock project controller - mock_project = MagicMock() - mock_project_controller.return_value.get.return_value = mock_project - - # Mock get_agent to return None (agent doesn't exist yet) - mock_get_agent.return_value = None - - # Mock check_model_provider to return a provider - mock_check_model_provider.return_value = (None, "openai") - - # Create an instance of AgentsController - agent_controller = AgentsController() - - # Test adding an agent with both generic and provider-specific API keys - params = { - "api_key": "test-generic-agent-api-key", - "openai_api_key": "test-specific-agent-api-key", - "other_param": "value", - } - - # Create a mock agent with proper params - mock_agent = MagicMock() - mock_agent.params = params.copy() # Set params directly - - # Mock db.Agents to return our prepared mock agent - with patch("mindsdb.interfaces.storage.db.Agents", return_value=mock_agent): - # Add the agent - agent = agent_controller.add_agent( - name="test_agent", - project_name="mindsdb", - model_name="gpt-4", - provider="openai", - params=params, - ) - - # Verify that both API keys were preserved in the params - self.assertEqual(agent.params["api_key"], "test-generic-agent-api-key") - self.assertEqual(agent.params["openai_api_key"], "test-specific-agent-api-key") - - # Test that get_api_key returns the provider-specific key when both are present - api_key = get_api_key("openai", {"params": params}) - - self.assertEqual(api_key, "test-specific-agent-api-key") - if __name__ == "__main__": unittest.main() diff --git a/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py b/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py new file mode 100644 index 00000000000..6543ef28f4a --- /dev/null +++ b/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py @@ -0,0 +1,79 @@ +import os +from types import SimpleNamespace +from unittest.mock import MagicMock +from unittest.mock import patch + +from mindsdb.interfaces.knowledge_base.controller import KnowledgeBaseController +from mindsdb.interfaces.knowledge_base.default_storage_resolver import resolve_default_storage_engines +from mindsdb.utilities.config import config + + +def _make_controller(handler_meta_by_name): + integration_controller = MagicMock() + integration_controller.get_handler_meta.side_effect = lambda name: handler_meta_by_name.get(name) + integration_controller.get.return_value = None + + session = SimpleNamespace(integration_controller=integration_controller) + return KnowledgeBaseController(session), integration_controller + + +def test_resolve_default_vector_storage_uses_pgvector_from_config(): + previous_storage = config["knowledge_bases"].get("storage", None) + controller, _ = _make_controller({"pgvector": {"import": {"success": True}}}) + + try: + config.update({"knowledge_bases": {"storage": "pgvector"}}) + vector_db_name = "kb_pgvector_store" + controller._create_persistent_pgvector = MagicMock(return_value=vector_db_name) + + vector_db, vector_table = controller._resolve_default_vector_storage("kb_docs") + + assert vector_db == vector_db_name + assert vector_table == "kb_docs" + controller._create_persistent_pgvector.assert_called_once_with({}) + finally: + config.update({"knowledge_bases": {"storage": previous_storage}}) + + +def test_resolve_default_vector_storage_uses_faiss_from_config(): + previous_storage = config["knowledge_bases"].get("storage", None) + controller, _ = _make_controller({"duckdb_faiss": {"import": {"success": True}}}) + + try: + config.update({"knowledge_bases": {"storage": "faiss"}}) + + vector_db_name = "store_kb_docs" + controller._create_persistent_faiss = MagicMock(return_value=vector_db_name) + + vector_db, vector_table = controller._resolve_default_vector_storage("kb_docs") + + assert vector_db == vector_db_name + assert vector_table == "kb_docs" + controller._create_persistent_faiss.assert_called_once_with("kb_docs") + finally: + config.update({"knowledge_bases": {"storage": previous_storage}}) + + +def test_create_persistent_pgvector_reuses_existing_store(): + controller, integration_controller = _make_controller({}) + integration_controller.get.return_value = {"name": "kb_pgvector_store"} + + vector_store_name = controller._create_persistent_pgvector({"is_sparse": True, "vector_size": 30522}) + + assert vector_store_name == "kb_pgvector_store" + integration_controller.add.assert_not_called() + + +def test_resolver_uses_pgvector_url_fallback_when_storage_is_empty(): + previous_storage = config["knowledge_bases"].get("storage", None) + controller, _ = _make_controller({}) + + try: + config.update({"knowledge_bases": {"storage": None}}) + with patch.dict(os.environ, {"KB_PGVECTOR_URL": "postgresql://user:pass@host/db"}, clear=False): + resolved = resolve_default_storage_engines(config) + assert resolved["default_storage"] == "pgvector" + assert resolved["available_vector_engines"] == ["faiss", "pgvector"] + assert resolved["pgvector_enabled"] is True + finally: + config.update({"knowledge_bases": {"storage": previous_storage}}) diff --git a/tests/unit/interfaces/test_get_handler_meta.py b/tests/unit/interfaces/test_get_handler_meta.py new file mode 100644 index 00000000000..70f01fd6712 --- /dev/null +++ b/tests/unit/interfaces/test_get_handler_meta.py @@ -0,0 +1,235 @@ +""" +Unit tests for IntegrationController.get_handler_meta() focusing on the +handler_folder=None crash fix for community handler stubs. + +Covered scenarios: + 1. Community stub (path=None), no handler_folder passed β†’ folder derived from stub metadata. + 2. Community stub (path=None), explicit handler_folder passed β†’ explicit folder used as-is. + 3. Non-community (built-in) handler with path set β†’ fetch path never triggered. + 4. Community stub whose "import.folder" is also None (malformed entry) β†’ graceful None return. +""" + +import threading +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + + +def _make_controller(): + """ + Return an IntegrationController instance with _load_handler_modules skipped + so no real filesystem / network access happens during construction. + """ + from mindsdb.interfaces.database.integrations import IntegrationController + + with patch.object(IntegrationController, "_load_handler_modules"): + ctrl = IntegrationController() + + # Minimal attributes that other methods rely on. + ctrl.handler_modules = {} + ctrl.handlers_import_status = {} + ctrl.handlers_cache = MagicMock() + ctrl._import_lock = threading.Lock() + ctrl._community_handlers_dir = None + return ctrl + + +def _community_stub(handler_name: str, folder: str | None = None): + """Build a community handler stub as created by _load_handler_modules.""" + from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL + + return { + "path": None, + "import": { + "success": None, + "error_message": None, + "folder": folder if folder is not None else f"{handler_name}_handler", + "dependencies": [], + }, + "name": handler_name, + "title": handler_name.capitalize(), + "description": "", + "permanent": False, + "connection_args": None, + "class_type": None, + "type": None, + "support_level": HANDLER_SUPPORT_LEVEL.COMMUNITY, + } + + +def _builtin_stub(handler_name: str, handler_path: Path): + """Build a built-in handler stub as created by _register_handler_dir.""" + return { + "path": handler_path, + "import": { + "success": True, + "error_message": None, + "folder": handler_path.name, + "dependencies": [], + }, + "name": handler_name, + "permanent": False, + "connection_args": None, + "class_type": None, + "type": None, + "support_level": None, + "community": False, + } + + +class TestGetHandlerMetaCommunityFolderFallback(unittest.TestCase): + """get_handler_meta() derives handler_folder from stub metadata when None.""" + + def setUp(self): + self.ctrl = _make_controller() + + def test_community_stub_folder_derived_from_metadata(self): + """ + When handler_folder is not supplied, get_handler_meta() must read + "import.folder" from the stub and pass it to _fetch_community_handler. + """ + stub = _community_stub("github", folder="github_handler") + self.ctrl.handlers_import_status["github"] = stub + + fetched_meta = {**stub, "path": Path("/tmp/github_handler")} + fetched_meta["import"] = {**stub["import"], "success": True} + + with patch.object(self.ctrl, "_fetch_community_handler", return_value=fetched_meta) as mock_fetch: + result = self.ctrl.get_handler_meta("github") # no handler_folder + + mock_fetch.assert_called_once_with("github", "github_handler") + self.assertIsNotNone(result) + + def test_community_stub_explicit_folder_not_overridden(self): + """ + When handler_folder is explicitly provided, it must be forwarded as-is + and the stub metadata must not override it. + """ + stub = _community_stub("github", folder="github_handler") + self.ctrl.handlers_import_status["github"] = stub + + fetched_meta = {**stub, "path": Path("/tmp/custom_dir")} + fetched_meta["import"] = {**stub["import"], "success": True} + + with patch.object(self.ctrl, "_fetch_community_handler", return_value=fetched_meta) as mock_fetch: + result = self.ctrl.get_handler_meta("github", handler_folder="custom_dir") + + mock_fetch.assert_called_once_with("github", "custom_dir") + self.assertIsNotNone(result) + + def test_builtin_handler_fetch_path_not_triggered(self): + """ + A built-in handler with a real path must not trigger the community fetch + path regardless of the handler_folder argument. + """ + stub = _builtin_stub("mysql", Path("/opt/mindsdb/handlers/mysql_handler")) + self.ctrl.handlers_import_status["mysql"] = stub + + with ( + patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch, + patch.object(self.ctrl, "import_handler", return_value=stub), + ): + result = self.ctrl.get_handler_meta("mysql") + + mock_fetch.assert_not_called() + self.assertIsNotNone(result) + + def test_community_stub_missing_folder_returns_none_gracefully(self): + """ + If the stub's "import.folder" is also None (malformed index entry), + the guard in get_handler_meta() must return None immediately β€” before + _fetch_community_handler is ever called β€” to avoid a TypeError from + fetch_handler(None, storage_dir). + """ + stub = _community_stub("broken") + stub["import"]["folder"] = None # simulate malformed entry + self.ctrl.handlers_import_status["broken"] = stub + + with patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch: + result = self.ctrl.get_handler_meta("broken") # no handler_folder + + mock_fetch.assert_not_called() # guard exits before reaching _fetch_community_handler + self.assertIsNone(result) + + def test_unknown_handler_returns_none(self): + """get_handler_meta() for a completely unknown handler name returns None.""" + result = self.ctrl.get_handler_meta("does_not_exist") + self.assertIsNone(result) + + +class TestGetHandlersImportStatus(unittest.TestCase): + """get_handlers_import_status() must not fetch/import community stubs.""" + + def setUp(self): + self.ctrl = _make_controller() + + def test_community_stub_not_fetched_during_listing(self): + """ + Community stubs (support_level="community", path=None) must not trigger + _fetch_community_handler() or import_handler() during listing. + """ + stub = _community_stub("github", folder="github_handler") + self.ctrl.handlers_import_status["github"] = stub + + with ( + patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch, + patch.object(self.ctrl, "import_handler") as mock_import, + ): + self.ctrl.get_handlers_import_status() + + mock_fetch.assert_not_called() + mock_import.assert_not_called() + + def test_community_stub_metadata_returned_in_listing(self): + """ + Stub metadata must be present in the result so the UI can render the + handler entry without a fetch having occurred. + """ + stub = _community_stub("github", folder="github_handler") + self.ctrl.handlers_import_status["github"] = stub + + with ( + patch.object(self.ctrl, "_fetch_community_handler"), + patch.object(self.ctrl, "import_handler"), + ): + result = self.ctrl.get_handlers_import_status() + + self.assertIn("github", result) + meta = result["github"] + self.assertEqual(meta["name"], "github") + self.assertEqual(meta["support_level"], "community") + self.assertIsNone(meta["path"]) + self.assertIsNotNone(meta["import"]) + + def test_non_community_handler_uses_get_handler_meta(self): + """ + Built-in handlers (path != None) must still go through get_handler_meta() + so that lazy import is triggered if needed. + """ + stub = _builtin_stub("mysql", Path("/opt/mindsdb/handlers/mysql_handler")) + self.ctrl.handlers_import_status["mysql"] = stub + + with patch.object(self.ctrl, "get_handler_meta", return_value=stub) as mock_meta: + self.ctrl.get_handlers_import_status() + + mock_meta.assert_called_once_with("mysql", stub["import"]["folder"]) + + def test_fetched_community_handler_uses_get_handler_meta(self): + """ + A community handler that has already been fetched (path != None) must + also go through get_handler_meta() β€” the early-return guard only applies + when path is None. + """ + stub = _community_stub("github", folder="github_handler") + stub["path"] = Path("/tmp/community_handlers/github_handler") + stub["import"]["success"] = True + self.ctrl.handlers_import_status["github"] = stub + + with patch.object(self.ctrl, "get_handler_meta", return_value=stub) as mock_meta: + self.ctrl.get_handlers_import_status() + + mock_meta.assert_called_once_with("github", "github_handler") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/planner/test_join_tables.py b/tests/unit/planner/test_join_tables.py index 24cef73b8fa..7bd8a463d7a 100644 --- a/tests/unit/planner/test_join_tables.py +++ b/tests/unit/planner/test_join_tables.py @@ -11,6 +11,7 @@ Star, BinaryOperation, Function, + Parameter, ) from mindsdb_sql_parser.utils import JoinType @@ -319,43 +320,71 @@ def test_join_tables_plan_limit_offset(self): def test_join_tables_plan_order_by(self): query = parse_sql(""" + WITH tab2 AS ( + SELECT * FROM int2.tab2 limit 100 + ), + categories as ( + SELECT * FROM int3.cats + ) SELECT tab1.column1, tab2.column1, tab2.column2 - FROM int.tab1 INNER - JOIN int2.tab2 ON tab1.column1 > tab2.column1 + FROM int.tab1 tab1 + INNER JOIN tab2 ON tab1.column1 > tab2.column1 + WHERE tab2.category_id = (SELECT id FROM categories WHERE name='book') ORDER BY tab1.column1 LIMIT 10 """) subquery = copy.deepcopy(query) + subquery.cte = None subquery.from_table = None subquery.offset = None + subquery.where.args[1] = Parameter(Result(2)) - plan = plan_query(query, integrations=["int", "int2"]) + plan = plan_query(query, integrations=["int", "int2", "int3"], default_namespace="mindsdb") expected_plan = QueryPlan( integrations=["int"], steps=[ - FetchDataframeStepPartition( + FetchDataframeStep( step_num=0, + integration="int2", + query=parse_sql("select * from tab2 limit 100"), + ), + FetchDataframeStep( + step_num=1, + integration="int3", + query=parse_sql("select * from cats"), + ), + SubSelectStep( + step_num=2, + query=Select( + targets=[Identifier("id")], + where=BinaryOperation(op="=", args=[Identifier("name"), Constant("book")]), + ), + dataframe=Result(1), + table_name="categories", + ), + FetchDataframeStepPartition( + step_num=3, integration="int", - query=parse_sql("select column1 AS column1 from tab1 order by column1"), + query=parse_sql("select column1 AS column1 from tab1 AS tab1 order by column1"), condition={"limit": 10}, steps=[ - FetchDataframeStep( - step_num=1, - integration="int2", + SubSelectStep( + step_num=4, + dataframe=Result(0), query=Select( targets=[ - Identifier("column1", alias=Identifier("column1")), - Identifier("column2", alias=Identifier("column2")), + Star(), ], # Column pruning - from_table=Identifier("tab2"), + where=BinaryOperation(op="=", args=[Identifier("category_id"), Parameter(Result(2))]), ), + table_name="tab2", ), JoinStep( - step_num=2, - left=Result(0), - right=Result(1), + step_num=5, + left=Result(3), + right=Result(4), query=Join( left=Identifier("tab1"), right=Identifier("tab2"), @@ -367,7 +396,7 @@ def test_join_tables_plan_order_by(self): ), ], ), - QueryStep(subquery, from_table=Result(0), strict_where=False), + QueryStep(subquery, from_table=Result(3), strict_where=False), ], ) diff --git a/tests/unit/planner/test_select_from_predictor.py b/tests/unit/planner/test_select_from_predictor.py index 38a1e65f1ff..85ccf4af365 100644 --- a/tests/unit/planner/test_select_from_predictor.py +++ b/tests/unit/planner/test_select_from_predictor.py @@ -1,14 +1,17 @@ import pytest from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import (Identifier, Select, Constant, Star, Parameter, BinaryOperation) +from mindsdb_sql_parser.ast import Identifier, Select, Constant, Star, Parameter, BinaryOperation from mindsdb.api.executor.planner.exceptions import PlanningException from mindsdb.api.executor.planner import plan_query from mindsdb.api.executor.planner.query_plan import QueryPlan from mindsdb.api.executor.planner.step_result import Result from mindsdb.api.executor.planner.steps import ( - ProjectStep, ApplyPredictorRowStep, GetPredictorColumns, FetchDataframeStep + ProjectStep, + ApplyPredictorRowStep, + GetPredictorColumns, + FetchDataframeStep, ) @@ -16,347 +19,334 @@ class TestPlanSelectFromPredictor: def test_select_from_predictor_plan(self): query = Select( targets=[Star()], - from_table=Identifier('mindsdb.pred'), + from_table=Identifier("mindsdb.pred"), where=BinaryOperation( - op='and', - args=[BinaryOperation(op='=', args=[Identifier('x1'), Constant(1)]), - BinaryOperation(op='=', args=[Identifier('x2'), Constant('2')])], - ) + op="and", + args=[ + BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]), + BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]), + ], + ), ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ - ApplyPredictorRowStep( - namespace='mindsdb', predictor=Identifier('pred'), - row_dict={'x1': 1, 'x2': '2'} - ), + ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}), ], - ) - plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) assert plan.steps == expected_plan.steps def test_select_from_predictor_negative_constant(self): query = parse_sql( - ''' + """ select * from mindsdb.pred where x1 = -1 - ''' + """ ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ - ApplyPredictorRowStep(namespace='mindsdb', predictor=Identifier('pred'), row_dict={'x1': -1, }), + ApplyPredictorRowStep( + namespace="mindsdb", + predictor=Identifier("pred"), + row_dict={ + "x1": -1, + }, + ), ], ) - plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) assert plan.steps == expected_plan.steps def test_select_from_predictor_plan_other_ml(self): query = parse_sql( - ''' + """ select * from mlflow.pred where x1 = 1 and x2 = '2' - ''' + """ ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ - ApplyPredictorRowStep( - namespace='mlflow', predictor=Identifier('pred'), - row_dict={'x1': 1, 'x2': '2'} - ), + ApplyPredictorRowStep(namespace="mlflow", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}), ], - ) - plan = plan_query(query, predictor_metadata=[{'name': 'pred', 'integration_name': 'mlflow'}]) + plan = plan_query(query, predictor_metadata=[{"name": "pred", "integration_name": "mlflow"}]) assert plan.steps == expected_plan.steps def test_select_from_predictor_aliases_in_project(self): query = Select( - targets=[Identifier('tb.x1', alias=Identifier('col1')), - Identifier('tb.x2', alias=Identifier('col2')), - Identifier('tb.y', alias=Identifier('predicted'))], - from_table=Identifier('mindsdb.pred', alias=Identifier('tb')), + targets=[ + Identifier("tb.x1", alias=Identifier("col1")), + Identifier("tb.x2", alias=Identifier("col2")), + Identifier("tb.y", alias=Identifier("predicted")), + ], + from_table=Identifier("mindsdb.pred", alias=Identifier("tb")), where=BinaryOperation( - op='and', + op="and", args=[ - BinaryOperation(op='=', args=[Identifier('tb.x1'), Constant(1)]), - BinaryOperation(op='=', args=[Identifier('tb.x2'), Constant('2')]), + BinaryOperation(op="=", args=[Identifier("tb.x1"), Constant(1)]), + BinaryOperation(op="=", args=[Identifier("tb.x2"), Constant("2")]), ], - ) + ), ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ ApplyPredictorRowStep( - namespace='mindsdb', - predictor=Identifier('pred', alias=Identifier('tb')), - row_dict={'x1': 1, 'x2': '2'} + namespace="mindsdb", + predictor=Identifier("pred", alias=Identifier("tb")), + row_dict={"x1": 1, "x2": "2"}, ), ProjectStep( dataframe=Result(0), - columns=[Identifier('tb.x1', alias=Identifier('col1')), - Identifier('tb.x2', alias=Identifier('col2')), - Identifier('tb.y', alias=Identifier('predicted'))] + columns=[ + Identifier("tb.x1", alias=Identifier("col1")), + Identifier("tb.x2", alias=Identifier("col2")), + Identifier("tb.y", alias=Identifier("predicted")), + ], ), ], - ) - plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) assert plan.steps == expected_plan.steps def test_select_from_predictor_plan_predictor_alias(self): query = Select( targets=[Star()], - from_table=Identifier('mindsdb.pred', alias=Identifier('pred_alias')), + from_table=Identifier("mindsdb.pred", alias=Identifier("pred_alias")), where=BinaryOperation( - op='and', + op="and", args=[ - BinaryOperation(op='=', args=[Identifier('pred_alias.x1'), Constant(1)]), - BinaryOperation( - op='=', - args=[Identifier('pred_alias.x2'), Constant('2')] - ) + BinaryOperation(op="=", args=[Identifier("pred_alias.x1"), Constant(1)]), + BinaryOperation(op="=", args=[Identifier("pred_alias.x2"), Constant("2")]), ], - ) + ), ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ ApplyPredictorRowStep( - namespace='mindsdb', predictor=Identifier('pred', alias=Identifier('pred_alias')), - row_dict={'x1': 1, 'x2': '2'} + namespace="mindsdb", + predictor=Identifier("pred", alias=Identifier("pred_alias")), + row_dict={"x1": 1, "x2": "2"}, ), ], ) - plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) assert plan.steps == expected_plan.steps def test_select_from_predictor_plan_verbose_col_names(self): query = Select( targets=[Star()], - from_table=Identifier('mindsdb.pred'), + from_table=Identifier("mindsdb.pred"), where=BinaryOperation( - op='and', - args=[BinaryOperation(op='=', args=[Identifier('pred.x1'), Constant(1)]), - BinaryOperation(op='=', args=[Identifier('pred.x2'), Constant('2')])], - ) + op="and", + args=[ + BinaryOperation(op="=", args=[Identifier("pred.x1"), Constant(1)]), + BinaryOperation(op="=", args=[Identifier("pred.x2"), Constant("2")]), + ], + ), ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ - ApplyPredictorRowStep( - namespace='mindsdb', predictor=Identifier('pred'), - row_dict={'x1': 1, 'x2': '2'} - ), + ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}), ProjectStep(dataframe=Result(0), columns=[Star()]), ], ) - plan = plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) for i in range(len(plan.steps)): assert plan.steps[i] == expected_plan.steps[i] def test_select_from_predictor_plan_group_by_error(self): query = Select( - targets=[Identifier('x1'), Identifier('x2'), Identifier('pred.y')], - from_table=Identifier('mindsdb.pred'), - group_by=[Identifier('x1')] + targets=[Identifier("x1"), Identifier("x2"), Identifier("pred.y")], + from_table=Identifier("mindsdb.pred"), + group_by=[Identifier("x1")], ) with pytest.raises(PlanningException): - plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) def test_select_from_predictor_wrong_where_op_error(self): query = Select( targets=[Star()], - from_table=Identifier('mindsdb.pred'), + from_table=Identifier("mindsdb.pred"), where=BinaryOperation( - op='and', - args=[BinaryOperation(op='>', args=[Identifier('x1'), Constant(1)]), - BinaryOperation(op='=', args=[Identifier('x2'), Constant('2')])], - ) + op="and", + args=[ + BinaryOperation(op=">", args=[Identifier("x1"), Constant(1)]), + BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]), + ], + ), ) with pytest.raises(PlanningException): - plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) def test_select_from_predictor_multiple_values_error(self): query = Select( targets=[Star()], - from_table=Identifier('mindsdb.pred'), + from_table=Identifier("mindsdb.pred"), where=BinaryOperation( - op='and', - args=[BinaryOperation(op='=', args=[Identifier('x1'), Constant(1)]), - BinaryOperation(op='=', args=[Identifier('x1'), Constant('2')])], - ) + op="and", + args=[ + BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]), + BinaryOperation(op="=", args=[Identifier("x1"), Constant("2")]), + ], + ), ) with pytest.raises(PlanningException): - plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) def test_select_from_predictor_no_where_error(self): - query = Select( - targets=[Star()], - from_table=Identifier('mindsdb.pred') - ) + query = Select(targets=[Star()], from_table=Identifier("mindsdb.pred")) with pytest.raises(PlanningException): - plan_query(query, predictor_namespace='mindsdb', predictor_metadata={'pred': {}}) + plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) def test_select_from_predictor_default_namespace(self): query = Select( targets=[Star()], - from_table=Identifier('pred'), + from_table=Identifier("pred"), where=BinaryOperation( - op='and', - args=[BinaryOperation(op='=', args=[Identifier('x1'), Constant(1)]), - BinaryOperation(op='=', args=[Identifier('x2'), Constant('2')])], - ) + op="and", + args=[ + BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]), + BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]), + ], + ), ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', - default_namespace='mindsdb', + predictor_namespace="mindsdb", + default_namespace="mindsdb", steps=[ - ApplyPredictorRowStep( - namespace='mindsdb', predictor=Identifier('pred'), - row_dict={'x1': 1, 'x2': '2'} - ), + ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}), ], ) plan = plan_query( - query, predictor_namespace='mindsdb', default_namespace='mindsdb', predictor_metadata={'pred': {}} + query, predictor_namespace="mindsdb", default_namespace="mindsdb", predictor_metadata={"pred": {}} ) assert plan.steps == expected_plan.steps def test_select_from_predictor_get_columns(self): - sql = 'SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0' + sql = "SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0" query = parse_sql(sql) expected_query = Select( - targets=[Identifier('GDP_per_capita_USD')], - from_table=Identifier('hdi_predictor_external'), - where=BinaryOperation( - op="=", - args=[Constant(1), Constant(0)] - ) + targets=[Identifier("GDP_per_capita_USD")], + from_table=Identifier("hdi_predictor_external"), + where=BinaryOperation(op="=", args=[Constant(1), Constant(0)]), ) assert query.to_tree() == expected_query.to_tree() expected_plan = QueryPlan( - predictor_namespace='mindsdb', - default_namespace='mindsdb', + predictor_namespace="mindsdb", + default_namespace="mindsdb", steps=[ - GetPredictorColumns( - namespace='mindsdb', - predictor=Identifier('hdi_predictor_external') - ), - ProjectStep(dataframe=Result(0), columns=[Identifier('GDP_per_capita_USD')]), + GetPredictorColumns(namespace="mindsdb", predictor=Identifier("hdi_predictor_external")), + ProjectStep(dataframe=Result(0), columns=[Identifier("GDP_per_capita_USD")]), ], ) plan = plan_query( - query, predictor_namespace='mindsdb', default_namespace='mindsdb', - predictor_metadata={'hdi_predictor_external': {}} + query, + predictor_namespace="mindsdb", + default_namespace="mindsdb", + predictor_metadata={"hdi_predictor_external": {}}, ) assert plan.steps == expected_plan.steps def test_using_predictor_version(self): query = parse_sql( - ''' + """ select * from mindsdb.pred.21 where x1 = 1 - ''' + """ ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ ApplyPredictorRowStep( - namespace='mindsdb', predictor=Identifier(parts=['pred', '21']), - row_dict={'x1': 1} + namespace="mindsdb", predictor=Identifier(parts=["pred", "21"]), row_dict={"x1": 1} ) ], ) - plan = plan_query(query, predictor_metadata=[{'name': 'pred', 'integration_name': 'mindsdb'}]) + plan = plan_query(query, predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}]) assert plan.steps == expected_plan.steps def test_select_from_predictor_subselect(self): query = parse_sql( - ''' + """ select * from mindsdb.pred.21 where x1 = (select id from int1.t1) - ''' + """ ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ FetchDataframeStep( - integration='int1', - query=parse_sql('select id as id from t1'), + integration="int1", + query=parse_sql("select id as id from t1"), ), ApplyPredictorRowStep( - namespace='mindsdb', - predictor=Identifier(parts=['pred', '21']), - row_dict={'x1': Parameter(Result(0))} - ) + namespace="mindsdb", + predictor=Identifier(parts=["pred", "21"]), + row_dict={"x1": Parameter(Result(0))}, + ), ], ) plan = plan_query( - query, - integrations=['int1'], - predictor_metadata=[{'name': 'pred', 'integration_name': 'mindsdb'}] + query, integrations=["int1"], predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}] ) assert plan.steps == expected_plan.steps def test_select_from_view_subselect(self): query = parse_sql( - ''' + """ select * from v1 where x1 in (select id from int1.tab1) - ''' + """ ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ FetchDataframeStep( - integration='int1', - query=parse_sql('select id as id from tab1'), + integration="int1", + query=parse_sql("select id as id from tab1"), ), FetchDataframeStep( - integration='mindsdb', + integration="mindsdb", query=Select( targets=[Star()], - from_table=Identifier('v1'), - where=BinaryOperation( - op='in', - args=[ - Identifier(parts=['x1']), - Parameter(Result(0)) - ] - ) + from_table=Identifier("v1"), + where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(0))]), ), ), ], @@ -364,81 +354,66 @@ def test_select_from_view_subselect(self): plan = plan_query( query, - integrations=['int1'], - default_namespace='mindsdb', - predictor_metadata=[{'name': 'pred', 'integration_name': 'mindsdb'}] + integrations=["int1"], + default_namespace="mindsdb", + predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], ) assert plan.steps == expected_plan.steps def test_select_from_view_subselect_view(self): query = parse_sql( - ''' + """ select * from v1 where x1 in (select v2.id from v2) - ''' + """ ) expected_plan = QueryPlan( - predictor_namespace='mindsdb', + predictor_namespace="mindsdb", steps=[ FetchDataframeStep( - integration='mindsdb', - query=parse_sql('select v2.id as id from v2'), + integration="mindsdb", + query=parse_sql("select v2.id as id from v2"), ), FetchDataframeStep( - integration='mindsdb', + integration="mindsdb", query=Select( targets=[Star()], - from_table=Identifier('v1'), - where=BinaryOperation( - op='in', - args=[ - Identifier(parts=['x1']), - Parameter(Result(0)) - ] - ) + from_table=Identifier("v1"), + where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(0))]), ), ), ], ) - plan = plan_query( - query, - integrations=[], - default_namespace='mindsdb', - predictor_metadata=[] - ) + plan = plan_query(query, integrations=[], default_namespace="mindsdb", predictor_metadata=[]) assert plan.steps == expected_plan.steps class TestMLSelect: - def test_select_from_predictor_plan_other_ml(self): # sends to integrations - query = parse_sql(''' select * from mlflow.predictors ''') + query = parse_sql(""" select * from mlflow.predictors """) expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(step_num=0, integration='mlflow', query=parse_sql('SELECT * FROM predictors')) - ], + steps=[FetchDataframeStep(step_num=0, integration="mlflow", query=parse_sql("SELECT * FROM predictors"))], ) - plan = plan_query(query, predictor_metadata=[], integrations=['mlflow']) + plan = plan_query(query, predictor_metadata=[], integrations=["mlflow"]) assert plan.steps == expected_plan.steps class TestNestedSelect: - def test_using_predictor_in_subselect(self): """ Use predictor in subselect when selecting from integration """ sql = """ SELECT * - FROM chromadb.test_tabl + FROM vectordb.test_tabl WHERE search_vector = ( SELECT emebddings @@ -450,37 +425,25 @@ def test_using_predictor_in_subselect(self): ast_tree = parse_sql(sql) plan = plan_query( ast_tree, - integrations=['chromadb'], - predictor_metadata=[ - {'name': 'embedding_model', 'integration_name': 'mindsdb'} - ] + integrations=["vectordb"], + predictor_metadata=[{"name": "embedding_model", "integration_name": "mindsdb"}], ) expected_plan = [ ApplyPredictorRowStep( step_num=0, - namespace='mindsdb', - predictor=Identifier(parts=['embedding_model']), - row_dict={'content': 'some text'} - ), - ProjectStep( - step_num=1, - dataframe=Result(0), - columns=[Identifier(parts=['emebddings'])] + namespace="mindsdb", + predictor=Identifier(parts=["embedding_model"]), + row_dict={"content": "some text"}, ), + ProjectStep(step_num=1, dataframe=Result(0), columns=[Identifier(parts=["emebddings"])]), FetchDataframeStep( step_num=2, - integration='chromadb', + integration="vectordb", query=Select( targets=[Star()], - from_table=Identifier(parts=['test_tabl']), - where=BinaryOperation( - op='=', - args=[ - Identifier(parts=['search_vector']), - Parameter(Result(1)) - ] - ) + from_table=Identifier(parts=["test_tabl"]), + where=BinaryOperation(op="=", args=[Identifier(parts=["search_vector"]), Parameter(Result(1))]), ), ), ] @@ -498,31 +461,27 @@ def test_using_integration_in_subselect(self): WHERE content = ( SELECT content - FROM chromadb.test_tabl + FROM vectordb.test_tabl LIMIT 1 ) """ ast_tree = parse_sql(sql) plan = plan_query( ast_tree, - integrations=['chromadb'], - predictor_metadata=[ - {'name': 'embedding_model', 'integration_name': 'mindsdb'} - ] + integrations=["vectordb"], + predictor_metadata=[{"name": "embedding_model", "integration_name": "mindsdb"}], ) expected_plan = [ FetchDataframeStep( - step_num=0, - integration='chromadb', - query=parse_sql('SELECT content AS content FROM test_tabl LIMIT 1') + step_num=0, integration="vectordb", query=parse_sql("SELECT content AS content FROM test_tabl LIMIT 1") ), ApplyPredictorRowStep( step_num=1, - namespace='mindsdb', - predictor=Identifier(parts=['embedding_model']), - row_dict={'content': Parameter(Result(0))} - ) + namespace="mindsdb", + predictor=Identifier(parts=["embedding_model"]), + row_dict={"content": Parameter(Result(0))}, + ), ] assert plan.steps == expected_plan diff --git a/tests/unit/test_passthrough.py b/tests/unit/test_passthrough.py new file mode 100644 index 00000000000..9b86bc9f7ee --- /dev/null +++ b/tests/unit/test_passthrough.py @@ -0,0 +1,400 @@ +"""Unit tests for PassthroughMixin.""" + +import unittest +from unittest.mock import MagicMock, patch + +from mindsdb.integrations.libs.passthrough import ( + PassthroughMixin, + REDACTED_SENTINEL, +) +from mindsdb.integrations.libs.passthrough_types import ( + HostNotAllowedError, + PassthroughConfigError, + PassthroughRequest, + PassthroughValidationError, +) + + +class _FakeHandler(PassthroughMixin): + """Minimal handler stub for exercising the mixin.""" + + _bearer_token_arg = "api_key" + _base_url_default = "https://api.example.com" + _test_request = PassthroughRequest(method="GET", path="/me") + + def __init__(self, connection_data: dict): + self.name = "fake_ds" + self.connection_data = connection_data + + +def _mock_response(status_code=200, body=b'{"ok":true}', headers=None, content_type="application/json"): + """Return a mock requests.Response exposing the bits the mixin uses.""" + resp = MagicMock() + resp.status_code = status_code + resp.headers = {"Content-Type": content_type, **(headers or {})} + resp.iter_content = MagicMock(return_value=iter([body])) + resp.close = MagicMock() + return resp + + +class PassthroughHappyPathTests(unittest.TestCase): + def setUp(self): + self.handler = _FakeHandler({"api_key": "secret-token-abcdef1234567890"}) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_injects_bearer_and_uses_default_base_url(self, mock_request): + mock_request.return_value = _mock_response() + resp = self.handler.api_passthrough(PassthroughRequest("GET", "/me")) + + self.assertEqual(resp.status_code, 200) + self.assertEqual(resp.body, {"ok": True}) + + args, kwargs = mock_request.call_args + self.assertEqual(args[0], "GET") + self.assertEqual(args[1], "https://api.example.com/me") + self.assertEqual(kwargs["headers"]["Authorization"], "Bearer secret-token-abcdef1234567890") + self.assertEqual(kwargs["headers"]["X-Minds-Passthrough"], "1") + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_user_base_url_overrides_default(self, mock_request): + self.handler.connection_data["base_url"] = "https://api.eu.example.com" + mock_request.return_value = _mock_response() + self.handler.api_passthrough(PassthroughRequest("GET", "/me")) + self.assertEqual(mock_request.call_args[0][1], "https://api.eu.example.com/me") + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_query_params_forwarded(self, mock_request): + mock_request.return_value = _mock_response() + self.handler.api_passthrough(PassthroughRequest("GET", "/x", query={"a": "1"})) + self.assertEqual(mock_request.call_args.kwargs["params"], {"a": "1"}) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_json_body_forwarded(self, mock_request): + mock_request.return_value = _mock_response() + self.handler.api_passthrough(PassthroughRequest("POST", "/x", body={"name": "foo"})) + self.assertEqual(mock_request.call_args.kwargs["json"], {"name": "foo"}) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_default_headers_merged(self, mock_request): + self.handler.connection_data["default_headers"] = {"Accept": "application/json"} + mock_request.return_value = _mock_response() + self.handler.api_passthrough(PassthroughRequest("GET", "/x")) + self.assertEqual(mock_request.call_args.kwargs["headers"]["Accept"], "application/json") + + +class PassthroughHeaderFilteringTests(unittest.TestCase): + def setUp(self): + self.handler = _FakeHandler({"api_key": "secret-token-abcdef1234567890"}) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_caller_cannot_override_authorization(self, mock_request): + mock_request.return_value = _mock_response() + self.handler.api_passthrough( + PassthroughRequest("GET", "/x", headers={"Authorization": "Bearer hijack", "Cookie": "s=1"}) + ) + outgoing = mock_request.call_args.kwargs["headers"] + self.assertEqual(outgoing["Authorization"], "Bearer secret-token-abcdef1234567890") + self.assertNotIn("Cookie", outgoing) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_proxy_headers_stripped(self, mock_request): + mock_request.return_value = _mock_response() + self.handler.api_passthrough(PassthroughRequest("GET", "/x", headers={"Proxy-Authorization": "hijack"})) + outgoing = mock_request.call_args.kwargs["headers"] + self.assertNotIn("Proxy-Authorization", outgoing) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_hop_by_hop_response_headers_stripped(self, mock_request): + mock_request.return_value = _mock_response( + headers={"Connection": "close", "X-Safe": "1", "Transfer-Encoding": "chunked"} + ) + resp = self.handler.api_passthrough(PassthroughRequest("GET", "/x")) + self.assertNotIn("Connection", resp.headers) + self.assertNotIn("Transfer-Encoding", resp.headers) + self.assertEqual(resp.headers.get("X-Safe"), "1") + + +class PassthroughHostAllowlistTests(unittest.TestCase): + def test_rejects_host_outside_allowlist(self): + handler = _FakeHandler( + { + "api_key": "t", + "base_url": "https://api.example.com", + "allowed_hosts": ["api.example.com"], + } + ) + # Direct host check using a bad URL + with self.assertRaises(HostNotAllowedError): + handler._check_host_allowed("evil.com") + + def test_wildcard_allows_any_host(self): + handler = _FakeHandler( + { + "api_key": "t", + "base_url": "https://api.example.com", + "allowed_hosts": ["*"], + } + ) + handler._check_host_allowed("evil.com") # must not raise + + def test_private_ip_rejected_by_default(self): + handler = _FakeHandler({"api_key": "t", "base_url": "http://10.0.0.1"}) + with self.assertRaises(HostNotAllowedError): + handler._check_host_allowed("10.0.0.1") + + def test_private_ip_allowed_when_explicitly_listed(self): + handler = _FakeHandler( + { + "api_key": "t", + "base_url": "http://10.0.0.1", + "allowed_hosts": ["10.0.0.1"], + } + ) + # Explicitly allowlisted private IP should still be rejected β€” the + # mixin treats explicit private-IP allowlisting as a foot-gun that + # requires the "*" escape hatch. Document this behavior. + with self.assertRaises(HostNotAllowedError): + handler._check_host_allowed("10.0.0.1") + + def test_loopback_rejected_with_wildcard_when_asterisk_not_used(self): + handler = _FakeHandler( + { + "api_key": "t", + "base_url": "http://127.0.0.1", + "allowed_hosts": ["127.0.0.1"], + } + ) + with self.assertRaises(HostNotAllowedError): + handler._check_host_allowed("127.0.0.1") + + +class PassthroughValidationTests(unittest.TestCase): + def test_missing_bearer_raises(self): + handler = _FakeHandler({}) # no api_key + with self.assertRaises(PassthroughConfigError): + handler.api_passthrough(PassthroughRequest("GET", "/me")) + + def test_missing_base_url_raises(self): + class NoDefault(_FakeHandler): + _base_url_default = None + + handler = NoDefault({"api_key": "t"}) + with self.assertRaises(PassthroughConfigError): + handler.api_passthrough(PassthroughRequest("GET", "/me")) + + def test_path_must_start_with_slash(self): + handler = _FakeHandler({"api_key": "t"}) + with self.assertRaises(PassthroughValidationError): + handler.api_passthrough(PassthroughRequest("GET", "me")) + + def test_method_allowlist(self): + handler = _FakeHandler({"api_key": "t"}) + with self.assertRaises(PassthroughValidationError): + handler.api_passthrough(PassthroughRequest("TRACE", "/me")) + + +class PassthroughSecretScrubTests(unittest.TestCase): + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_token_scrubbed_from_json_body(self, mock_request): + token = "secret-token-abcdef1234567890" + # Non-UTF-8 byte (0xFF) positioned adjacent to the token. Spec Β§7.6 + # mandates byte-level scrubbing: if the scrub ran after a + # errors="replace" decode, U+FFFD insertions would risk fragmenting + # a token mid-match. Byte-level scrub avoids that entirely. + body = b'{"error":"Invalid token ' + token.encode("utf-8") + b' \xff trailing"}' + handler = _FakeHandler({"api_key": token}) + # Use plain-text content-type so the non-UTF-8 body survives without + # a json.loads detour; the scrub is still invoked. + mock_request.return_value = _mock_response(body=body, content_type="text/plain") + + resp = handler.api_passthrough(PassthroughRequest("GET", "/x")) + # Token must not survive anywhere in the body. + self.assertNotIn(token, str(resp.body)) + self.assertIn(REDACTED_SENTINEL, str(resp.body)) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_token_scrubbed_from_headers(self, mock_request): + token = "secret-token-abcdef1234567890" + handler = _FakeHandler({"api_key": token}) + mock_request.return_value = _mock_response( + headers={"X-Debug-Auth": f"Bearer {token}"}, + ) + resp = handler.api_passthrough(PassthroughRequest("GET", "/x")) + self.assertIn(REDACTED_SENTINEL, resp.headers["X-Debug-Auth"]) + self.assertNotIn(token, resp.headers["X-Debug-Auth"]) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_long_default_header_values_scrubbed(self, mock_request): + token = "secret-token-abcdef1234567890" + long_secret = "x" * 32 + handler = _FakeHandler( + { + "api_key": token, + "default_headers": {"X-Api-Secondary": long_secret}, + } + ) + mock_request.return_value = _mock_response(body=('{"echoed":"' + long_secret + '"}').encode("utf-8")) + resp = handler.api_passthrough(PassthroughRequest("GET", "/x")) + self.assertEqual(resp.body["echoed"], REDACTED_SENTINEL) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_token_scrubbed_in_nested_json_without_corrupting_structure(self, mock_request): + token = "secret-token-abcdef1234567890" + handler = _FakeHandler({"api_key": token}) + body = ('{"data": {"nested": {"token": "' + token + '"}}}').encode("utf-8") + mock_request.return_value = _mock_response(body=body) + + resp = handler.api_passthrough(PassthroughRequest("GET", "/x")) + + # Structure preserved: dict-of-dict-of-dict with the expected keys. + self.assertIsInstance(resp.body, dict) + self.assertIsInstance(resp.body["data"], dict) + self.assertIsInstance(resp.body["data"]["nested"], dict) + self.assertEqual(set(resp.body.keys()), {"data"}) + self.assertEqual(set(resp.body["data"].keys()), {"nested"}) + self.assertEqual(set(resp.body["data"]["nested"].keys()), {"token"}) + # Value redacted at the leaf; token does not survive anywhere. + self.assertEqual(resp.body["data"]["nested"]["token"], REDACTED_SENTINEL) + self.assertNotIn(token, str(resp.body)) + + +class PassthroughTestEndpointTests(unittest.TestCase): + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_returns_ok_on_200(self, mock_request): + handler = _FakeHandler({"api_key": "t"}) + mock_request.return_value = _mock_response(status_code=200) + result = handler.test_passthrough() + self.assertTrue(result["ok"]) + self.assertEqual(result["status_code"], 200) + self.assertEqual(result["host"], "api.example.com") + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_returns_auth_failed_on_401(self, mock_request): + handler = _FakeHandler({"api_key": "t"}) + mock_request.return_value = _mock_response(status_code=401) + result = handler.test_passthrough() + self.assertFalse(result["ok"]) + self.assertEqual(result["error_code"], "auth_failed") + + def test_returns_not_implemented_when_no_test_request(self): + class NoTest(_FakeHandler): + _test_request = None + + handler = NoTest({"api_key": "t"}) + result = handler.test_passthrough() + self.assertFalse(result["ok"]) + self.assertEqual(result["error_code"], "not_implemented") + + +class PassthroughAllowedMethodsTests(unittest.TestCase): + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_rejects_method_not_in_allowed_methods(self, mock_request): + handler = _FakeHandler( + { + "api_key": "t", + "allowed_methods": ["GET"], + } + ) + mock_request.return_value = _mock_response() + + with self.assertRaises(PassthroughValidationError) as cm: + handler.api_passthrough(PassthroughRequest("POST", "/x")) + + self.assertEqual(cm.exception.error_code, "method_not_allowed") + self.assertEqual(cm.exception.http_status, 405) + mock_request.assert_not_called() + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_all_methods_allowed_when_config_absent(self, mock_request): + handler = _FakeHandler({"api_key": "t"}) + mock_request.return_value = _mock_response() + + for method in ("GET", "POST", "PUT", "PATCH", "DELETE"): + mock_request.reset_mock() + mock_request.return_value = _mock_response() + handler.api_passthrough(PassthroughRequest(method, "/x")) + self.assertEqual(mock_request.call_args[0][0], method) + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_string_allowed_methods_raises_config_error(self, mock_request): + handler = _FakeHandler({"api_key": "t", "allowed_methods": "GET"}) + + with self.assertRaises(PassthroughConfigError): + handler.api_passthrough(PassthroughRequest("GET", "/x")) + + mock_request.assert_not_called() + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_lowercase_allowed_methods_normalized(self, mock_request): + handler = _FakeHandler({"api_key": "t", "allowed_methods": ["get"]}) + mock_request.return_value = _mock_response() + + # GET passes after uppercase normalization. + handler.api_passthrough(PassthroughRequest("GET", "/x")) + self.assertEqual(mock_request.call_args[0][0], "GET") + + mock_request.reset_mock() + # POST is rejected with method_not_allowed. + with self.assertRaises(PassthroughValidationError) as cm: + handler.api_passthrough(PassthroughRequest("POST", "/x")) + self.assertEqual(cm.exception.error_code, "method_not_allowed") + self.assertEqual(cm.exception.http_status, 405) + mock_request.assert_not_called() + + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_unknown_verb_in_allowed_methods_raises_config_error(self, mock_request): + handler = _FakeHandler({"api_key": "t", "allowed_methods": ["GET", "TRACE"]}) + + with self.assertRaises(PassthroughConfigError) as cm: + handler.api_passthrough(PassthroughRequest("GET", "/x")) + self.assertIn("TRACE", str(cm.exception)) + mock_request.assert_not_called() + + +class PassthroughAuthHeaderOverrideTests(unittest.TestCase): + @patch("mindsdb.integrations.libs.passthrough.requests.request") + def test_custom_auth_header_name_and_format(self, mock_request): + class ShopifyLikeHandler(_FakeHandler): + _auth_header_name = "X-Shopify-Access-Token" + _auth_header_format = "{token}" + + handler = ShopifyLikeHandler({"api_key": "shpat_abc123"}) + mock_request.return_value = _mock_response() + + handler.api_passthrough(PassthroughRequest("GET", "/x")) + + outgoing = mock_request.call_args.kwargs["headers"] + # Custom header present, with raw token (no "Bearer " prefix). + self.assertEqual(outgoing["X-Shopify-Access-Token"], "shpat_abc123") + # Default Authorization header must NOT be added when the handler + # overrides the auth header name. + self.assertNotIn("Authorization", outgoing) + + +class PassthroughProtocolTests(unittest.TestCase): + def test_non_mixin_class_satisfies_protocol(self): + from mindsdb.integrations.libs.passthrough import PassthroughProtocol + from mindsdb.integrations.libs.passthrough_types import PassthroughResponse + + class ManualHandler: + def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse: + return PassthroughResponse(status_code=200, headers={}, body=None, content_type=None) + + def test_passthrough(self) -> dict: + return {"ok": True} + + self.assertIsInstance(ManualHandler(), PassthroughProtocol) + + def test_class_missing_methods_fails_protocol(self): + from mindsdb.integrations.libs.passthrough import PassthroughProtocol + + class Incomplete: + def api_passthrough(self, req): ... + + # missing test_passthrough + + self.assertNotIsInstance(Incomplete(), PassthroughProtocol) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unused/integration/flows/test_ml_task_queue.py b/tests/unit/utilities/ml_task_queue/test_ml_task_queue.py similarity index 72% rename from tests/unused/integration/flows/test_ml_task_queue.py rename to tests/unit/utilities/ml_task_queue/test_ml_task_queue.py index 8e17fca4d08..1880c90fc6d 100644 --- a/tests/unused/integration/flows/test_ml_task_queue.py +++ b/tests/unit/utilities/ml_task_queue/test_ml_task_queue.py @@ -15,18 +15,17 @@ @pytest.mark.skipif("localhost" in HTTP_API_ROOT or "127.0.0.1" in HTTP_API_ROOT, reason="Requires redis") class TestMLTaskQueue(HTTPHelperMixin): - def test_redis_connection(self): db = Database(protocol=3, host=REDIS_HOST) db.ping() def test_create_model(self, train_finetune_lock): - """ 1. create db connection - 2. create test dataset - 3. start to train model in 'async' mode: check status - 4. start to train model in 'sync' mode: check status - 5. await model 2 is finished - 6. 2 messages in redis stream + """1. create db connection + 2. create test dataset + 3. start to train model in 'async' mode: check status + 4. start to train model in 'sync' mode: check status + 5. await model 2 is finished + 6. 2 messages in redis stream """ db_details = { @@ -37,8 +36,8 @@ def test_create_model(self, train_finetune_lock): "port": "5432", "user": "demo_user", "password": "demo_password", - "database": "demo" - } + "database": "demo", + }, } self.sql_via_http("DROP MODEL IF EXISTS p_test_queue_async;", RESPONSE_TYPE.OK) @@ -47,7 +46,7 @@ def test_create_model(self, train_finetune_lock): query = f""" CREATE DATABASE IF NOT EXISTS test_demo_queue WITH ENGINE = 'postgres', - PARAMETERS = {json.dumps(db_details['connection_data'])}; + PARAMETERS = {json.dumps(db_details["connection_data"])}; """ self.sql_via_http(query, RESPONSE_TYPE.OK) @@ -58,8 +57,8 @@ def test_create_model(self, train_finetune_lock): predict rental_price; """ response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status = response['data'][0][response['column_names'].index('STATUS')] - assert status in ('generating', 'training') + status = response["data"][0][response["column_names"].index("STATUS")] + assert status in ("generating", "training") query = """ create predictor p_test_queue_sync @@ -68,16 +67,16 @@ def test_create_model(self, train_finetune_lock): USING join_learn_process=true; """ response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status = response['data'][0][response['column_names'].index('STATUS')] - assert status == 'complete' + status = response["data"][0][response["column_names"].index("STATUS")] + assert status == "complete" - status = self.await_model('p_test_queue_async') - assert status == 'complete' + status = self.await_model("p_test_queue_async") + assert status == "complete" db = Database(protocol=3, host=REDIS_HOST) assert TASKS_STREAM_NAME in db.keys() - assert db.type(TASKS_STREAM_NAME) == b'stream' + assert db.type(TASKS_STREAM_NAME) == b"stream" xlen = db.xlen(TASKS_STREAM_NAME) if xlen != 0: lol = db.xrange(TASKS_STREAM_NAME) @@ -85,8 +84,7 @@ def test_create_model(self, train_finetune_lock): assert db.xlen(TASKS_STREAM_NAME) == 0 def test_predict(self): - """ make predict queries to both trained models - """ + """make predict queries to both trained models""" query = """ SELECT rental_price, @@ -95,8 +93,8 @@ def test_predict(self): WHERE b = 10; """ response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - assert len(response['data']) == 1 - assert len(response['data'][0]) == 2 + assert len(response["data"]) == 1 + assert len(response["data"][0]) == 2 query = """ SELECT rental_price, @@ -106,15 +104,14 @@ def test_predict(self): LIMIT 3; """ response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - assert len(response['data']) == 3 - assert len(response['data'][0]) == 2 + assert len(response["data"]) == 3 + assert len(response["data"][0]) == 2 db = Database(protocol=3, host=REDIS_HOST) assert db.xlen(TASKS_STREAM_NAME) == 0 def test_finetune(self, train_finetune_lock): - """ check that finetune is working - """ + """check that finetune is working""" with train_finetune_lock.acquire(timeout=600): query = """ @@ -123,20 +120,20 @@ def test_finetune(self, train_finetune_lock): USING join_learn_process=true; """ response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status = response['data'][0][response['column_names'].index('STATUS')] - assert status == 'complete' + status = response["data"][0][response["column_names"].index("STATUS")] + assert status == "complete" query = """ FINETUNE p_test_queue_async FROM test_demo_queue (SELECT * FROM demo_data.home_rentals LIMIT 10); """ response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status = response['data'][0][response['column_names'].index('STATUS')] + status = response["data"][0][response["column_names"].index("STATUS")] # FINETUNE in this case may be very fast, so add 'complete' to check - assert status in ('generating', 'training', 'complete') + assert status in ("generating", "training", "complete") - status = self.await_model('p_test_queue_async', version_number=2) - assert status == 'complete' + status = self.await_model("p_test_queue_async", version_number=2) + assert status == "complete" db = Database(protocol=3, host=REDIS_HOST) assert db.xlen(TASKS_STREAM_NAME) == 0 diff --git a/tests/unit/utilities/test_community_handler_fetcher.py b/tests/unit/utilities/test_community_handler_fetcher.py new file mode 100644 index 00000000000..3fd1ad3b942 --- /dev/null +++ b/tests/unit/utilities/test_community_handler_fetcher.py @@ -0,0 +1,302 @@ +import shutil +import tempfile +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +import requests + +from mindsdb.integrations.utilities.community_handler_fetcher import ( + _fetch_tree_recursive, + _resolve_tree_sha, + fetch_handler, +) + +REPO = "mindsdb/mindsdb-community-handlers" +BRANCH = "main" +PATH_PREFIX = "community_handlers" +HANDLER = "elasticsearch_handler" +REMOTE_PREFIX = f"{PATH_PREFIX}/{HANDLER}" +TREE_SHA = "abc123deadbeef" + + +def _make_response(status_code=200, json_data=None, raise_for_status=None): + """Helper: build a MagicMock that looks like a requests.Response.""" + resp = MagicMock() + resp.status_code = status_code + resp.json.return_value = json_data if json_data is not None else {} + resp.text = "" + if raise_for_status is not None: + resp.raise_for_status.side_effect = raise_for_status + else: + resp.raise_for_status.return_value = None + return resp + + +def _make_get_side_effect(contents_resp, trees_resp, raw_resp=None): + """Return a side_effect callable that dispatches mocked responses by URL.""" + + def _get(url, **kwargs): + if "git/trees" in url: + return trees_resp + if "raw.githubusercontent.com" in url: + return raw_resp if raw_resp is not None else _make_response(200, b"") + # Contents API (resolve SHA or other) + return contents_resp + + return _get + + +class TestResolveTreSha(unittest.TestCase): + """Unit tests for _resolve_tree_sha().""" + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_returns_sha_when_found(self, mock_get): + parent_listing = [ + {"name": "other_handler", "type": "dir", "sha": "000"}, + {"name": HANDLER, "type": "dir", "sha": TREE_SHA}, + ] + mock_get.return_value = _make_response(200, parent_listing) + + result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) + + self.assertEqual(result, TREE_SHA) + mock_get.assert_called_once() + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_returns_none_on_404(self, mock_get): + mock_get.return_value = _make_response(404) + + result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) + + self.assertIsNone(result) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_returns_none_when_dir_not_in_listing(self, mock_get): + parent_listing = [{"name": "other_handler", "type": "dir", "sha": "000"}] + mock_get.return_value = _make_response(200, parent_listing) + + result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) + + self.assertIsNone(result) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_raises_on_non_200_non_404(self, mock_get): + mock_get.return_value = _make_response(503) + + with self.assertRaises(RuntimeError): + _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_raises_on_network_error(self, mock_get): + mock_get.side_effect = requests.RequestException("timeout") + + with self.assertRaises(RuntimeError): + _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) + + +class TestFetchTreeRecursive(unittest.TestCase): + """Unit tests for _fetch_tree_recursive().""" + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def _trees_response(self, entries, truncated=False): + return _make_response(200, {"sha": TREE_SHA, "tree": entries, "truncated": truncated}) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_downloads_flat_and_nested_files(self, mock_get): + tree_entries = [ + {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10}, + {"path": "elasticsearch_handler.py", "type": "blob", "sha": "s2", "size": 500}, + {"path": "tests", "type": "tree", "sha": "s3"}, + {"path": "tests/__init__.py", "type": "blob", "sha": "s4", "size": 0}, + {"path": "tests/test_elasticsearch_handler.py", "type": "blob", "sha": "s5", "size": 1107}, + ] + trees_resp = self._trees_response(tree_entries) + raw_resp = _make_response(200) + raw_resp.content = b"# file content" + + def _get(url, **kwargs): + if "git/trees" in url: + return trees_resp + return raw_resp + + mock_get.side_effect = _get + + count = _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}) + + self.assertEqual(count, 4) + self.assertTrue((self.tmp / "__init__.py").exists()) + self.assertTrue((self.tmp / "elasticsearch_handler.py").exists()) + self.assertTrue((self.tmp / "tests" / "__init__.py").exists()) + self.assertTrue((self.tmp / "tests" / "test_elasticsearch_handler.py").exists()) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_max_depth_enforcement(self, mock_get): + # depth 4 means path has 4 slashes β†’ 5 components β†’ should be skipped + deep_path = "a/b/c/d/e.py" + self.assertEqual(deep_path.count("/"), 4) # 4 >= max_depth=4 β†’ skipped + + tree_entries = [ + {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 0}, + {"path": deep_path, "type": "blob", "sha": "s2", "size": 99}, + ] + trees_resp = self._trees_response(tree_entries) + raw_resp = _make_response(200) + raw_resp.content = b"" + + def _get(url, **kwargs): + if "git/trees" in url: + return trees_resp + return raw_resp + + mock_get.side_effect = _get + + count = _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}, max_depth=4) + + self.assertEqual(count, 1) + self.assertTrue((self.tmp / "__init__.py").exists()) + self.assertFalse((self.tmp / deep_path).exists()) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_truncated_tree_logs_warning(self, mock_get): + trees_resp = self._trees_response([], truncated=True) + mock_get.return_value = trees_resp + + with self.assertLogs("mindsdb.integrations.utilities.community_handler_fetcher", level="WARNING") as cm: + _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}) + + self.assertTrue(any("truncated" in line for line in cm.output)) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_raises_on_file_download_failure(self, mock_get): + tree_entries = [{"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10}] + trees_resp = self._trees_response(tree_entries) + raw_resp = _make_response(500) + raw_resp.raise_for_status.side_effect = requests.HTTPError("500 Server Error") + + def _get(url, **kwargs): + if "git/trees" in url: + return trees_resp + return raw_resp + + mock_get.side_effect = _get + + with self.assertRaises(RuntimeError): + _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}) + + +class TestFetchHandler(unittest.TestCase): + """Integration-style unit tests for fetch_handler().""" + + def setUp(self): + self.storage = Path(tempfile.mkdtemp()) + + def tearDown(self): + shutil.rmtree(self.storage, ignore_errors=True) + + def _parent_listing(self): + return [{"name": HANDLER, "type": "dir", "sha": TREE_SHA}] + + def _tree_entries(self): + return [ + {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10}, + {"path": "tests/__init__.py", "type": "blob", "sha": "s2", "size": 0}, + {"path": "tests/test_elasticsearch_handler.py", "type": "blob", "sha": "s3", "size": 1107}, + ] + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_handler_with_subdirectories(self, mock_get): + contents_resp = _make_response(200, self._parent_listing()) + trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": self._tree_entries(), "truncated": False}) + raw_resp = _make_response(200) + raw_resp.content = b"# content" + + mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp) + + result = fetch_handler(HANDLER, self.storage) + + dest = self.storage / HANDLER + self.assertEqual(result, dest) + self.assertTrue((dest / "__init__.py").exists()) + self.assertTrue((dest / "tests" / "__init__.py").exists()) + self.assertTrue((dest / "tests" / "test_elasticsearch_handler.py").exists()) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_404_handler_not_found(self, mock_get): + mock_get.return_value = _make_response(404) + + result = fetch_handler(HANDLER, self.storage) + + self.assertIsNone(result) + # tmp dir must not be left behind + self.assertFalse((self.storage / f".tmp_{HANDLER}").exists()) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_atomic_rename_cleanup_on_failure(self, mock_get): + contents_resp = _make_response(200, self._parent_listing()) + trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": self._tree_entries(), "truncated": False}) + # Simulate a download failure for raw files + raw_resp = _make_response(500) + raw_resp.raise_for_status.side_effect = requests.HTTPError("500") + + mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp) + + with self.assertRaises(RuntimeError): + fetch_handler(HANDLER, self.storage) + + # tmp dir must be cleaned up after the exception + self.assertFalse((self.storage / f".tmp_{HANDLER}").exists()) + # dest dir must not exist either + self.assertFalse((self.storage / HANDLER).exists()) + + def test_existing_handler_skips_fetch(self): + dest = self.storage / HANDLER + dest.mkdir(parents=True) + (dest / "__init__.py").write_text("# existing") + + with patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") as mock_get: + result = fetch_handler(HANDLER, self.storage) + + self.assertEqual(result, dest) + mock_get.assert_not_called() + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_max_depth_files_not_written(self, mock_get): + deep_path = "a/b/c/d/deep.py" + tree_entries = [ + {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 0}, + {"path": deep_path, "type": "blob", "sha": "s2", "size": 99}, + ] + contents_resp = _make_response(200, self._parent_listing()) + trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": tree_entries, "truncated": False}) + raw_resp = _make_response(200) + raw_resp.content = b"" + + mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp) + + fetch_handler(HANDLER, self.storage) + + dest = self.storage / HANDLER + self.assertTrue((dest / "__init__.py").exists()) + self.assertFalse((dest / deep_path).exists()) + + @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") + def test_truncated_tree_warning_propagates(self, mock_get): + contents_resp = _make_response(200, self._parent_listing()) + trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": [], "truncated": True}) + + mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp) + + with self.assertLogs("mindsdb.integrations.utilities.community_handler_fetcher", level="WARNING") as cm: + fetch_handler(HANDLER, self.storage) + + self.assertTrue(any("truncated" in line for line in cm.output)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utilities/test_config.py b/tests/unit/utilities/test_config.py index 88113161409..d5bd93d46b7 100644 --- a/tests/unit/utilities/test_config.py +++ b/tests/unit/utilities/test_config.py @@ -39,3 +39,23 @@ def test_invalid_mindsdb_db_con_raises_error(self): error_message = str(exc_info.value) assert "Invalid MINDSDB_DB_CON value" in error_message assert invalid_db_con in error_message + + def test_knowledge_bases_storage_env_does_not_override_storage_config(self): + Config._Config__instance = None + + with tempfile.TemporaryDirectory() as tmpdir: + config_file = Path(tmpdir) / "config.json" + config_file.write_text(json.dumps({})) + + with patch.dict( + os.environ, + { + "MINDSDB_CONFIG_PATH": str(config_file), + "MINDSDB_STORAGE_DIR": tmpdir, + "KNOWLEDGE_BASES_STORAGE": "faiss, pgvector", + }, + clear=False, + ): + cfg = Config() + + assert cfg["knowledge_bases"]["storage"] is None diff --git a/tests/unit/various/test_llm_utils.py b/tests/unit/various/test_llm_utils.py index 28c7f41f960..d5df7a77e42 100644 --- a/tests/unit/various/test_llm_utils.py +++ b/tests/unit/various/test_llm_utils.py @@ -1,86 +1,12 @@ import unittest -from textwrap import dedent, indent from numpy import int64 import pandas as pd -from mindsdb.integrations.libs.llm.utils import ft_chat_formatter, ft_code_formatter, ft_cqa_formatter -from mindsdb.integrations.libs.llm.utils import ft_jsonl_validation, ft_chat_format_validation from mindsdb.integrations.libs.llm.utils import get_completed_prompts class TestLLM(unittest.TestCase): - @classmethod - def setUpClass(cls): - # used in `test_ft_chat_format_validation` - cls.valid_chats = [ - # u/a pattern - [ - {"role": "user", "content": "hi"}, - {"role": "assistant", "content": "hello"}, - {"role": "user", "content": "how are you?"}, - {"role": "assistant", "content": "I'm good, thanks"}, - ], - # u/a pattern - [ - {"role": "user", "content": "hi"}, - {"role": "assistant", "content": "hello"}, - {"role": "user", "content": "how are you?"}, - ], - # s/u/a pattern - [ - {"role": "system", "content": "you are a useful assistant."}, - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "how are you?"}, - ], - # s/u/a pattern - [ - {"role": "system", "content": "you are a useful assistant."}, - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "how are you?"}, - {"role": "user", "content": "I'm good, thanks"}, - ], - ] - - # used in `test_ft_chat_format_validation` - cls.invalid_chats = [ - # invalid - repeated user - [ - {"role": "user", "content": "hi"}, - {"role": "user", "content": "hello"}, # this is invalid - {"role": "assistant", "content": "how are you?"}, - {"role": "user", "content": "I'm good, thanks"}, - ], - # invalid - repeated assistant - [ - {"role": "user", "content": "hi"}, - {"role": "assistant", "content": "hello"}, - {"role": "assistant", "content": "how are you?"}, # this is invalid - {"role": "user", "content": "I'm good, thanks"}, - ], - # invalid - incorrect system prompt order - [ - {"role": "user", "content": "hi"}, - {"role": "assistant", "content": "hello"}, - {"role": "system", "content": "you are a useful assistant."}, # this is invalid - {"role": "user", "content": "I'm good, thanks"}, - ], - # invalid roles - [ - {"role": "user", "content": "hi"}, - {"role": "invalid", "content": "this is an invalid role"}, - ], - # invalid content - [ - {"role": "user", "content": "hi"}, - {"role": "assistant", "content": None}, # should always be a string - ], - # invalid - no assistant in the chat - [ - {"role": "user", "content": "hi"}, - ], - ] - def test_get_completed_prompts(self): placeholder = "{{text}}" prefix = "You are a helpful assistant. Here is the user's input:" @@ -107,160 +33,3 @@ def test_get_completed_prompts(self): df = pd.DataFrame({"text": user_inputs}) with self.assertRaises(Exception): get_completed_prompts(base_template, df) - - def test_ft_chat_format_validation(self): - for chat in self.valid_chats: - ft_chat_format_validation(chat) # if chat is valid, returns `None` - - for chat in self.invalid_chats: - with self.assertRaises(Exception): - ft_chat_format_validation(chat) # all of these should raise an Exception - - def test_ft_chat_formatter(self): - # 1a. long DF with required columns (`role` and `content`) - df = pd.DataFrame( - { - "role": ["system", "user", "assistant", "user"], - "content": ["you are a helpful assistant", "hello", "hi, how can I help?", "I'm good, thanks"], - } - ) - chats = ft_chat_formatter(df) - assert list(chats[0].keys()) == ["messages"] - ft_chat_format_validation(chats[0]["messages"]) # valid, returns None - - # 1b. add `chat_id` to df - df = pd.DataFrame( - { - "chat_id": [1, 1, 1, 2, 2, 2], - "role": ["system", "user", "assistant"] * 2, - "content": ["you are a helpful assistant", "hello", "hi, how can I help?"] * 2, - } - ) - # add extra row at the end, belonging to first chat. This checks sorting. - df = pd.concat([df, pd.DataFrame({"chat_id": [1], "role": ["user"], "content": ["I'm good, thanks"]})]) - chats = ft_chat_formatter(df) - for chat in chats: - assert list(chat.keys()) == ["messages"] - ft_chat_format_validation(chat["messages"]) # valid, returns None - - # 1c. add `message_id` to df (scrambled to check sorting) - df = pd.DataFrame( - { - "chat_id": [1, 2, 1, 2, 1, 2], - "message_id": [1, 1, 2, 2, 3, 3], - "role": ["system", "system", "user", "user", "assistant", "assistant"], - "content": ["you are a helpful assistant"] * 2 + ["hello"] * 2 + ["hi, how can I help?"] * 2, - } - ) - chats = ft_chat_formatter(df) - for chat in chats: - assert list(chat.keys()) == ["messages"] - ft_chat_format_validation(chat["messages"]) # valid, returns None - - # 2a. json format - df contains single column `chat_json` - df = pd.DataFrame( - { - "chat_json": [ - '{"messages": [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]}' - ] - } - ) - chats = ft_chat_formatter(df) - assert list(chats[0].keys()) == ["messages"] - ft_chat_format_validation(chats[0]["messages"]) # valid, returns None - - def test_ft_jsonl_validation(self): - df = pd.DataFrame( - { - "role": ["system", "user", "assistant", "user"], - "content": ["you are a helpful assistant", "hello", "hi, how can I help?", "I'm good, thanks"], - } - ) - chats = ft_chat_formatter(df) - - # when validated, this method won't return anything - assert ft_jsonl_validation([line for line in chats]) is None - - # otherwise, it raises an Exception - chats = ft_chat_formatter(df) - chats[0]["messages"][1]["role"] = "invalid" - with self.assertRaises(Exception): - ft_jsonl_validation([line for line in chats]) - - def test_ft_code_formatter(self): - df = pd.DataFrame( - { - "code": [ - "".join( - [ - indent( - dedent("""\ - # format chunks into prompts - roles = [] - contents = [] - - for idx in range(0, len(chunks), 3): - """), - " " * 4 * 2, - ), # mind the base indent level - indent( - dedent( - """pre, mid, suf = chunks[idx:idx+3] - - interleaved = list(itertools.chain(*zip(templates, (pre, mid, suf)))) - """ - ), - " " * 4 * 3, - ), # mind the base indent level - ] - ) - ] - } - ) - df2 = ft_code_formatter(df, chunk_size=110) - - assert list(df2["role"]) == ["system", "user", "assistant"] - assert ( - df2["content"].iloc[0] - == "You are a powerful text to code model. Your job is to provide great code completions. As context, you are given code that is found immediately before and after the code you must generate.\n\nYou must output the code that should go in between the prefix and suffix.\n\n" - ) # noqa - assert ( - df2["content"].iloc[1] - == "### Code prefix:\n # format chunks into prompts\n roles = []\n contents = []\n\n\n### Code suffix:\n interleaved = list(itertools.chain(*zip(templates, (pre, mid, suf))))\n\n### Completion:" - ) # noqa - assert ( - df2["content"].iloc[2] - == " for idx in range(0, len(chunks), 3):\n pre, mid, suf = chunks[idx:idx+3]\n\n" - ) # noqa - - df2 = ft_code_formatter(df, format="fim", chunk_size=110) - assert list(df2["role"]) == ["system", "user", "assistant"] - assert ( - df2["content"].iloc[0] - == "You are a powerful text to code model. Your job is to provide great code completions. As context, you are given code that is found immediately before and after the code you must generate.\n\nYou must output the code that should go in between the prefix and suffix.\n\n" - ) # noqa - assert ( - df2["content"].iloc[1] - == "
\n        # format chunks into prompts\n        roles = []\n        contents = []\n\n\n\n                                interleaved = list(itertools.chain(*zip(templates, (pre, mid, suf))))\n\n"
-        )  # noqa
-        assert (
-            df2["content"].iloc[2]
-            == "        for idx in range(0, len(chunks), 3):\n            pre, mid, suf = chunks[idx:idx+3]\n\n"
-        )  # noqa
-
-    def test_ft_cqa_formatter(self):
-        df = pd.DataFrame(
-            {
-                "instruction": ["Answer accurately."],
-                "context": ["You are a helpful assistant."],
-                "question": ["What is the capital of France?"],
-                "answer": ["Paris"],
-            }
-        )
-
-        df2 = ft_cqa_formatter(df)
-
-        assert list(df2["role"]) == ["system", "user", "assistant"]
-        assert df2["content"].iloc[0] == "Answer accurately.\nYou are a helpful assistant."
-        assert df2["content"].iloc[1] == "What is the capital of France?"
-        assert df2["content"].iloc[2] == "Paris"
diff --git a/tests/unit/various/test_main.py b/tests/unit/various/test_main.py
new file mode 100644
index 00000000000..689a9eb93af
--- /dev/null
+++ b/tests/unit/various/test_main.py
@@ -0,0 +1,183 @@
+import pathlib
+import shutil
+from unittest.mock import patch
+import pytest
+
+
+class TestMainCleanup:
+    @pytest.fixture
+    def patch_main_config(self, tmp_path, monkeypatch):
+        import mindsdb.__main__ as main_mod
+
+        monkeypatch.setattr(main_mod, "config", {"paths": {"tmp": tmp_path}})
+        return tmp_path, main_mod
+
+    @pytest.fixture
+    def errors(self, caplog):
+        """Capture only ERROR logs as concatenated text"""
+
+        class ErrorCapture:
+            @property
+            def text(self):
+                return "\n".join(r.getMessage() for r in caplog.records if r.levelname == "ERROR")
+
+        caplog.clear()
+        caplog.set_level("ERROR")
+        return ErrorCapture()
+
+    def test_cleans_files_and_dirs_but_keeps_tmp_path(self, patch_main_config):
+        tmp_path, main_mod = patch_main_config
+        (tmp_path / "a.txt").write_text("hello")
+        sub = tmp_path / "sub"
+        sub.mkdir()
+        (sub / "b.txt").write_text("world")
+
+        main_mod.clean_mindsdb_tmp_dir()
+
+        assert tmp_path.exists(), "tmp_path itself should not be deleted"
+        assert list(tmp_path.iterdir()) == [], "All content should be removed"
+
+    def test_empty_directory(self, patch_main_config):
+        tmp_path, main_mod = patch_main_config
+        main_mod.clean_mindsdb_tmp_dir()
+        assert tmp_path.exists()
+        assert list(tmp_path.iterdir()) == []
+
+    def test_deeply_nested_directories(self, patch_main_config):
+        tmp_path, main_mod = patch_main_config
+        deep = tmp_path / "a" / "b" / "c" / "d"
+        deep.mkdir(parents=True)
+        (deep / "file.txt").write_text("deep")
+
+        main_mod.clean_mindsdb_tmp_dir()
+
+        assert tmp_path.exists()
+        assert not (tmp_path / "a").exists()
+
+    def test_symlinks_are_handled(self, patch_main_config):
+        tmp_path, main_mod = patch_main_config
+
+        external_file = tmp_path.parent / "external.txt"
+        external_file.write_text("external")
+
+        (tmp_path / "link_to_external").symlink_to(external_file)
+
+        main_mod.clean_mindsdb_tmp_dir()
+
+        assert tmp_path.exists()
+        assert list(tmp_path.iterdir()) == []
+        assert external_file.exists()
+
+        external_file.unlink()
+
+    def test_unlink_failure_continues_and_logs(self, patch_main_config, errors):
+        tmp_path, main_mod = patch_main_config
+        (tmp_path / "ok1.txt").write_text("a")
+        (tmp_path / "failing_file.txt").write_text("b")
+        (tmp_path / "ok2.txt").write_text("c")
+
+        original_unlink = pathlib.Path.unlink
+
+        def mock_unlink(self, *args, **kwargs):
+            if self.name == "failing_file.txt":
+                raise PermissionError("Cannot delete file")
+            return original_unlink(self, *args, **kwargs)
+
+        with patch.object(pathlib.Path, "unlink", mock_unlink):
+            main_mod.clean_mindsdb_tmp_dir()
+
+        txt = errors.text
+        assert "Failed to clean" in txt
+        assert "Cannot delete file" in txt
+
+        assert not (tmp_path / "ok1.txt").exists()
+        assert not (tmp_path / "ok2.txt").exists()
+        assert (tmp_path / "failing_file.txt").exists()
+
+    def test_rmtree_failure_continues_and_logs(self, patch_main_config, errors):
+        tmp_path, main_mod = patch_main_config
+
+        (tmp_path / "file.txt").write_text("content")
+        (tmp_path / "failing_dir").mkdir()
+        (tmp_path / "another_file.txt").write_text("more content")
+        (tmp_path / "good_dir").mkdir()
+
+        original_rmtree = shutil.rmtree
+
+        def mock_rmtree(path, *args, **kwargs):
+            if "failing_dir" in str(path):
+                raise PermissionError("Cannot delete directory")
+            return original_rmtree(path, *args, **kwargs)
+
+        with patch("shutil.rmtree", mock_rmtree):
+            main_mod.clean_mindsdb_tmp_dir()
+
+        txt = errors.text
+        assert "Failed to clean" in txt
+        assert "Cannot delete directory" in txt
+
+        assert not (tmp_path / "file.txt").exists()
+        assert not (tmp_path / "another_file.txt").exists()
+        assert not (tmp_path / "good_dir").exists()
+        assert (tmp_path / "failing_dir").exists()
+
+    def test_mixed_failures_continue_cleanup(self, patch_main_config, errors):
+        tmp_path, main_mod = patch_main_config
+
+        (tmp_path / "good_file1.txt").write_text("a")
+        (tmp_path / "failing_file.txt").write_text("b")
+        (tmp_path / "good_file2.txt").write_text("c")
+        (tmp_path / "failing_dir").mkdir()
+        (tmp_path / "good_dir").mkdir()
+
+        original_unlink = pathlib.Path.unlink
+        original_rmtree = shutil.rmtree
+
+        def mock_unlink(self, *args, **kwargs):
+            if self.name == "failing_file.txt":
+                raise PermissionError("Cannot delete file")
+            return original_unlink(self, *args, **kwargs)
+
+        def mock_rmtree(path, *args, **kwargs):
+            if "failing_dir" in str(path):
+                raise PermissionError("Cannot delete directory")
+            return original_rmtree(path, *args, **kwargs)
+
+        with patch.object(pathlib.Path, "unlink", mock_unlink), patch("shutil.rmtree", mock_rmtree):
+            main_mod.clean_mindsdb_tmp_dir()
+
+        txt = errors.text
+        # We should have at least two "Failed to clean" lines (file + dir)
+        assert txt.count("Failed to clean") >= 2
+
+        assert not (tmp_path / "good_file1.txt").exists()
+        assert not (tmp_path / "good_file2.txt").exists()
+        assert not (tmp_path / "good_dir").exists()
+        assert (tmp_path / "failing_file.txt").exists()
+        assert (tmp_path / "failing_dir").exists()
+
+    def test_nonexistent_tmp_path(self, monkeypatch):
+        import mindsdb.__main__ as main_mod
+        from pathlib import Path
+
+        nonexistent = Path("/tmp/nonexistent_mindsdb_test_dir_12345")
+        assert not nonexistent.exists()
+
+        monkeypatch.setattr(main_mod, "config", {"paths": {"tmp": nonexistent}})
+        main_mod.clean_mindsdb_tmp_dir()
+        assert not nonexistent.exists()
+
+    def test_logger_called_with_correct_level(self, patch_main_config):
+        tmp_path, main_mod = patch_main_config
+        (tmp_path / "failing_file.txt").write_text("content")
+
+        original_unlink = pathlib.Path.unlink
+
+        def mock_unlink(self, *args, **kwargs):
+            if self.name == "failing_file.txt":
+                raise PermissionError("Test error")
+            return original_unlink(self, *args, **kwargs)
+
+        with patch.object(pathlib.Path, "unlink", mock_unlink), patch("mindsdb.__main__.logger") as mock_logger:
+            main_mod.clean_mindsdb_tmp_dir()
+            assert mock_logger.error.called or mock_logger.exception.called
diff --git a/tests/unit/various/test_rag_config_loader.py b/tests/unit/various/test_rag_config_loader.py
deleted file mode 100644
index fc555a553ff..00000000000
--- a/tests/unit/various/test_rag_config_loader.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from unittest.mock import Mock
-from mindsdb.integrations.utilities.rag.settings import (
-    RetrieverType,
-    MultiVectorRetrieverMode,
-    SearchType,
-    RAGPipelineModel,
-)
-from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
-
-
-def test_load_rag_config_empty():
-    """Test loading RAG config with empty parameters"""
-    config = load_rag_config({})
-    assert isinstance(config, RAGPipelineModel)
-
-
-def test_load_rag_config_basic():
-    """Test loading RAG config with basic parameters"""
-    base_config = {"retriever_type": RetrieverType.VECTOR_STORE.value, "search_type": SearchType.SIMILARITY.value}
-    config = load_rag_config(base_config)
-
-    assert isinstance(config, RAGPipelineModel)
-    assert config.retriever_type == RetrieverType.VECTOR_STORE
-    assert config.search_type == SearchType.SIMILARITY
-
-
-def test_load_rag_config_with_search_kwargs():
-    """Test loading RAG config with search kwargs"""
-    base_config = {
-        "retriever_type": RetrieverType.VECTOR_STORE.value,
-        "search_type": SearchType.SIMILARITY.value,
-        "search_kwargs": {"k": 5},
-    }
-    config = load_rag_config(base_config)
-
-    assert isinstance(config, RAGPipelineModel)
-    assert config.search_kwargs.k == 5
-
-
-def test_load_rag_config_with_embedding_model():
-    """Test loading RAG config with embedding model"""
-    base_config = {"retriever_type": RetrieverType.VECTOR_STORE.value, "search_type": SearchType.SIMILARITY.value}
-
-    # Create a mock that's a subclass of Embeddings
-    class MockEmbeddings:
-        def embed_documents(self, texts):
-            return [[0.0] * 10] * len(texts)
-
-        def embed_query(self, text):
-            return [0.0] * 10
-
-    embedding_model = MockEmbeddings()
-    config = load_rag_config(base_config, embedding_model=embedding_model)
-
-    assert isinstance(config, RAGPipelineModel)
-    assert config.embedding_model == embedding_model
-
-
-def test_load_rag_config_with_multi_vector_mode():
-    """Test loading RAG config with multi vector mode"""
-    base_config = {
-        "retriever_type": RetrieverType.VECTOR_STORE.value,
-        "search_type": SearchType.SIMILARITY.value,
-        "multi_retriever_mode": MultiVectorRetrieverMode.SPLIT.value,  # Use correct enum value
-    }
-    config = load_rag_config(base_config)
-
-    assert isinstance(config, RAGPipelineModel)
-    assert config.retriever_type == RetrieverType.VECTOR_STORE
-    assert config.search_type == SearchType.SIMILARITY
-    assert config.multi_retriever_mode == MultiVectorRetrieverMode.SPLIT
-
-
-def test_load_rag_config_with_kb_params():
-    """Test loading RAG config with knowledge base parameters"""
-    base_config = {"retriever_type": RetrieverType.VECTOR_STORE.value, "search_type": SearchType.SIMILARITY.value}
-    kb_params = {"search_kwargs": {"k": 5}}
-    config = load_rag_config(base_config, kb_params)
-
-    assert isinstance(config, RAGPipelineModel)
-    assert config.search_kwargs.k == 5
-
-
-def test_load_rag_config_with_vector_store_config():
-    """Test loading RAG config with vector store config"""
-    base_config = {"retriever_type": RetrieverType.VECTOR_STORE.value, "search_type": SearchType.SIMILARITY.value}
-    kb_params = {"vector_store_config": {"kb_table": Mock()}}
-    config = load_rag_config(base_config, kb_params)
-
-    assert isinstance(config, RAGPipelineModel)
-    assert config.vector_store_config.kb_table == kb_params["vector_store_config"]["kb_table"]
-
-
-def test_load_rag_config_from_knowledge_base():
-    """Test RAG config loading in knowledge base context"""
-    base_config = {
-        "retriever_type": RetrieverType.VECTOR_STORE.value,
-        "search_type": SearchType.SIMILARITY.value,
-        "search_kwargs": {"k": 5},
-    }
-    kb_params = {"vector_store_config": {"kb_table": Mock()}}
-    config = load_rag_config(base_config, kb_params)
-
-    assert isinstance(config, RAGPipelineModel)
-    assert config.retriever_type == RetrieverType.VECTOR_STORE
-    assert config.search_type == SearchType.SIMILARITY
-    assert config.search_kwargs.k == 5
-    assert config.vector_store_config.kb_table == kb_params["vector_store_config"]["kb_table"]
diff --git a/tests/unit/various/test_retrieval_tool.py b/tests/unit/various/test_retrieval_tool.py
deleted file mode 100644
index 93f4b3c3296..00000000000
--- a/tests/unit/various/test_retrieval_tool.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import pytest
-from unittest.mock import Mock
-from mindsdb.integrations.utilities.rag.settings import (
-    RetrieverType,
-    MultiVectorRetrieverMode,
-    VectorStoreConfig,
-    DEFAULT_LLM_MODEL,
-    DEFAULT_TEST_TABLE_NAME,
-    DEFAULT_CHUNK_SIZE,
-)
-from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
-
-
-@pytest.fixture
-def mock_tools_config():
-    return {
-        "retriever_type": "vector_store",
-        "multi_retriever_mode": "both",
-        "embedding_model": Mock(),
-        "documents": [Mock()],
-        "vector_store_config": {"vector_store_type": "chromadb", "collection_name": "test"},
-        "invalid_param": "should_be_filtered_out",
-    }
-
-
-def test_rag_params_conversion():
-    """Test that parameters are correctly converted to RAGPipelineModel"""
-    tools_config = {
-        "retriever_type": "vector_store",
-        "multi_retriever_mode": "both",
-    }
-    rag_config = load_rag_config(tools_config)
-    assert rag_config.retriever_type == RetrieverType.VECTOR_STORE
-    assert rag_config.multi_retriever_mode == MultiVectorRetrieverMode.BOTH
-
-
-def test_invalid_params():
-    """Test that invalid enum values raise appropriate errors"""
-    tools_config = {
-        "retriever_type": "invalid_type",
-    }
-    with pytest.raises(ValueError):
-        load_rag_config(tools_config)
-
-    tools_config = {"invalid_param": "invalid_type"}
-    with pytest.raises(ValueError):
-        load_rag_config(tools_config)
-
-
-def test_vector_store_config_conversion():
-    """Test that vector store config is properly handled"""
-    tools_config = {"vector_store_config": {"vector_store_type": "chromadb", "collection_name": "test"}}
-    rag_config = load_rag_config(tools_config)
-    assert isinstance(rag_config.vector_store_config, VectorStoreConfig)
-    assert rag_config.vector_store_config.collection_name == "test"
-
-
-def test_default_values():
-    """Test that default values are properly set"""
-    tools_config = {}
-    rag_config = load_rag_config(tools_config)
-    # Test default enum values
-    assert rag_config.retriever_type == RetrieverType.VECTOR_STORE
-    assert rag_config.multi_retriever_mode == MultiVectorRetrieverMode.BOTH
-    # Test other default values
-    assert rag_config.llm_model_name == DEFAULT_LLM_MODEL
-    assert rag_config.table_name == DEFAULT_TEST_TABLE_NAME
-    assert rag_config.chunk_size == DEFAULT_CHUNK_SIZE
-    assert isinstance(rag_config.vector_store_config, VectorStoreConfig)
-
-
-@pytest.mark.parametrize(
-    "field,value,expected",
-    [
-        ("retriever_type", "auto", RetrieverType.AUTO),
-        ("multi_retriever_mode", "split", MultiVectorRetrieverMode.SPLIT),
-        ("chunk_size", 500, 500),
-    ],
-)
-def test_field_assignments(field, value, expected):
-    """Test various field assignments"""
-    tools_config = {field: value}
-    rag_config = load_rag_config(tools_config)
-    assert getattr(rag_config, field) == expected
diff --git a/tests/unused/integration/a2a/__init__.py b/tests/unused/integration/a2a/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/unused/integration/a2a/test_a2a_streaming.py b/tests/unused/integration/a2a/test_a2a_streaming.py
deleted file mode 100644
index 71032a7e679..00000000000
--- a/tests/unused/integration/a2a/test_a2a_streaming.py
+++ /dev/null
@@ -1,436 +0,0 @@
-#!/usr/bin/env python
-import sys
-import json
-import time
-import requests
-import uuid
-import argparse
-import pytest
-import os
-from typing import Dict, List, Set
-
-# Default test configuration
-DEFAULT_HOST = "0.0.0.0"
-DEFAULT_PORT = 10002
-DEFAULT_TIMEOUT = 120  # seconds
-DEFAULT_QUERY = "What's the average price for a one bed?"
-DEFAULT_AGENT_NAME = "my_agent"  # Default agent name to use for requests
-
-"""
-Run instructions:
-
-start A2A server and mindsdb
-
-You should have an agent created in MindsDB (this script does NOT create the agent)
-
-By default the agent is named "my_agent"
-
-e.g.
-
-CREATE AGENT my_agent
-USING
-
-    model='gemini-2.0-flash',
-    include_knowledge_bases=['mindsdb.kb_test'],
-    include_tables=['postgresql_conn.home_rentals', 'postgresql_conn2.car_info'],
-    prompt_template='
-   mindsdb.kb_test knowledge base has info about cities
-   postgresql_conn.home_rentals database tables has rental data
-   postgresql_conn2.car_info contains info on cars specs
-    ';
-
-source .venv/bin/activate
-python test_a2a_streaming.py --host 0.0.0.0 --port 10002 --timeout 180 "What's the average price of homes in berkeley_hills?"
-"""
-
-
-def generate_uuid() -> str:
-    """Generate a random UUID string."""
-    return str(uuid.uuid4()).replace("-", "")
-
-
-def stream_a2a_query(
-    query: str,
-    host: str = DEFAULT_HOST,
-    port: int = DEFAULT_PORT,
-    timeout: int = DEFAULT_TIMEOUT,
-    verbose: bool = False,
-    agent_name: str = DEFAULT_AGENT_NAME,  # Added agent_name parameter with default
-):
-    """
-    Stream a query to the A2A server and yield the responses incrementally.
-
-    Args:
-        query: The text query to send to the agent
-        host: A2A server host
-        port: A2A server port
-        timeout: Maximum time to wait for responses (seconds)
-        verbose: Whether to print responses to stdout
-        agent_name: Name of the agent to use (REQUIRED - the A2A API requires an explicit agent name)
-
-    Yields:
-        Dict: Response messages from the A2A server
-    """
-    # Generate unique IDs for the request
-    task_id = generate_uuid()
-    session_id = generate_uuid()
-    request_id = generate_uuid()
-
-    # Prepare the request payload
-    payload = {
-        "jsonrpc": "2.0",
-        "id": request_id,
-        "method": "tasks/sendSubscribe",
-        "params": {
-            "id": task_id,
-            "sessionId": session_id,
-            "message": {
-                "role": "user",
-                "parts": [
-                    {"type": "text", "text": query},
-                ],
-                "metadata": {"agent_name": agent_name},  # Using the provided agent_name
-            },
-            "acceptedOutputModes": ["text/plain"],
-        },
-    }
-
-    url = f"http://{host}:{port}/a2a"
-    if verbose:
-        print(f"Sending streaming request to {url}")
-        print(f"Query: {query}")
-        print("Streaming responses:")
-        print("-" * 60)
-
-    # Set up headers for SSE
-    headers = {
-        "Content-Type": "application/json",
-        "Accept": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-    }
-
-    # Track seen messages to avoid duplicates
-    seen_messages: Set[str] = set()
-    all_responses: List[Dict] = []
-    start_time = time.time()
-
-    try:
-        # Make the streaming request
-        with requests.post(url, json=payload, headers=headers, stream=True) as response:
-            if not response.ok:
-                error_msg = f"Error: HTTP {response.status_code} - {response.text}"
-                if verbose:
-                    print(error_msg)
-                yield {"error": error_msg}
-                return
-
-            # Process the SSE stream
-            buffer = ""
-            for chunk in response.iter_content(chunk_size=1):
-                # Check timeout
-                if time.time() - start_time > timeout:
-                    error_msg = f"Timeout after {timeout} seconds"
-                    if verbose:
-                        print(error_msg)
-                    yield {"error": error_msg, "timeout": True}
-                    return
-
-                if not chunk:
-                    continue
-
-                # Decode the chunk and add to buffer
-                buffer += chunk.decode("utf-8")
-
-                # Process complete lines
-                while "\n" in buffer:
-                    line, buffer = buffer.split("\n", 1)
-                    line = line.rstrip()
-
-                    # Skip empty lines
-                    if not line:
-                        continue
-
-                    # Process data lines
-                    if line.startswith("data:"):
-                        data_str = line[5:].strip()
-                        if not data_str:
-                            continue
-
-                        try:
-                            # Parse the JSON data
-                            data = json.loads(data_str)
-
-                            # Extract and display content
-                            if "result" in data:
-                                result = data["result"]
-
-                                # Handle status updates
-                                if "status" in result:
-                                    message = result["status"].get("message", {})
-                                    parts = message.get("parts", [])
-
-                                    for part in parts:
-                                        # Get content and metadata
-                                        content = part.get("text", "")
-                                        metadata = part.get("metadata", {})
-                                        thought_type = metadata.get("thought_type", "")
-
-                                        # Create a unique key for deduplication
-                                        message_key = f"{thought_type}:{content}"
-
-                                        # Skip if we've seen this message before
-                                        if message_key in seen_messages:
-                                            continue
-
-                                        # Add to seen messages
-                                        seen_messages.add(message_key)
-
-                                        # Create response object
-                                        response_obj = {
-                                            "type": thought_type
-                                            or part.get("type", "text"),
-                                            "content": content,
-                                            "metadata": metadata,
-                                        }
-
-                                        # Display based on thought type
-                                        if verbose:
-                                            if thought_type == "thought":
-                                                print(f"Thought: {content}")
-                                            elif thought_type == "observation":
-                                                print(f"Observation: {content}")
-                                            elif thought_type == "sql":
-                                                print(f"SQL Query: {content}")
-                                            elif part.get("type") == "text":
-                                                print(content)
-                                            sys.stdout.flush()
-
-                                        # Yield the response
-                                        yield response_obj
-                                        all_responses.append(response_obj)
-
-                                # Handle artifact updates
-                                if "artifact" in result:
-                                    artifact = result["artifact"]
-                                    parts = artifact.get("parts", [])
-
-                                    for part in parts:
-                                        content = part.get("text", "")
-                                        if content:
-                                            response_obj = {
-                                                "type": "answer",
-                                                "content": content,
-                                            }
-                                            if verbose:
-                                                print(content)
-                                                sys.stdout.flush()
-                                            yield response_obj
-                                            all_responses.append(response_obj)
-
-                                # Handle completion
-                                if result.get("final"):
-                                    response_obj = {"type": "completion", "final": True}
-                                    if verbose:
-                                        print("\n[Completed]")
-                                        sys.stdout.flush()
-                                    yield response_obj
-                                    all_responses.append(response_obj)
-
-                            # Handle errors
-                            elif "error" in data:
-                                error_msg = data["error"].get("message", "")
-                                response_obj = {"error": error_msg}
-                                if verbose:
-                                    print(f"Error: {error_msg}")
-                                    sys.stdout.flush()
-                                yield response_obj
-                                all_responses.append(response_obj)
-
-                        except json.JSONDecodeError:
-                            if verbose:
-                                print(f"Warning: Invalid JSON: {data_str[:50]}...")
-                                sys.stdout.flush()
-
-                    # Process event end
-                    elif line == "":
-                        # Event boundary - process the event
-                        pass
-
-    except KeyboardInterrupt:
-        if verbose:
-            print("\nInterrupted by user")
-        yield {"error": "Interrupted by user"}
-    except Exception as e:
-        error_msg = f"Error: {str(e)}"
-        if verbose:
-            print(error_msg)
-        yield {"error": error_msg}
-
-    # Return all collected responses
-    return all_responses
-
-
-def run_manual_test():
-    """Run a manual test with command line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Test A2A streaming with direct requests"
-    )
-    parser.add_argument("--host", default=DEFAULT_HOST, help="A2A server host")
-    parser.add_argument(
-        "--port", type=int, default=DEFAULT_PORT, help="A2A server port"
-    )
-    parser.add_argument(
-        "--timeout", type=int, default=DEFAULT_TIMEOUT, help="Timeout in seconds"
-    )
-    parser.add_argument(
-        "--agent-name", default=DEFAULT_AGENT_NAME, help="Name of the agent to use"
-    )
-    parser.add_argument(
-        "query", nargs="?", default=DEFAULT_QUERY, help="Query to send to the agent"
-    )
-
-    args = parser.parse_args()
-
-    # Run with verbose output for manual testing
-    for _ in stream_a2a_query(
-        args.query,
-        args.host,
-        args.port,
-        args.timeout,
-        verbose=True,
-        agent_name=args.agent_name,
-    ):
-        pass  # Just consume the generator to display output
-
-
-@pytest.mark.integration
-def test_a2a_streaming_integration():
-    """
-    Integration test for A2A streaming functionality.
-
-    This test requires a running A2A server. It can be configured with environment variables:
-    - A2A_TEST_HOST: A2A server host (default: 0.0.0.0)
-    - A2A_TEST_PORT: A2A server port (default: 10002)
-    - A2A_TEST_QUERY: Query to send (default: "What's the average price for a one bed?")
-    - A2A_TEST_TIMEOUT: Timeout in seconds (default: 120)
-    - A2A_TEST_AGENT_NAME: Agent name to use (default: my_agent)
-
-    IMPORTANT: You must create the agent in MindsDB before running this test.
-    """
-    # Get configuration from environment variables
-    host = os.environ.get("A2A_TEST_HOST", DEFAULT_HOST)
-    port = int(os.environ.get("A2A_TEST_PORT", DEFAULT_PORT))
-    query = os.environ.get("A2A_TEST_QUERY", DEFAULT_QUERY)
-    timeout = int(os.environ.get("A2A_TEST_TIMEOUT", DEFAULT_TIMEOUT))
-    agent_name = os.environ.get("A2A_TEST_AGENT_NAME", DEFAULT_AGENT_NAME)
-
-    # Check if we should skip the test
-    skip_test = os.environ.get("SKIP_A2A_TEST", "false").lower() == "true"
-    if skip_test:
-        pytest.skip(
-            "Skipping A2A test as requested by SKIP_A2A_TEST environment variable"
-        )
-
-    # First, check if the agent exists by making a simple request
-    try:
-        # Make a simple request to check if the agent exists
-        url = f"http://{host}:{port}/a2a"
-        check_payload = {
-            "jsonrpc": "2.0",
-            "id": generate_uuid(),
-            "method": "tasks/send",
-            "params": {
-                "id": generate_uuid(),
-                "sessionId": generate_uuid(),
-                "message": {
-                    "role": "user",
-                    "parts": [{"type": "text", "text": "test"}],
-                    "metadata": {"agent_name": agent_name},
-                },
-            },
-        }
-
-        response = requests.post(url, json=check_payload, timeout=10)
-        if response.status_code == 404 or "not found" in response.text.lower():
-            pytest.skip(
-                f"Agent '{agent_name}' not found. Please create the agent before running this test."
-            )
-    except Exception as e:
-        pytest.skip(f"Error checking if agent exists: {str(e)}")
-
-    # Collect all responses
-    responses = list(
-        stream_a2a_query(
-            query, host, port, timeout, verbose=False, agent_name=agent_name
-        )
-    )
-
-    # Basic assertions
-    assert len(responses) > 0, "No responses received from A2A server"
-
-    # Check for error responses, but be more tolerant of task tracking errors and validation errors
-    errors = [r for r in responses if "error" in r]
-
-    # Identify different types of non-critical errors
-    task_tracking_errors = [
-        e
-        for e in errors
-        if "Task" in e.get("error", "") and "not found" in e.get("error", "")
-    ]
-    validation_errors = [e for e in errors if "validation error" in e.get("error", "")]
-    non_critical_errors = task_tracking_errors + validation_errors
-
-    # If all errors are non-critical, we can consider the test passed
-    if len(errors) > 0 and len(non_critical_errors) == len(errors):
-        print(f"Ignoring non-critical errors: {non_critical_errors}")
-    else:
-        # If there are other types of errors, fail the test
-        real_errors = [e for e in errors if e not in non_critical_errors]
-        assert len(real_errors) == 0, f"Errors in responses: {real_errors}"
-
-    # Print response types for debugging
-    response_types = set(r.get("type", "") for r in responses)
-    print(f"Response types found: {response_types}")
-
-    # Verify we have different types of responses (thoughts, observations, etc.)
-    assert len(response_types) > 1, f"Only found response types: {response_types}"
-
-    # Look for any kind of final/completion message
-    # More flexible approach - check for any of these indicators
-    final_messages = [
-        r
-        for r in responses
-        if (
-            # Original strict check
-            (r.get("type") == "completion" and r.get("final"))
-            or r.get("final") is True
-            or r.get("type") == "answer"
-            or (
-                r.get("type") == "text"
-                and r.get("content", "").strip()
-                and len(r.get("content", "")) > 20
-            )
-        )
-    ]
-
-    print(f"Found {len(final_messages)} potential final/completion messages")
-    if len(final_messages) == 0:
-        # Print the last few responses for debugging
-        print("Last 5 responses for debugging:")
-        for r in responses[-5:]:
-            print(f"  {r}")
-
-    # More lenient assertion - just check if we got any responses at all
-    assert len(responses) > 5, "Not enough responses received"
-
-    # Skip the completion check for now as the format may vary
-    # assert len(final_messages) > 0, "No completion or final message received"
-
-    print(f"βœ… Integration test passed with {len(responses)} responses")
-    return responses
-
-
-if __name__ == "__main__":
-    # If run directly, use the manual test mode
-    run_manual_test()
diff --git a/tests/unused/integration/knowledge_bases/data/seed.sql b/tests/unused/integration/knowledge_bases/data/seed.sql
deleted file mode 100644
index 69fe7847fc2..00000000000
--- a/tests/unused/integration/knowledge_bases/data/seed.sql
+++ /dev/null
@@ -1,14 +0,0 @@
--- Make sure pgvector extension is enabled
-DROP EXTENSION IF EXISTS vector;
-CREATE EXTENSION vector;
-
--- Create the table
-CREATE TABLE IF NOT EXISTS items (
-  id text PRIMARY KEY,
-  content text NOT NULL,
-  embeddings vector NOT NULL,
-  metadata jsonb
-);
-
--- Dummy data will be handled in tests themselves. Leave it empty.
-COMMIT;
diff --git a/tests/unused/integration/knowledge_bases/mindsdb_langchain_pgvector_integration_test.py b/tests/unused/integration/knowledge_bases/mindsdb_langchain_pgvector_integration_test.py
deleted file mode 100644
index 3a16dac44d0..00000000000
--- a/tests/unused/integration/knowledge_bases/mindsdb_langchain_pgvector_integration_test.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from mindsdb.integrations.utilities.rag.loaders.vector_store_loader.pgvector import PGVectorMDB
-from mindsdb.integrations.handlers.langchain_embedding_handler.fastapi_embeddings import FastAPIEmbeddings
-
-
-def setup_pgvector_database():
-    """Setup pgvector database"""
-    # Using port 15432 to avoid conflicts with local PostgreSQL
-    connection_string = "postgresql://gateway:gateway@localhost:15432/gateway"
-
-    print(f"Connecting to: {connection_string}")
-
-    # Initialize FastAPI embeddings
-    embeddings = FastAPIEmbeddings(
-        api_base="http://localhost:8043/v1/embeddings",
-        model="sparse_model"
-    )
-
-    # Initialize PGVectorMDB
-    vector_db = PGVectorMDB(
-        connection_string=connection_string,
-        collection_name="test_dev_doc_vectors",
-        embedding_function=embeddings,
-        is_sparse=True,  # Using sparse vectors
-        vector_size=30522  # Size for sparse vectors
-    )
-
-    return vector_db
-
-
-def test_vector_queries(vector_db):
-    """Test various vector queries"""
-    print("\nTesting vector queries...")
-
-    # Test text to be embedded
-    test_text = "For the Bsecondaryl containment"
-
-    # Get embeddings for the test text
-    embedding = vector_db.embedding_function.embed_query(test_text)
-
-    # Query similar vectors
-    results = vector_db._query_collection(
-        embedding=embedding,
-        k=5
-    )
-
-    print("\nVector similarity search results:")
-    for item, distance in results:
-        print(f"Content: {item.content}")
-        print(f"Metadata: {item.metadata}")
-        print(f"Distance: {distance}")
-        print("---")
-
-
-def main():
-    # Setup vector database
-    print("\nSetting up pgvector database...")
-    vector_db = setup_pgvector_database()
-
-    # Run tests
-    test_vector_queries(vector_db)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tests/unused/integration/knowledge_bases/test_knowledge_bases.py b/tests/unused/integration/knowledge_bases/test_knowledge_bases.py
deleted file mode 100644
index a43c1a77ce5..00000000000
--- a/tests/unused/integration/knowledge_bases/test_knowledge_bases.py
+++ /dev/null
@@ -1,337 +0,0 @@
-from http import HTTPStatus
-from tempfile import TemporaryDirectory
-from time import perf_counter, sleep
-from uuid import uuid4
-
-import os
-import psycopg2
-import pytest
-
-from mindsdb.api.http.initialize import initialize_app
-from mindsdb.migrations import migrate
-from mindsdb.interfaces.storage import db
-from mindsdb.utilities.config import config
-
-# Should match table name in data/seed.sql
-TEST_TABLE_NAME = 'items'
-# Should match column names in data/seed.sql
-COLUMN_NAMES = ['id', 'content', 'embeddings', 'metadata']
-
-CONNECTION_KWARGS = {
-    'connection_data': {
-        'host': os.environ.get('MDB_TEST_PGVECTOR_HOST', '127.0.0.1'),
-        'port': os.environ.get('MDB_TEST_PGVECTOR_PORT', '5432'),
-        'user': os.environ.get('MDB_TEST_PGVECTOR_USER', 'postgres'),
-        'password': os.environ.get('MDB_TEST_PGVECTOR_PASSWORD', 'supersecret'),
-        'database': None  # Different for each test.
-    }
-}
-
-MODEL_WAIT_DURATION_SECONDS = 5
-MODEL_WAIT_SLEEP_INTERVAL_SECONDS = 0.2
-
-
-@pytest.fixture(scope="session", autouse=True)
-def app():
-    old_minds_db_con = ''
-    if 'MINDSDB_DB_CON' in os.environ:
-        old_minds_db_con = os.environ['MINDSDB_DB_CON']
-    with TemporaryDirectory(prefix='knowledge_bases_integration_test_') as temp_dir:
-        db_path = 'sqlite:///' + os.path.join(temp_dir, 'mindsdb.sqlite3.db')
-        # Need to change env variable for migrate module, since it calls db.init().
-        os.environ['MINDSDB_DB_CON'] = db_path
-        db.init()
-        migrate.migrate_to_head()
-        config["gui"]["open_on_start"] = False
-        config["gui"]["autoupdate"] = False
-        app = initialize_app()
-
-        test_client = app.test_client()
-
-        # Create langchain embedding model to use in all tests.
-        create_ml_engine_query = 'CREATE ML_ENGINE langchain_embedding FROM langchain_embedding;'
-        create_ml_engine_data = {
-            'query': create_ml_engine_query
-        }
-        response = test_client.post('/api/sql/query', json=create_ml_engine_data, follow_redirects=True)
-        assert '200' in response.status
-
-        # Create model to use in all tests. Use OpenAI for embeddings.
-        create_query = '''
-        CREATE MODEL mindsdb.test_embedding_model
-        PREDICT embeddings
-        USING
-            engine='langchain_embedding',
-            class = 'OpenAIEmbeddings',
-            input_columns = ['content'];
-        '''
-        train_data = {
-            'query': create_query
-        }
-        response = test_client.post('/api/projects/mindsdb/models', json=train_data, follow_redirects=True)
-        assert '201' in response.status
-
-        # Wait for model to complete.
-        model_complete = False
-        model_start_time = perf_counter()
-        while not model_complete:
-            if (perf_counter() - model_start_time) > MODEL_WAIT_DURATION_SECONDS:
-                pytest.fail('Model test_embedding_model did not finish training in time')
-            response = test_client.get('/api/projects/mindsdb/models/test_embedding_model')
-            model_status = response.get_json().get('status', 'error')
-            if model_status == 'complete':
-                model_complete = True
-                continue
-            if model_status == 'error':
-                pytest.fail('Model test_embedding_model encountered an error while training')
-            sleep(MODEL_WAIT_SLEEP_INTERVAL_SECONDS)
-        yield app
-    os.environ['MINDSDB_DB_CON'] = old_minds_db_con
-
-
-@pytest.fixture()
-def client(app):
-    return app.test_client()
-
-
-def init_db():
-    '''Initialize a new DB for every test.'''
-    conn_info = CONNECTION_KWARGS['connection_data'].copy()
-    conn_info['database'] = 'postgres'
-    db = psycopg2.connect(**conn_info)
-    db.autocommit = True
-    cursor = db.cursor()
-
-    try:
-        new_db_name = f'test_pgvector_{uuid4().hex}'
-        # Create the test database if it does not exist.
-        cursor.execute(f'DROP DATABASE IF EXISTS {new_db_name}')
-        db.commit()
-        cursor.execute(f'CREATE DATABASE {new_db_name};')
-        db.commit()
-
-        # Reconnect to the new database
-        conn_info['database'] = new_db_name
-        db = psycopg2.connect(**conn_info)
-        db.autocommit = True
-        cursor = db.cursor()
-
-        # Seed the database with data
-        curr_dir = os.path.dirname(os.path.realpath(__file__))
-        seed_sql_path = os.path.join(curr_dir, 'data', 'seed.sql')
-        with open(seed_sql_path, 'r') as sql_seed_file:
-            cursor.execute(sql_seed_file.read())
-        db.commit()
-
-    finally:
-        # Close the cursor and the connection
-        cursor.close()
-        db.close()
-
-    return new_db_name
-
-
-@pytest.fixture(autouse=True)
-def pgvector_database_name(client):
-    # Initialize a fresh DB for each test.
-    new_db_name = init_db()
-    # Connect new DB to MindsDB.
-    conn_info = CONNECTION_KWARGS['connection_data'].copy()
-    conn_info['database'] = new_db_name
-    example_db_data = {
-        'database': {
-            'name': new_db_name,
-            'engine': 'pgvector',
-            'parameters': conn_info
-        }
-    }
-    response = client.post('/api/databases', json=example_db_data, follow_redirects=True)
-    assert '201' in response.status
-    return new_db_name
-
-
-@pytest.mark.skipif(os.environ.get('MDB_TEST_PGVECTOR_HOST') is None, reason='MDB_TEST_PGVECTOR_HOST environment variable not set')
-@pytest.mark.skipif(os.environ.get('OPENAI_API_KEY') is None, reason='OPENAI_API_KEY environment variable not set')
-class TestKnowledgeBaseCompletion:
-    def test_chat_completion(self, client, pgvector_database_name):
-        test_kb_name = 'test_chat_completion_kb'
-        create_request = {
-            'knowledge_base': {
-                'name': test_kb_name,
-                'model': 'test_embedding_model',
-                'storage': {
-                    'table': TEST_TABLE_NAME,
-                    'database': pgvector_database_name
-                }
-            }
-        }
-        create_kb_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
-        assert create_kb_response.status_code == HTTPStatus.CREATED
-
-        # Insert documents to help answer the question.
-        rows_to_insert = [
-            {'content': 'The capital of Tyler Fantasy RAG Land is MindsDB'}
-        ]
-        update_request = {
-            'knowledge_base': {
-                'rows': rows_to_insert
-            }
-        }
-        update_kb_response = client.put(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}',
-                                        json=update_request, follow_redirects=True)
-        assert update_kb_response.status_code == HTTPStatus.OK
-
-        completion_request = {
-            'query': 'What is the capital of Tyler Fantasy RAG Land?',
-            'llm_model': 'gpt-4o'
-        }
-        response = client.post(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}/completions',
-                               json=completion_request, follow_redirects=True)
-        assert response.status_code == HTTPStatus.OK
-        response_data = response.get_json()
-        assert 'message' in response_data
-        assert 'content' in response_data['message']
-        assert 'context' in response_data['message']
-        assert response_data['message']['role'] == 'assistant'
-        # Should get the right answer.
-        assert 'mindsdb' in response_data['message']['content'].lower()
-
-    def test_context_completion_with_keywords(self, client, pgvector_database_name):
-        test_kb_name = 'test_context_completion_with_keywords'
-        create_request = {
-            'knowledge_base': {
-                'name': test_kb_name,
-                'model': 'test_embedding_model',
-                'storage': {
-                    'table': TEST_TABLE_NAME,
-                    'database': pgvector_database_name
-                }
-            }
-        }
-        create_kb_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
-        assert create_kb_response.status_code == HTTPStatus.CREATED
-
-        # Insert documents for context.
-        rows_to_insert = [
-            {'content': 'The capital of Tyler Fantasy RAG Land is MindsDB'},
-            {'content': 'The population of Tyler Fantasy RAG Land is 6'}
-        ]
-        update_request = {
-            'knowledge_base': {
-                'rows': rows_to_insert
-            }
-        }
-        update_kb_response = client.put(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}',
-                                        json=update_request, follow_redirects=True)
-        assert update_kb_response.status_code == HTTPStatus.OK
-
-        completion_request = {
-            'query': 'Population of rag land',
-            'keywords': 'population rag land',
-            'type': 'context'
-        }
-        response = client.post(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}/completions',
-                               json=completion_request, follow_redirects=True)
-        assert response.status_code == HTTPStatus.OK
-        response_data = response.get_json()
-        # Should have the most relevant document first.
-        assert 'documents' in response_data
-        assert len(response_data['documents']) == 2
-        assert 'population' in response_data['documents'][0]['content']
-
-    def test_context_completion_with_metadata(self, client, pgvector_database_name):
-        test_kb_name = 'test_context_completion_with_metadata'
-        create_request = {
-            'knowledge_base': {
-                'name': test_kb_name,
-                'model': 'test_embedding_model',
-                'storage': {
-                    'table': TEST_TABLE_NAME,
-                    'database': pgvector_database_name
-                }
-            }
-        }
-        create_kb_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
-        assert create_kb_response.status_code == HTTPStatus.CREATED
-
-        # Insert documents for context.
-        rows_to_insert = [
-            {'content': 'The capital of Tyler Fantasy RAG Land is MindsDB', 'author': 'Danya'},
-            {'content': 'The population of Tyler Fantasy RAG Land is 6', 'author': 'Tyler'},
-            {'content': 'Totally unrelated', 'author': 'Tyler'}
-
-        ]
-        update_request = {
-            'knowledge_base': {
-                'rows': rows_to_insert
-            }
-        }
-        update_kb_response = client.put(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}',
-                                        json=update_request, follow_redirects=True)
-        assert update_kb_response.status_code == HTTPStatus.OK
-
-        completion_request = {
-            'query': 'Population of rag land',
-            'metadata': {
-                'author': 'Tyler'
-            },
-            'type': 'context'
-        }
-        response = client.post(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}/completions',
-                               json=completion_request, follow_redirects=True)
-        assert response.status_code == HTTPStatus.OK
-        response_data = response.get_json()
-        assert 'documents' in response_data
-        # Only 2 have matching metadata.
-        assert len(response_data['documents']) == 2
-        # Should have the most relevant document first.
-        assert 'population' in response_data['documents'][0]['content']
-
-    def test_context_completion_with_keywords_and_metadata(self, client, pgvector_database_name):
-        test_kb_name = 'test_context_completion_with_keywords_and_metadata'
-        create_request = {
-            'knowledge_base': {
-                'name': test_kb_name,
-                'model': 'test_embedding_model',
-                'storage': {
-                    'table': TEST_TABLE_NAME,
-                    'database': pgvector_database_name
-                }
-            }
-        }
-        create_kb_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True)
-        assert create_kb_response.status_code == HTTPStatus.CREATED
-
-        # Insert documents for context.
-        rows_to_insert = [
-            {'content': 'The capital of Tyler Fantasy RAG Land is MindsDB', 'author': 'Danya'},
-            {'content': 'The population of Tyler Fantasy RAG Land is 6', 'author': 'Tyler'},
-            {'content': 'Totally unrelated', 'author': 'Tyler'}
-
-        ]
-        update_request = {
-            'knowledge_base': {
-                'rows': rows_to_insert
-            }
-        }
-        update_kb_response = client.put(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}',
-                                        json=update_request, follow_redirects=True)
-        assert update_kb_response.status_code == HTTPStatus.OK
-
-        completion_request = {
-            'query': 'Population of rag land',
-            'metadata': {
-                'author': 'Tyler'
-            },
-            'keywords': 'rag land population',
-            'type': 'context'
-        }
-        response = client.post(f'/api/projects/mindsdb/knowledge_bases/{test_kb_name}/completions',
-                               json=completion_request, follow_redirects=True)
-        assert response.status_code == HTTPStatus.OK
-        response_data = response.get_json()
-        assert 'documents' in response_data
-        # Only 2 have matching metadata.
-        assert len(response_data['documents']) == 2
-        # Should have the most relevant document first.
-        assert 'population' in response_data['documents'][0]['content']
diff --git a/tests/unused/integration/metrics/test_metrics.py b/tests/unused/integration/metrics/test_metrics.py
deleted file mode 100644
index 970b592d834..00000000000
--- a/tests/unused/integration/metrics/test_metrics.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import datetime
-import os
-import pytest
-import requests
-import time
-from typing import Dict
-
-from tests.integration.utils.http_test_helpers import HTTPHelperMixin
-from tests.integration.conftest import HTTP_API_ROOT
-
-
-def _get_metrics():
-    url = HTTP_API_ROOT.rstrip("api") + "metrics"
-    print(f"Getting metrics from {url}")
-    resp = requests.get(url).text
-    print(f"Metrics response: {resp}")
-    return resp
-
-
-def _wait_for_metric(name: str, labels: Dict[str, str], timeout: datetime.timedelta = None):
-    if timeout is None:
-        timeout = datetime.timedelta(seconds=30)
-    start_time = datetime.datetime.now()
-    while datetime.datetime.now() - start_time < timeout:
-        metrics = _get_metrics()
-        for metrics_line in metrics.split('\n'):
-            if name not in metrics_line:
-                continue
-            # Check labels match metric.
-            found = True
-            for label_name, label_value in labels.items():
-                if f'{label_name}="{label_value}"' not in metrics_line:
-                    found = False
-                    break
-            if found:
-                print(f"Found metric: {metrics_line}")
-                metrics_value = metrics_line.split()[-1]
-                return float(metrics_value)
-        time.sleep(0.5)
-    return -1
-
-
-class TestMetrics(HTTPHelperMixin):
-    @pytest.mark.skipif(("localhost" in HTTP_API_ROOT or "127.0.0.1" in HTTP_API_ROOT) and os.getenv('PROMETHEUS_MULTIPROC_DIR') is None, reason="PROMETHEUS_MULTIPROC_DIR environment variable is not set")
-    def test_http_metrics(self):
-        # Make an HTTP request and check for updated metrics.
-        api_metric_labels = {
-            'endpoint': '/util/ping_native',
-            'method': 'GET',
-            'status': '200'
-        }
-
-        before_metric = _wait_for_metric(
-            'mindsdb_rest_api_latency_seconds_count',
-            api_metric_labels,
-        )
-        print(f"Before metric: {before_metric}")
-        _ = self.api_request('get', '/util/ping_native')
-        assert _wait_for_metric(
-            'mindsdb_rest_api_latency_seconds_count',
-            api_metric_labels,
-        ) == before_metric + 1
-        # Check multiproc dir is populated.
-        multiproc_dir = os.getenv('PROMETHEUS_MULTIPROC_DIR')
-        # We can't check this dir if we're running against a remote env.
-        if multiproc_dir is not None:
-            assert os.path.isdir(multiproc_dir)
-            assert len(os.listdir(os.getenv('PROMETHEUS_MULTIPROC_DIR'))) > 0
diff --git a/tests/unused/integration/rag/test_rag_search_kwargs.py b/tests/unused/integration/rag/test_rag_search_kwargs.py
deleted file mode 100644
index 7ee668790fd..00000000000
--- a/tests/unused/integration/rag/test_rag_search_kwargs.py
+++ /dev/null
@@ -1,271 +0,0 @@
-import os
-import uuid
-import pytest
-from unittest.mock import Mock, patch
-from langchain_openai import ChatOpenAI, OpenAIEmbeddings
-from langchain_core.documents import Document
-from langchain.vectorstores.base import VectorStore
-import tempfile
-import shutil
-from langchain_community.vectorstores import Chroma
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-
-from mindsdb.integrations.utilities.rag.settings import (
-    RAGPipelineModel,
-    RetrieverType,
-    SearchKwargs,
-    SearchType,
-    MultiVectorRetrieverMode,
-    DEFAULT_LLM_MODEL,
-    DEFAULT_LLM_ENDPOINT
-)
-from mindsdb.integrations.utilities.rag.pipelines.rag import LangChainRAGPipeline
-
-requires_openai = pytest.mark.skipif(
-    not os.getenv("OPENAI_API_KEY"),
-    reason="OPENAI_API_KEY environment variable not set"
-)
-
-
-@pytest.fixture
-def chat_llm():
-    api_key = os.getenv("OPENAI_API_KEY")
-    if not api_key:
-        pytest.skip("OPENAI_API_KEY environment variable not set")
-    return ChatOpenAI(
-        model=DEFAULT_LLM_MODEL,
-        openai_api_base=DEFAULT_LLM_ENDPOINT,
-        api_key=api_key
-    )
-
-
-@pytest.fixture
-def embeddings():
-    api_key = os.getenv("OPENAI_API_KEY")
-    if not api_key:
-        pytest.skip("OPENAI_API_KEY environment variable not set")
-    return OpenAIEmbeddings(api_key=api_key)
-
-
-class MockVectorStore(VectorStore):
-    def add_texts(self, *args, **kwargs):
-        pass
-
-    def similarity_search(self, *args, **kwargs):
-        pass
-
-    def as_retriever(self, **kwargs):
-        return Mock()
-
-
-@pytest.fixture
-def sample_documents():
-    return [
-        Document(page_content="Test document 1", metadata={"source": "test1"}),
-        Document(page_content="Test document 2", metadata={"source": "test2"})
-    ]
-
-
-@pytest.fixture
-def vector_store_path():
-    temp_dir = tempfile.mkdtemp()
-    yield temp_dir
-    shutil.rmtree(temp_dir)
-
-
-@pytest.fixture
-def vector_store(embeddings, vector_store_path):
-    return Chroma(
-        embedding_function=embeddings,
-        persist_directory=vector_store_path
-    )
-
-
-@pytest.fixture
-def base_config(sample_documents, chat_llm, embeddings, vector_store):
-    return RAGPipelineModel(
-        documents=sample_documents,
-        vector_store=vector_store,
-        embedding_model=embeddings,
-        llm=chat_llm
-    )
-
-
-class TestRAGSearchKwargs:
-    @pytest.fixture(autouse=True)
-    def setup(self, base_config, sample_documents, chat_llm, embeddings, vector_store):
-        """Setup test configuration with fixtures"""
-        self.base_config = base_config
-        self.sample_documents = sample_documents
-        self.chat_llm = chat_llm
-        self.embeddings = embeddings
-        self.vector_store = vector_store
-        self.base_dict = {
-            'documents': self.sample_documents,
-            'vector_store': self.vector_store,
-            'embedding_model': self.embeddings,
-            'llm': self.chat_llm
-        }
-
-    @requires_openai
-    def test_vector_store_retriever_search_kwargs(self):
-        config = RAGPipelineModel(
-            **self.base_dict,
-            search_type=SearchType.SIMILARITY_SCORE_THRESHOLD,
-            search_kwargs=SearchKwargs(
-                k=3,
-                score_threshold=0.5
-            ),
-            retriever_type=RetrieverType.VECTOR_STORE
-        )
-        mock_retriever = Mock()
-        mock_retriever.search_kwargs = {"k": 3, "score_threshold": 0.5}
-        with patch('mindsdb.integrations.utilities.rag.vector_store.VectorStoreOperator') as mock_vs_op:
-            mock_vs_op.return_value.vector_store.as_retriever.return_value = mock_retriever
-            _ = LangChainRAGPipeline.from_retriever(config)
-            assert mock_retriever.search_kwargs == {"k": 3, "score_threshold": 0.5}
-
-    def test_auto_retriever_search_kwargs(self):
-        config = RAGPipelineModel(
-            **self.base_dict,
-            search_type=SearchType.MMR,
-            search_kwargs=SearchKwargs(
-                k=2,
-                fetch_k=4,
-                lambda_mult=0.7
-            ),
-            retriever_type=RetrieverType.AUTO
-        )
-        mock_retriever = Mock()
-        mock_retriever.search_kwargs = {"k": 2, "fetch_k": 4, "lambda_mult": 0.7}
-        mock_llm_response = Mock()
-        mock_llm_response.content = '[{"name": "source", "description": "Source field", "type": "string"}]'
-        with patch('mindsdb.integrations.utilities.rag.retrievers.auto_retriever.AutoRetriever') as MockAutoRetriever, \
-             patch('langchain_openai.chat_models.ChatOpenAI.invoke', return_value=mock_llm_response):
-            mock_auto = Mock()
-            mock_auto.as_runnable.return_value = mock_retriever
-            MockAutoRetriever.return_value = mock_auto
-            _ = LangChainRAGPipeline.from_auto_retriever(config)
-            assert mock_retriever.search_kwargs == {"k": 2, "fetch_k": 4, "lambda_mult": 0.7}
-
-    def test_search_kwargs_validation(self):
-        """Test the validation rules for SearchKwargs"""
-        # Test fetch_k validation for MMR search type
-        with pytest.raises(ValueError, match="fetch_k must be greater than k"):
-            RAGPipelineModel(
-                **self.base_dict,
-                search_type=SearchType.MMR,
-                search_kwargs=SearchKwargs(
-                    k=5,
-                    fetch_k=3,
-                    lambda_mult=0.7
-                )
-            )
-
-        # Test MMR parameter requirements
-        with pytest.raises(ValueError, match="lambda_mult is required when using fetch_k"):
-            RAGPipelineModel(
-                **self.base_dict,
-                search_type=SearchType.MMR,
-                search_kwargs=SearchKwargs(
-                    k=3,
-                    fetch_k=5
-                )
-            )
-
-        with pytest.raises(ValueError, match="fetch_k is required when using lambda_mult"):
-            RAGPipelineModel(
-                **self.base_dict,
-                search_type=SearchType.MMR,
-                search_kwargs=SearchKwargs(
-                    k=3,
-                    lambda_mult=0.7
-                )
-            )
-
-        # Test score_threshold requirement for SIMILARITY_SCORE_THRESHOLD
-        with pytest.raises(ValueError, match="score_threshold is required"):
-            RAGPipelineModel(
-                **self.base_dict,
-                search_type=SearchType.SIMILARITY_SCORE_THRESHOLD,
-                search_kwargs=SearchKwargs(
-                    k=3
-                )
-            )
-
-    def test_search_type_compatibility(self):
-        """Test that search kwargs match the search type"""
-        # Test MMR search configuration
-        config = RAGPipelineModel(
-            **self.base_dict,
-            search_type=SearchType.MMR,
-            search_kwargs=SearchKwargs(
-                k=3,
-                fetch_k=6,
-                lambda_mult=0.7
-            )
-        )
-        assert config.search_kwargs.fetch_k == 6
-        assert config.search_kwargs.lambda_mult == 0.7
-
-        # Test similarity_score_threshold configuration
-        config = RAGPipelineModel(
-            **self.base_dict,
-            search_type=SearchType.SIMILARITY_SCORE_THRESHOLD,
-            search_kwargs=SearchKwargs(
-                k=3,
-                score_threshold=0.5
-            )
-        )
-        assert config.search_kwargs.score_threshold == 0.5
-
-        # Test basic similarity configuration
-        config = RAGPipelineModel(
-            **self.base_dict,
-            search_type=SearchType.SIMILARITY,
-            search_kwargs=SearchKwargs(
-                k=3,
-                filter={"source": "test1"}
-            )
-        )
-        assert config.search_kwargs.filter == {"source": "test1"}
-
-    def test_multi_vector_retriever_search_kwargs(self):
-        """Test search kwargs for multi vector retriever"""
-        config = RAGPipelineModel(
-            **self.base_dict,
-            search_type=SearchType.SIMILARITY,
-            search_kwargs=SearchKwargs(
-                k=5,
-                filter={"source": "test1"}
-            ),
-            retriever_type=RetrieverType.MULTI,
-            multi_retriever_mode=MultiVectorRetrieverMode.BOTH
-        )
-
-        mock_retriever = Mock()
-        mock_retriever.search_kwargs = {"k": 5, "filter": {"source": "test1"}}
-
-        with patch('mindsdb.integrations.utilities.rag.pipelines.rag.MultiVectorRetriever') as MockMultiRetrieverClass:
-            class MockMultiRetriever:
-                def __init__(self, config):
-                    self.text_splitter = RecursiveCharacterTextSplitter(
-                        chunk_size=config.chunk_size,
-                        chunk_overlap=config.chunk_overlap
-                    )
-                    self.documents = config.documents
-                    self.config = config
-
-                def as_runnable(self):
-                    return mock_retriever
-
-                def _split_documents(self):
-                    return [], []
-
-                def _generate_id_and_split_document(self, doc):
-                    return str(uuid.uuid4()), [doc]
-
-            MockMultiRetrieverClass.side_effect = MockMultiRetriever
-
-            _ = LangChainRAGPipeline.from_multi_vector_retriever(config)
-            assert mock_retriever.search_kwargs == {"k": 5, "filter": {"source": "test1"}}
diff --git a/tests/unused/integrations/utilities/rag/rerankers/test_openai_reranker.py b/tests/unused/integrations/utilities/rag/rerankers/test_openai_reranker.py
deleted file mode 100644
index 7b0c5cafa91..00000000000
--- a/tests/unused/integrations/utilities/rag/rerankers/test_openai_reranker.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from langchain.schema import Document
-import pytest
-
-from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker
-from mindsdb.integrations.utilities.rag.settings import RerankerConfig
-
-
-@pytest.mark.asyncio
-async def test_openai_reranker():
-    openai_reranker = LLMReranker()
-    results = await openai_reranker.compress_documents(
-        documents=[Document(page_content="Jack declared that he likes cats more than dogs"),
-                   Document(page_content="Jack declared that he likes AI")],
-        query="Jack's opinion on animals",
-    )
-    assert len(results) == 1
-    assert "cats" in results[0].page_content
-
-
-@pytest.mark.asyncio
-async def test_openai_reranker_diff_threshold():
-    openai_reranker = LLMReranker(filtering_threshold=0.6)
-    assert openai_reranker.filtering_threshold == 0.6
-    results = await openai_reranker.compress_documents(
-        documents=[Document(page_content="Jack declared that he likes cats more than dogs"),
-                   Document(page_content="Jack declared that he likes AI")],
-        query="Jack's opinion on animals",
-    )
-    assert len(results) == 1
-    assert "cats" in results[0].page_content
-    assert openai_reranker.filtering_threshold == 0.6
-
-
-@pytest.mark.asyncio
-async def test_openai_reranker_config():
-    config = RerankerConfig(filtering_threshold=0.6, model="gpt-3.5-turbo", base_url="https://api.openai.com/v1")
-    openai_reranker = LLMReranker(filtering_threshold=config.filtering_threshold, model=config.model,
-                                  base_url=config.base_url)
-    assert openai_reranker.filtering_threshold == 0.6
-    results = await openai_reranker.compress_documents(
-        documents=[Document(page_content="Jack declared that he likes cats more than dogs"),
-                   Document(page_content="Jack declared that he likes AI")],
-        query="Jack's opinion on animals",
-    )
-    assert len(results) == 1
-    assert "cats" in results[0].page_content
-    assert openai_reranker.filtering_threshold == 0.6
diff --git a/tests/unused/integrations/utilities/rag/retrievers/test_multi_hop_retriever.py b/tests/unused/integrations/utilities/rag/retrievers/test_multi_hop_retriever.py
deleted file mode 100644
index 9cbb199f966..00000000000
--- a/tests/unused/integrations/utilities/rag/retrievers/test_multi_hop_retriever.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from typing import List, Any, Optional
-
-import pytest
-from langchain_core.documents import Document
-from langchain_core.retrievers import BaseRetriever
-from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import BaseMessage
-
-from mindsdb.integrations.utilities.rag.retrievers import MultiHopRetriever
-
-
-# Simple template for testing
-TEST_TEMPLATE = """Question: {question}
-Context: {context}
-Generate follow-up questions:"""
-
-
-class MockRetriever(BaseRetriever):
-    """Simple mock retriever that returns predefined documents"""
-    def _get_relevant_documents(self, query: str, **kwargs) -> List[Document]:
-        if "Wright brothers" in query:
-            return [Document(page_content="The Wright brothers invented the airplane.")]
-        if "World War 1" in query:
-            return [Document(page_content="Airplanes were used extensively in WWI.")]
-        return []
-
-
-class MockLLM(BaseChatModel):
-    """Simple mock LLM that returns predefined responses"""
-    @property
-    def _llm_type(self) -> str:
-        return "mock"
-
-    def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager: Optional[Any] = None, **kwargs) -> Any:
-        raise NotImplementedError("Not needed for tests")
-
-    def invoke(self, input_str: str, **kwargs) -> str:
-        if "Wright brothers" in str(input_str):
-            return '["How were airplanes used in World War 1?"]'
-        return "[]"
-
-
-class InvalidOutputLLM(BaseChatModel):
-    """Mock LLM that always returns invalid JSON"""
-    @property
-    def _llm_type(self) -> str:
-        return "mock"
-
-    def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager: Optional[Any] = None, **kwargs) -> Any:
-        raise NotImplementedError("Not needed for tests")
-
-    def invoke(self, input_str: str, **kwargs) -> str:
-        return "invalid json"
-
-
-@pytest.fixture
-def mock_retriever():
-    return MockRetriever()
-
-
-@pytest.fixture
-def mock_llm():
-    return MockLLM()
-
-
-def test_multi_hop_retriever_basic_functionality(mock_retriever, mock_llm):
-    """Test the basic functionality of MultiHopRetriever"""
-    retriever = MultiHopRetriever(
-        base_retriever=mock_retriever,
-        llm=mock_llm,
-        max_hops=2,
-        reformulation_template=TEST_TEMPLATE
-    )
-
-    # Test with a query that should trigger follow-up
-    docs = retriever._get_relevant_documents("Tell me about the Wright brothers")
-
-    # Should have documents from both queries
-    assert len(docs) == 2
-    assert any("Wright brothers" in doc.page_content for doc in docs)
-    assert any("WWI" in doc.page_content for doc in docs)
-
-
-def test_multi_hop_retriever_no_results(mock_retriever, mock_llm):
-    """Test behavior when no documents are found"""
-    retriever = MultiHopRetriever(
-        base_retriever=mock_retriever,
-        llm=mock_llm,
-        max_hops=2,
-        reformulation_template=TEST_TEMPLATE
-    )
-
-    # Test with a query that won't find any documents
-    docs = retriever._get_relevant_documents("Something unrelated")
-
-    # Should have no documents
-    assert len(docs) == 0
-
-
-def test_multi_hop_retriever_invalid_llm_output(mock_retriever):
-    """Test handling of invalid LLM output"""
-    retriever = MultiHopRetriever(
-        base_retriever=mock_retriever,
-        llm=InvalidOutputLLM(),
-        max_hops=2,
-        reformulation_template=TEST_TEMPLATE
-    )
-
-    # Should still work and return initial results
-    docs = retriever._get_relevant_documents("Tell me about the Wright brothers")
-    assert len(docs) == 1
-    assert "Wright brothers" in docs[0].page_content
-
-
-def test_multi_hop_retriever_max_hops(mock_retriever, mock_llm):
-    """Test that max_hops is respected"""
-    retriever = MultiHopRetriever(
-        base_retriever=mock_retriever,
-        llm=mock_llm,
-        max_hops=1,  # Only allow 1 hop
-        reformulation_template=TEST_TEMPLATE
-    )
-
-    # Should only get initial documents
-    docs = retriever._get_relevant_documents("Tell me about the Wright brothers")
-    assert len(docs) == 1
-    assert "Wright brothers" in docs[0].page_content
diff --git a/tests/unused/integrations/utilities/rag/test_file_loader.py b/tests/unused/integrations/utilities/rag/test_file_loader.py
deleted file mode 100644
index 092ca0936f2..00000000000
--- a/tests/unused/integrations/utilities/rag/test_file_loader.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from mindsdb.integrations.utilities.rag.loaders.file_loader import FileLoader
-
-
-def test_load_pdf():
-    loader = FileLoader('./tests/data/test.pdf')
-    docs = loader.load()
-    # Each page is a doc.
-    assert len(docs) == 3
-    assert 'THE CASE FOR MACHINE LEARNING' in docs[0].page_content
-    assert 'INTRODUCTION' in docs[1].page_content
-    assert 'THE CASE FOR \nDEMOCRATIZING \nMACHINE LEARNING' in docs[2].page_content
-
-
-def test_load_csv():
-    loader = FileLoader('./tests/data/movies.csv')
-    docs = loader.load()
-    # Each row is a doc.
-    assert len(docs) == 10
-    assert 'Toy Story' in docs[0].page_content
-    assert 'GoldenEye' in docs[9].page_content
-
-
-def test_load_html():
-    loader = FileLoader('./tests/data/test.html')
-    docs = loader.load()
-    assert len(docs) == 1
-    assert 'Some intro text about Foo' in docs[0].page_content
-
-
-def test_load_md():
-    loader = FileLoader('./mindsdb/integrations/handlers/langchain_handler/README.md')
-    docs = loader.load()
-    assert len(docs) == 1
-    assert 'This documentation describes the integration of MindsDB with LangChain' in docs[0].page_content
-
-
-def test_load_text():
-    loader = FileLoader('./tests/data/test.txt')
-    docs = loader.load()
-    assert len(docs) == 1
-    assert 'This is a test plaintext file' in docs[0].page_content
diff --git a/tests/unused/integrations/utilities/rag/test_file_splitter.py b/tests/unused/integrations/utilities/rag/test_file_splitter.py
deleted file mode 100644
index ce757284b4b..00000000000
--- a/tests/unused/integrations/utilities/rag/test_file_splitter.py
+++ /dev/null
@@ -1,164 +0,0 @@
-from unittest.mock import patch
-
-from langchain_core.documents import Document
-from langchain_text_splitters import MarkdownHeaderTextSplitter, HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter
-from mindsdb.integrations.utilities.rag.splitters.file_splitter import FileSplitter, FileSplitterConfig
-
-
-def test_split_documents_pdf():
-    pdf_doc = Document(
-        page_content='This is a test PDF file. Let us try to do some splitting!',
-        metadata={'extension': '.pdf'}
-    )
-    recursive_splitter = RecursiveCharacterTextSplitter()
-    file_splitter = FileSplitter(FileSplitterConfig(
-        recursive_splitter=recursive_splitter
-    ))
-    split_pdf_docs = file_splitter.split_documents([pdf_doc])
-    assert len(split_pdf_docs) > 0
-
-
-def test_split_documents_md():
-    md_content = '''
-    # Unit Testing for Dummies
-    This MD document covers how to write basic unit tests.
-    ## Introduction
-    Unit testing helps ensure code works as expected and prevents regressions. Time to dive in!
-    ## How to Write Tests
-    To be continued!
-'''
-    md_doc = Document(
-        page_content=md_content,
-        metadata={'extension': '.md'}
-    )
-    headers_to_split_on = [
-        ('#', 'Header 1'),
-        ('##', 'Header 2'),
-    ]
-    md_text_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
-    file_splitter = FileSplitter(FileSplitterConfig(
-        markdown_splitter=md_text_splitter
-    ))
-    split_md_docs = file_splitter.split_documents([md_doc])
-    assert len(split_md_docs) == 3
-    # Check we actually split on headers.
-    assert 'This MD document covers how to write basic unit tests.' in split_md_docs[0].page_content
-    assert 'Unit testing helps ensure code works as expected and prevents regressions. Time to dive in!' in split_md_docs[1].page_content
-    assert 'To be continued!' in split_md_docs[2].page_content
-
-
-def test_split_documents_html():
-    html_content = '''
-
-
-
-    
-

Foo

-

Some intro text about Foo.

-
-

Bar main section

-

Some intro text about Bar.

-

Bar subsection 1

-

Some text about the first subtopic of Bar.

-

Bar subsection 2

-

Some text about the second subtopic of Bar.

-
-
-

Baz

-

Some text about Baz

-
-
-

Some concluding text about Foo

-
- - -''' - headers_to_split_on = [ - ('h1', 'Header 1'), - ('h2', 'Header 2'), - ('h3', 'Header 3') - ] - html_text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - file_splitter = FileSplitter(FileSplitterConfig( - html_splitter=html_text_splitter - )) - html_doc = Document( - page_content=html_content, - metadata={'extension': '.html'} - ) - split_html_docs = file_splitter.split_documents([html_doc]) - assert len(split_html_docs) == 8 - # # Check we actually split on headers. - assert 'Foo' in split_html_docs[0].page_content - assert 'Some intro text about Foo' in split_html_docs[1].page_content - assert 'Some intro text about Bar' in split_html_docs[2].page_content - assert 'Some text about the first subtopic of Bar' in split_html_docs[3].page_content - assert 'Some text about the second subtopic of Bar' in split_html_docs[4].page_content - assert 'Baz' in split_html_docs[5].page_content - assert 'Some text about Baz' in split_html_docs[6].page_content - assert 'Some concluding text about Foo' in split_html_docs[7].page_content - - -def test_split_documents_default(): - recursive_splitter = RecursiveCharacterTextSplitter() - file_splitter = FileSplitter(FileSplitterConfig( - recursive_splitter=recursive_splitter - )) - txt_doc = Document( - page_content='This is a text file!', - metadata={'extension': '.txt'} - ) - split_txt_docs = file_splitter.split_documents([txt_doc]) - assert len(split_txt_docs) == 1 - assert 'This is a text file!' in split_txt_docs[0].page_content - - -@patch('mindsdb.integrations.utilities.rag.splitters.file_splitter.MarkdownHeaderTextSplitter') -def test_split_documents_failover(mock_md_splitter): - md_content = ''' - # Unit Testing for Dummies - This MD document covers how to write basic unit tests. - ## Introduction - Unit testing helps ensure code works as expected and prevents regressions. Time to dive in! - ## How to Write Tests - To be continued! -''' - mock_md_splitter.split_text.side_effect = Exception('Something went wrong!') - file_splitter = FileSplitter(FileSplitterConfig( - markdown_splitter=mock_md_splitter - )) - md_doc = Document( - page_content=md_content, - metadata={'extension': '.md'} - ) - - # Should throw an exception and go to default. - split_md_docs = file_splitter.split_documents([md_doc]) - assert len(split_md_docs) > 0 - - -@patch('mindsdb.integrations.utilities.rag.splitters.file_splitter.MarkdownHeaderTextSplitter') -def test_split_documents_no_failover(mock_md_splitter): - md_content = ''' - # Unit Testing for Dummies - This MD document covers how to write basic unit tests. - ## Introduction - Unit testing helps ensure code works as expected and prevents regressions. Time to dive in! - ## How to Write Tests - To be continued! -''' - mock_md_splitter.split_text.side_effect = Exception('Something went wrong!') - file_splitter = FileSplitter(FileSplitterConfig( - markdown_splitter=mock_md_splitter - )) - md_doc = Document( - page_content=md_content, - metadata={'extension': '.md'} - ) - - # Should throw an exception. - try: - _ = file_splitter.split_documents([md_doc], default_failover=False) - except Exception: - return - assert False diff --git a/tests/unused/interfaces/agents/test_agents_controller.py b/tests/unused/interfaces/agents/test_agents_controller.py deleted file mode 100644 index ba5c4b41028..00000000000 --- a/tests/unused/interfaces/agents/test_agents_controller.py +++ /dev/null @@ -1,43 +0,0 @@ -from unittest.mock import patch - -import pandas as pd - -from mindsdb.interfaces.storage import db -from mindsdb.interfaces.agents.agents_controller import AgentsController - - -@patch('mindsdb.api.executor.datahub.datanodes.project_datanode.ProjectDataNode') -@patch('mindsdb.api.executor.datahub.datahub.InformationSchemaDataNode') -@patch('mindsdb.interfaces.model.model_controller.ModelController') -def test_get_completion(mock_model_controller, mock_schema_datanode, mock_project_datanode): - mock_project_datanode_instance = mock_project_datanode.return_value - mock_datanode_instance = mock_schema_datanode.return_value - mock_datanode_instance.get.return_value = mock_project_datanode_instance - mock_model_controller_instance = mock_model_controller.return_value - mock_model_controller_instance.get_model.return_value = { - 'model_name': 'test_model', - 'predict': 'answer', - 'problem_definition': { - 'using': - { - 'prompt_template': 'What is the meaning of life?' - } - } - } - agents_controller = AgentsController( - mock_datanode_instance, - model_controller=mock_model_controller_instance) - - completion_response = {'answer': '42'} - df = pd.DataFrame.from_records([completion_response]) - mock_project_datanode_instance.predict.return_value = df - agent = db.Agents() - agent.model_name = 'test_model' - agent.provider = 'mindsdb' - agent.params = {} - messages = [{'question': 'What is the meaning of life?', 'answer': None}] - completion_df = agents_controller.get_completion(agent, messages) - - assert not completion_df.empty - assert 'answer' in completion_df.columns - assert completion_df['answer'].loc[0] == '42' diff --git a/tests/unused/load/__init__.py b/tests/unused/load/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unused/load/tasks.py b/tests/unused/load/tasks.py deleted file mode 100644 index 9a4e364e9c2..00000000000 --- a/tests/unused/load/tasks.py +++ /dev/null @@ -1,49 +0,0 @@ -from locust import SequentialTaskSet, task, events -from tests.utils.query_generator import QueryGenerator as query -from tests.utils.config import get_value_from_json_env_var, generate_random_db_name - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class BaseDBConnectionBehavior(SequentialTaskSet): - def on_start(self): - """This method is called once for each user when they start.""" - self.query_generator = query() - self.random_db_name = generate_random_db_name(f"{self.db_type}_datasource") - self.create_new_datasource() - - def __post_query(self, query): - try: - response = self.client.post('/api/sql/query', json={'query': query}) - response.raise_for_status() - assert response.json()['type'] != 'error' - return response - except Exception as e: - logger.error(f'Error running {query}: {e}') - events.request.fire(request_type="POST", name="/api/sql/query", response_time=0, response_length=0, exception=e) - self.interrupt(reschedule=True) - - def create_new_datasource(self): - """This method creates a new data source.""" - db_config = get_value_from_json_env_var("INTEGRATIONS_CONFIG", self.db_type) - query = self.query_generator.create_database_query( - self.random_db_name, - self.db_type, - db_config - ) - self.__post_query(query) - - @task - def select_integration_query(self): - """This task performs a SELECT query from integration.""" - query = f'SELECT * FROM {self.random_db_name}.{self.table_name} LIMIT 10' - self.__post_query(query) - - @task - def run_native_query(self): - """This task runs a native DB select query.""" - for n_query in self.native_queries: - query = f'SELECT * FROM {self.random_db_name}( {n_query})' - self.__post_query(query) diff --git a/tests/unused/load/test_postgresql.py b/tests/unused/load/test_postgresql.py deleted file mode 100644 index d7e137ab5c8..00000000000 --- a/tests/unused/load/test_postgresql.py +++ /dev/null @@ -1,15 +0,0 @@ -from tests.load.tasks import BaseDBConnectionBehavior - - -class PostgreSQLConnectionBehavior(BaseDBConnectionBehavior): - """ - This class defines the behavior of a PostgreSQL connection. - @TODO: Read query values from sql_queries.json file - """ - db_type = "postgres" - table_name = "solar_flare_data" - native_queries = ["native_query_average", "native_query_aggregation", "native_query_max", "native_query_grouping"] - native_query_aggregation = f"SELECT COUNT(*) AS total_flares FROM tests.{table_name};" - native_query_average = f"SELECT AVG(peak_c_per_s) AS avg_peak_counts FROM tests.{table_name};" - native_query_max = f"SELECT MAX(energy_kev) AS max_energy FROM tests.{table_name};" - native_query_grouping = f"SELECT active_region_ar, COUNT(*) AS flare_count FROM tests.{table_name} GROUP BY active_region_ar;" diff --git a/tests/unused/load/tests_start.py b/tests/unused/load/tests_start.py deleted file mode 100644 index 3eaed9bf111..00000000000 --- a/tests/unused/load/tests_start.py +++ /dev/null @@ -1,25 +0,0 @@ -from locust import between, HttpUser -from tests.load.test_postgresql import PostgreSQLConnectionBehavior -from tests.utils.config import get_value_from_json_env_var - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class DBConnectionUser(HttpUser): - tasks = [PostgreSQLConnectionBehavior] - wait_time = between(5, 15) - config = get_value_from_json_env_var("INTEGRATIONS_CONFIG", "mindsdb_cloud") - host = config['host'] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - try: - response = self.client.post('/cloud/login', json={ - 'email': self.config['user'], - 'password': self.config['password'] - }) - response.raise_for_status() - except Exception as e: - logger.error(f'Logging to MindsDB failed: {e}') diff --git a/tests/unused/unit/api/http/knowledge_bases_test.py b/tests/unused/unit/api/http/knowledge_bases_test.py deleted file mode 100644 index 0e0f65b1fd7..00000000000 --- a/tests/unused/unit/api/http/knowledge_bases_test.py +++ /dev/null @@ -1,1049 +0,0 @@ -from http import HTTPStatus - -import pytest -from unittest.mock import patch, MagicMock - -from mindsdb.api.mysql.mysql_proxy.mysql_proxy import SQLAnswer -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -def test_prepare(client): - # Create langchain embedding model to use in all tests. - create_ml_engine_query = 'CREATE ML_ENGINE langchain_embedding FROM langchain_embedding;' - create_ml_engine_data = { - 'query': create_ml_engine_query - } - response = client.post('/api/sql/query', json=create_ml_engine_data, follow_redirects=True) - assert response.status_code == HTTPStatus.OK - - # Create model to use in all tests. - create_query = ''' - CREATE MODEL mindsdb.test_embedding_model - PREDICT embeddings - USING - engine='langchain_embedding', - class = 'FakeEmbeddings', - size = 512, - input_columns = ['content']; - ''' - train_data = { - 'query': create_query - } - response = client.post('/api/projects/mindsdb/models', json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - - -def test_get_knowledge_base(client): - get_knowledge_bases_response = client.get('/api/projects/mindsdb/knowledge_bases') - - assert len(get_knowledge_bases_response.get_json()) == 0 - - create_request = { - 'knowledge_base': { - 'name': 'test_get_kb', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - get_knowledge_bases_response = client.get('/api/projects/mindsdb/knowledge_bases') - assert get_knowledge_bases_response.status_code == HTTPStatus.OK - - all_knowledge_bases = get_knowledge_bases_response.get_json() - assert len(all_knowledge_bases) == 1 - - get_knowledge_base_response = client.get('/api/projects/mindsdb/knowledge_bases/test_get_kb') - assert get_knowledge_base_response.status_code == HTTPStatus.OK - - created_knowledge_base = create_response.get_json() - test_get_kb = get_knowledge_base_response.get_json() - expected_create_knowledge_base = { - 'name': 'test_get_kb', - 'embedding_model': 'test_embedding_model', - 'vector_database': 'test_get_kb_chromadb', - 'vector_database_table': 'default_collection', - 'id': created_knowledge_base['id'], - 'project_id': created_knowledge_base['project_id'], - 'params': created_knowledge_base['params'], - 'created_at': created_knowledge_base['created_at'], - 'updated_at': created_knowledge_base['updated_at'] - } - assert created_knowledge_base == expected_create_knowledge_base - assert test_get_kb == expected_create_knowledge_base - - # Returned fields are slightly different for GET all vs POST. - fetched_knowledge_base = all_knowledge_bases[0] - expected_get_knowledge_base = { - 'name': 'test_get_kb', - 'embedding_model': 'test_embedding_model', - 'vector_database': 'test_get_kb_chromadb', - 'vector_database_table': 'default_collection', - 'id': created_knowledge_base['id'], - 'project_name': 'mindsdb', - 'project_id': created_knowledge_base['project_id'], - 'params': created_knowledge_base['params'] - } - assert fetched_knowledge_base == expected_get_knowledge_base - - -def test_create_knowledge_base_no_storage(client): - create_request = { - 'knowledge_base': { - 'name': 'test_kb', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - created_knowledge_base = create_response.get_json() - expected_knowledge_base = { - 'name': 'test_kb', - 'embedding_model': 'test_embedding_model', - 'vector_database': 'test_kb_chromadb', - 'vector_database_table': 'default_collection', - 'id': created_knowledge_base['id'], - 'project_id': created_knowledge_base['project_id'], - 'params': created_knowledge_base['params'], - 'created_at': created_knowledge_base['created_at'], - 'updated_at': created_knowledge_base['updated_at'] - } - assert created_knowledge_base == expected_knowledge_base - - -def test_create_knowledge_base_no_knowledge_base_param(client): - create_request = { - 'name': 'test_kb', - 'model': 'test_embedding_model' - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_knowledge_base_no_name(client): - create_request = { - 'knowledge_base': { - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_knowledge_base_no_model(client): - create_request = { - 'knowledge_base': { - 'name': 'test_kb' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_knowledge_base_no_storage_database(client): - create_request = { - 'knowledge_base': { - 'name': 'test_kb', - 'model': 'test_embedding_model', - 'storage': { - 'table': 'vector_db_table' - } - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_knowledge_base_no_storage_table(client): - create_request = { - 'knowledge_base': { - 'name': 'test_kb', - 'model': 'test_embedding_model', - 'storage': { - 'database': 'vector_db' - } - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_knowledge_base_project_not_found(client): - create_request = { - 'knowledge_base': { - 'name': 'test_kb', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/buoluobao/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.NOT_FOUND - - -def test_create_knowledge_base_already_exists(client): - create_request = { - 'knowledge_base': { - 'name': 'test_kb_already_exists', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CONFLICT - - -def test_delete_knowledge_base(client): - create_request = { - 'knowledge_base': { - 'name': 'test_delete_kb', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - delete_response = client.delete('/api/projects/mindsdb/knowledge_bases/test_delete_kb', follow_redirects=True) - assert delete_response.status_code == HTTPStatus.NO_CONTENT - - get_response = client.get('/api/projects/mindsdb/knowledge_bases/test_delete_kb', follow_redirects=True) - assert get_response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_knowledge_base_project_not_found(client): - delete_response = client.delete('/api/projects/chasiubao/knowledge_bases/test_kb', follow_redirects=True) - assert delete_response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_knowledge_base_not_found(client): - delete_response = client.delete('/api/projects/mindsdb/knowledge_bases/xiaolongbao_kb', follow_redirects=True) - assert delete_response.status_code == HTTPStatus.NOT_FOUND - - -def test_put_knowledge_base_rows(client): - create_request = { - 'knowledge_base': { - 'name': 'test_kb_update_rows', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - content_to_embed = '''To begin with a perfect Peking duck recipe at home, firstly choose head on (easy to hang for air drying out), clean and leaner ducks. - Add around 1 teaspoon of white vinegar in clean water and soak the duck for 1 hour. Then prepare lines and tie the ducks from the top of the neck. - Hang them on hooks. I hang the ducks on the top of kitchen pool. - Please note: I make this peking duck in March when the room temperature is around 13-15 degree C, you will need to hang the duck in fridge or in a room with air conditioner in hot summer days. - ''' - - test_id = 0 - rows_to_insert = [ - {'id': test_id, 'content': content_to_embed} - ] - update_request = { - 'knowledge_base': { - 'rows': rows_to_insert - } - } - - with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table: - # Create a mock KB instance - mock_table_instance = MagicMock() - mock_kb_table.return_value = mock_table_instance - - # Setup the _kb attribute with required params - mock_kb = MagicMock() - mock_kb.params = {'preprocessing': None} # Initial state - mock_table_instance._kb = mock_kb - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_update_rows', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - # Verify insert_rows was called with correct data - mock_table_instance.insert_rows.assert_called_once() - actual_rows = mock_table_instance.insert_rows.call_args[0][0] - assert len(actual_rows) == 1 - assert actual_rows[0]['id'] == test_id - assert actual_rows[0]['content'] == content_to_embed - - -def test_put_knowledge_base_query(client): - create_request = { - 'knowledge_base': { - 'name': 'test_kb_update_query', - 'model': 'test_embedding_model' - } - } - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - content_to_embed = '''To begin with a perfect Peking duck recipe at home, firstly choose head on (easy to hang for air drying out), clean and leaner ducks. - Add around 1 teaspoon of white vinegar in clean water and soak the duck for 1 hour. Then prepare lines and tie the ducks from the top of the neck. - Hang them on hooks. I hang the ducks on the top of kitchen pool. - Please note: I make this peking duck in March when the room temperature is around 13-15 degree C, you will need to hang the duck in fridge or in a room with air conditioner in hot summer days. - ''' - update_request = { - 'knowledge_base': { - 'query': 'SELECT * FROM mock_db.recipes' - } - } - - # Mock the FakeMysqlProxy - mock_proxy = MagicMock() - mock_proxy.process_query.return_value = SQLAnswer( - resp_type=RESPONSE_TYPE.TABLE, - columns=[{'alias': 'id'}, {'name': 'content'}], - data=[(0, content_to_embed)] - ) - - with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table: - mock_table_instance = MagicMock() - mock_kb_table.return_value = mock_table_instance - - # Setup the mock table - mock_kb = MagicMock() - mock_kb.params = {'preprocessing': None} - mock_table_instance._kb = mock_kb - mock_table_instance.mysql_proxy = mock_proxy - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_update_query', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - # Verify insert was called with correct data - mock_table_instance.insert_query_result.assert_called_once_with( - 'SELECT * FROM mock_db.recipes', - 'mindsdb' - ) - - -@pytest.fixture -def create_test_kb(client): - kb_name = 'test_completions_kb' - - # First, try to delete the existing knowledge base if it exists - client.delete(f'/api/projects/mindsdb/knowledge_bases/{kb_name}', follow_redirects=True) - create_kb_request = { - 'knowledge_base': { - 'name': kb_name, - 'model': 'test_embedding_model' - } - } - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_kb_request, - follow_redirects=True) - - # Check if creation was successful - assert create_response.status_code in [ - HTTPStatus.CREATED, - HTTPStatus.OK - ], f"Failed to create knowledge base. Status: {create_response.status}" - - return kb_name - - -def test_successful_completion(client, create_test_kb): - kb_name = create_test_kb - completion_request = { - 'query': 'What is the capital of France?', - } - response = client.post(f'/api/projects/mindsdb/knowledge_bases/{kb_name}/completions', - json=completion_request, follow_redirects=True) - assert response.status_code == HTTPStatus.OK - response_data = response.get_json() - assert 'message' in response_data - assert 'content' in response_data['message'] - assert 'context' in response_data['message'] - assert response_data['message']['role'] == 'assistant' - - -def test_completion_missing_query_parameter(client, create_test_kb): - kb_name = create_test_kb - invalid_request = { - 'knowledge_base': kb_name - } - response = client.post(f'/api/projects/mindsdb/knowledge_bases/{kb_name}/completions', - json=invalid_request, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_completion_non_existent_project(client, create_test_kb): - kb_name = create_test_kb - completion_request = { - 'query': 'What is the capital of France?', - } - response = client.post(f'/api/projects/nonexistent/knowledge_bases/{kb_name}/completions', - json=completion_request, follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_completion_non_existent_knowledge_base(client): - completion_request = { - 'query': 'What is the capital of France?', - } - response = client.post('/api/projects/mindsdb/knowledge_bases/nonexistent_kb/completions', - json=completion_request, follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_create_knowledge_base_with_preprocessing(client): - """Test creating a knowledge base with preprocessing configuration""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_preprocess', - 'model': 'test_embedding_model', - 'preprocessing': { - 'type': 'contextual', - 'contextual_config': { - 'chunk_size': 1000, - 'chunk_overlap': 200, - 'llm_model': 'gpt-4' - } - } - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - created_knowledge_base = create_response.get_json() - assert 'preprocessing' in created_knowledge_base['params'] - assert created_knowledge_base['params']['preprocessing']['type'] == 'contextual' - - # Verify preprocessing config is preserved in GET - get_response = client.get('/api/projects/mindsdb/knowledge_bases/test_kb_preprocess') - assert get_response.status_code == HTTPStatus.OK - kb_data = get_response.get_json() - assert 'preprocessing' in kb_data['params'] - assert kb_data['params']['preprocessing'] == create_request['knowledge_base']['preprocessing'] - - -def test_create_knowledge_base_invalid_preprocessing(client): - """Test creating a knowledge base with invalid preprocessing configuration""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_invalid_preprocess', - 'model': 'test_embedding_model', - 'preprocessing': { - 'type': 'invalid_type', - 'contextual_config': { - 'chunk_size': 'invalid' - } - } - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_put_knowledge_base_with_preprocessing(client): - """Test updating knowledge base with preprocessing""" - # First create a KB - create_request = { - 'knowledge_base': { - 'name': 'test_kb_update_preprocess', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - # Update with content and preprocessing - content_to_embed = [ - "First document to be processed and chunked.", - "Second document with different content for testing.", - ] - rows_to_insert = [ - {'content': content_to_embed[0]}, - {'content': content_to_embed[1]} - ] - - update_request = { - 'knowledge_base': { - 'rows': rows_to_insert, - 'preprocessing': { - 'type': 'contextual', - 'contextual_config': { - 'chunk_size': 500, - 'chunk_overlap': 100, - 'llm_model': 'gpt-4' - } - } - } - } - - with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table: - # Create a mock KB instance - mock_table_instance = MagicMock() - mock_kb_table.return_value = mock_table_instance - - # Setup the _kb attribute with required params - mock_kb = MagicMock() - mock_kb.params = {'preprocessing': None} # Initial state - mock_table_instance._kb = mock_kb - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_update_preprocess', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - # Verify preprocessing was configured - mock_table_instance.configure_preprocessing.assert_called_with({ - 'type': 'contextual', - 'contextual_config': { - 'chunk_size': 500, - 'chunk_overlap': 100, - 'llm_model': 'gpt-4' - } - }) - - # Verify rows were inserted - mock_table_instance.insert_rows.assert_called_once() - inserted_rows = mock_table_instance.insert_rows.call_args[0][0] - assert len(inserted_rows) == 2 - assert inserted_rows[0]['content'] == content_to_embed[0] - assert inserted_rows[1]['content'] == content_to_embed[1] - - -def test_put_knowledge_base_invalid_preprocessing(client): - """Test updating knowledge base with invalid preprocessing config""" - # First create a KB - create_request = { - 'knowledge_base': { - 'name': 'test_kb_invalid_update_preprocess', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - # Update with invalid preprocessing config - update_request = { - 'knowledge_base': { - 'rows': [{'content': 'test content'}], - 'preprocessing': { - 'type': 'invalid_type', - 'invalid_config': { - 'invalid_param': 'invalid_value' - } - } - } - } - - with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table: - # Create a mock KB instance - mock_table_instance = MagicMock() - mock_kb_table.return_value = mock_table_instance - - # Setup the _kb attribute with required params - mock_kb = MagicMock() - mock_kb.params = {'preprocessing': None} - mock_table_instance._kb = mock_kb - - # Configure the mock to raise an error when invalid preprocessing config is provided - mock_table_instance.configure_preprocessing.side_effect = ValueError("Invalid preprocessing type") - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_invalid_update_preprocess', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_put_knowledge_base_with_documents(client): - """Test updating knowledge base with Document objects""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_documents', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - # Test data - test_documents = [ - { - 'content': 'First test document content', - 'metadata': {'source': 'test1', 'category': 'A'} - }, - { - 'content': 'Second test document content', - 'metadata': {'source': 'test2', 'category': 'B'} - } - ] - - update_request = { - 'knowledge_base': { - 'rows': test_documents - } - } - - with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table: - mock_table_instance = MagicMock() - mock_kb_table.return_value = mock_table_instance - - # Mock the dependencies - mock_kb = MagicMock() - mock_kb.params = {'preprocessing': None} - mock_table_instance._kb = mock_kb - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_documents', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - # Verify insert_rows was called with correct Document objects - mock_table_instance.insert_rows.assert_called_once() - inserted_rows = mock_table_instance.insert_rows.call_args[0][0] - assert len(inserted_rows) == 2 - assert all(isinstance(row, dict) for row in inserted_rows) - assert inserted_rows[0]['content'] == test_documents[0]['content'] - assert inserted_rows[0]['metadata'] == test_documents[0]['metadata'] - - -def test_put_knowledge_base_mixed_content(client): - """Test updating knowledge base with multiple content types""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_mixed', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - update_request = { - 'knowledge_base': { - 'rows': [{'content': 'Test content', 'metadata': {'source': 'manual'}}], - 'files': ['test_file.txt'], - 'urls': ['http: //example.com'], - 'query': 'SELECT * FROM test_table', - 'preprocessing': { - 'type': 'contextual', - 'contextual_config': { - 'chunk_size': 500, - 'chunk_overlap': 50 - } - } - } - } - - with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table: - mock_table_instance = MagicMock() - mock_kb_table.return_value = mock_table_instance - - # Mock the dependencies - mock_kb = MagicMock() - mock_kb.params = {'preprocessing': None} - mock_table_instance._kb = mock_kb - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_mixed', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - -def test_create_knowledge_base_with_text_chunking(client): - """Test creating a knowledge base with text chunking preprocessing configuration""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_text_chunking', - 'model': 'test_embedding_model', - 'preprocessing': { - 'type': 'text_chunking', - 'text_chunking_config': { - 'chunk_size': 500, - 'chunk_overlap': 50, - 'separators': ["\n\n", "\n", ".", " "] - } - } - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - created_knowledge_base = create_response.get_json() - assert 'preprocessing' in created_knowledge_base['params'] - assert created_knowledge_base['params']['preprocessing']['type'] == 'text_chunking' - - # Verify preprocessing config is preserved in GET - get_response = client.get('/api/projects/mindsdb/knowledge_bases/test_kb_text_chunking') - assert get_response.status_code == HTTPStatus.OK - kb_data = get_response.get_json() - assert 'preprocessing' in kb_data['params'] - assert kb_data['params']['preprocessing'] == create_request['knowledge_base']['preprocessing'] - - -def test_create_knowledge_base_invalid_text_chunking(client): - """Test creating a knowledge base with invalid text chunking configuration""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_invalid_chunking', - 'model': 'test_embedding_model', - 'preprocessing': { - 'type': 'text_chunking', - 'text_chunking_config': { - 'chunk_size': -100, # Invalid negative size - 'chunk_overlap': "invalid", # Invalid type - } - } - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_put_knowledge_base_default_preprocessing(client): - """Test that text chunking is used as default preprocessing when none specified""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_default_preprocess', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - update_request = { - 'knowledge_base': { - 'rows': [{ - 'content': 'Test content for default preprocessing', - 'source': 'test' # Flat metadata instead of nested - }] - } - } - - # Send update request directly - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_default_preprocess', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - -def test_put_knowledge_base_missing_metadata(client): - """Test proper error handling when metadata is missing""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_metadata', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - # Request with missing source - update_request = { - 'knowledge_base': { - 'rows': [{ - 'content': 'Test content without metadata' - }] - } - } - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_metadata', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK # Should succeed with default metadata - - -def test_document_processing_with_valid_metadata(client): - """Test document processing with valid metadata configuration""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_valid_metadata', - 'model': 'test_embedding_model', - 'preprocessing': { - 'type': 'text_chunking', - 'text_chunking_config': { - 'chunk_size': 100, - 'chunk_overlap': 20 - } - } - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - # Various valid metadata configurations as flat key-value pairs - test_rows = [ - { - 'content': 'First test content', - 'source': 'test1', - 'category': 'A' - }, - { - 'content': 'Second test content', - 'source': 'test2', - 'doc_type': 'document' - }, - { - 'content': 'Third test content', - 'source': 'test3', - 'tag': 'tag1' - } - ] - - update_request = { - 'knowledge_base': { - 'rows': test_rows - } - } - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_valid_metadata', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - -def test_document_processing_with_default_metadata(client): - """Test document processing where system adds default metadata""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_default_metadata', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - # Only provide content, system should add default metadata - update_request = { - 'knowledge_base': { - 'rows': [{ - 'content': 'Test content with system metadata', - 'source': 'api' # This will be moved to metadata - }] - } - } - - # Need to patch both controller and preprocessor - with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table: - mock_instance = MagicMock() - mock_kb_table.return_value = mock_instance - - # Mock KB params - mock_kb = MagicMock() - mock_kb.params = {} - mock_instance._kb = mock_kb - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_default_metadata', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - # Verify that insert_rows was called with the correct data - mock_instance.insert_rows.assert_called_once() - inserted_rows = mock_instance.insert_rows.call_args[0][0] - assert len(inserted_rows) == 1 - assert inserted_rows[0]['content'] == 'Test content with system metadata' - assert inserted_rows[0]['source'] == 'api' - - -def test_document_loader_with_file_extensions(client): - """Test document loader handling different file extensions""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_file_extensions', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - # Create test files - with patch('mindsdb.interfaces.file.file_controller.FileController') as mock_file_controller: - # Mock file existence checks - mock_file_controller.return_value.get_file_path.return_value = MagicMock() - - update_request = { - 'knowledge_base': { - 'files': ['test.md', 'test.html', 'test.pdf'] - } - } - - with patch('mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable') as mock_kb_table: - mock_table_instance = MagicMock() - mock_kb_table.return_value = mock_table_instance - - # Mock KB params - mock_kb = MagicMock() - mock_kb.params = {} - mock_table_instance._kb = mock_kb - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_file_extensions', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - # Verify document loader was used - mock_table_instance.insert_files.assert_called_once_with(['test.md', 'test.html', 'test.pdf']) - - -def test_document_loader_sql_error_handling(client): - """Test document loader handling SQL query errors""" - create_request = { - 'knowledge_base': { - 'name': 'test_kb_sql_errors', - 'model': 'test_embedding_model' - } - } - - create_response = client.post('/api/projects/mindsdb/knowledge_bases', json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - # This will not use the document loader. - update_request = { - 'knowledge_base': { - 'query': 'INVALID SQL QUERY' - } - } - - update_response = client.put( - '/api/projects/mindsdb/knowledge_bases/test_kb_sql_errors', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_preprocessing_update(client): - """Test updating preprocessing configuration""" - initial_request = { - 'knowledge_base': { - 'name': 'test_kb_preprocess_update', - 'model': 'test_embedding_model' - } - } - - # First try to delete if exists - client.delete( - f'/api/projects/mindsdb/knowledge_bases/{initial_request["knowledge_base"]["name"]}', - follow_redirects=True - ) - - with patch('mindsdb.api.http.namespaces.knowledge_bases.SessionController') as mock_session_class: - mock_session = MagicMock() - mock_session_class.return_value = mock_session - - # Setup KB controller mock - mock_kb_controller = MagicMock() - mock_session.kb_controller = mock_kb_controller - - # Setup table mock - mock_table = MagicMock() - mock_kb_controller.get_table.return_value = mock_table - - # Setup KB object mock - mock_kb = MagicMock() - mock_kb.params = {} - mock_kb.id = 1 - mock_kb.name = initial_request['knowledge_base']['name'] - mock_kb.as_dict.return_value = { - 'id': 1, - 'name': initial_request['knowledge_base']['name'], - 'params': mock_kb.params, - 'created_at': '2024-11-07T12: 13: 46', - 'updated_at': '2024-11-07T12: 13: 46' - } - - # Setup controller methods - mock_kb_controller.get.side_effect = [None, mock_kb] - mock_kb_controller.add.return_value = mock_kb - - create_response = client.post( - '/api/projects/mindsdb/knowledge_bases', - json=initial_request, - follow_redirects=True - ) - - assert create_response.status_code == HTTPStatus.CREATED - - # Now update with preprocessing config - update_request = { - 'knowledge_base': { - 'preprocessing': { - 'type': 'text_chunking', - 'text_chunking_config': { - 'chunk_size': 300, - 'chunk_overlap': 30 - } - } - } - } - - # Reset get to always return the KB now that it exists - mock_kb_controller.get.reset_mock() - mock_kb_controller.get.return_value = mock_kb - - update_response = client.put( - f'/api/projects/mindsdb/knowledge_bases/{initial_request["knowledge_base"]["name"]}', - json=update_request, - follow_redirects=True - ) - - assert update_response.status_code == HTTPStatus.OK - - # Verify preprocessing was updated - mock_table.configure_preprocessing.assert_called_with( - update_request['knowledge_base']['preprocessing'] - ) diff --git a/tests/unused/unit/broken/test_knowledge_base.py b/tests/unused/unit/broken/test_knowledge_base.py deleted file mode 100644 index 02e224003c6..00000000000 --- a/tests/unused/unit/broken/test_knowledge_base.py +++ /dev/null @@ -1,452 +0,0 @@ -import tempfile -import time -from unittest.mock import patch - -import pandas as pd -import pytest -from mindsdb_sql_parser import parse_sql - -from mindsdb.interfaces.storage.db import KnowledgeBase - -from tests.unit.executor_test_base import BaseExecutorTest - - -class TestKnowledgeBase(BaseExecutorTest): - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def wait_predictor(self, project, name): - # wait - done = False - for _ in range(200): - ret = self.run_sql(f"select * from {project}.models where name='{name}'") - if not ret.empty: - if ret["STATUS"][0] == "complete": - done = True - break - elif ret["STATUS"][0] == "error": - break - time.sleep(0.5) - if not done: - raise RuntimeError("predictor wasn't created") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def setup_method(self, method, mock_handler): - super().setup_method() - - vectordatabase_name = "chroma_test" - - # create a vector database table - tmp_directory = tempfile.mkdtemp() - self.run_sql( - f""" - CREATE DATABASE {vectordatabase_name} - WITH ENGINE = "chromadb", - PARAMETERS = {{ - "persist_directory" : "{tmp_directory}" - }} - """ - ) - - # mock the data - df = pd.DataFrame( - { - "id": ["id1", "id2", "id3"], - "content": ["content1", "content2", "content3"], - "metadata": [ - '{"datasource": "web", "some_field": "some_value"}', - '{"datasource": "web"}', - '{"datasource": "web"}', - ], - "embeddings": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - } - ) - - self.save_file("df", df) - - # create the table - vectordatabase_table_name = "test_table" - sql = f""" - CREATE TABLE chroma_test.{vectordatabase_table_name} - ( - SELECT * FROM files.df - ) - """ - self.run_sql(sql) - - # create an embedding model - embedding_model_name = "test_dummy_embedding" - self.run_sql( - f""" - CREATE MODEL {embedding_model_name} - PREDICT embeddings - USING - engine='langchain_embedding', - class = 'FakeEmbeddings', - size = 3, - input_columns = ['content'] - """ - ) - - self.wait_predictor("mindsdb", embedding_model_name) - self.vector_database_table_name = vectordatabase_table_name - self.vector_database_name = vectordatabase_name - self.embedding_model_name = embedding_model_name - self.database_path = tmp_directory - - def teardown_method(self, method): - # drop the vector database - self.run_sql("DROP KNOWLEDGE_BASE IF EXISTS test_kb") - self.run_sql(f"DROP TABLE {self.vector_database_name}.{self.vector_database_table_name}") - self.run_sql(f"DROP DATABASE {self.vector_database_name}") - - def test_create_kb(self): - # create knowledge base - sql = f""" - CREATE KNOWLEDGE BASE test_kb - USING - MODEL = {self.embedding_model_name}, - STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - """ - self.run_sql(sql) - - # verify the knowledge base is created - kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb").first() - assert kb_obj is not None - - # create a knowledge base from select - # todo this should be supported but isn't yet - - # sql = f""" - # CREATE KNOWLEDGE BASE test_kb2 - # FROM ( - # SELECT content, embeddings, metadata - # FROM {self.vector_database_name}.{self.vector_database_table_name} - # ) - # USING - # MODEL = {self.embedding_model_name}, - # STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - # """ - # - # self.run_sql(sql) - # - # # verify the knowledge base is created - # kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb2").first() - # assert kb_obj is not None - - # create a knowledge base with invalid model and storage name should throw an exception - sql = f""" - CREATE KNOWLEDGE BASE test_kb3 - USING - MODEL = invalid_model_name, - STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - """ - with pytest.raises(Exception): - self.run_sql(sql) - - sql = f""" - CREATE KNOWLEDGE BASE test_kb4 - USING - MODEL = {self.embedding_model_name}, - STORAGE = invalid_storage_name - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # create a knowledge base without a storage name, default should be used - - sql = f""" - CREATE KNOWLEDGE BASE test_kb5 - USING - MODEL = {self.embedding_model_name} - """ - - self.run_sql(sql) - - # verify the knowledge base is created - kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb5").first() - assert kb_obj is not None - assert kb_obj.vector_database.name == "test_kb5_chromadb" - - # create a knowledge base without a model name, default should be used - sql = f""" - CREATE KNOWLEDGE BASE test_kb6 - USING - STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - """ - - self.run_sql(sql) - - # verify the knowledge base is created - kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb6").first() - assert kb_obj is not None - assert kb_obj.embedding_model.name == "kb_default_embedding_model" - # clean up - self.run_sql("DROP KNOWLEDGE BASE test_kb6") - - def test_drop_kb(self): - # create a knowledge base - sql = f""" - CREATE KNOWLEDGE BASE test_kb_delete - USING - MODEL = {self.embedding_model_name}, - STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - """ - self.run_sql(sql) - - # verify the knowledge base is created - kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb_delete").first() - assert kb_obj is not None - - # drop a knowledge base - sql = """ - DROP KNOWLEDGE BASE test_kb_delete - """ - self.run_sql(sql) - - # verify the knowledge base is dropped - kb_obj = self.db.session.query(KnowledgeBase).filter_by(name="test_kb").first() - assert kb_obj is None - - def test_select_from_kb(self): - # create the knowledge base - sql = f""" - CREATE KNOWLEDGE BASE test_kb - USING - MODEL = {self.embedding_model_name}, - STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - """ - self.run_sql(sql) - - # select from the knowledge base without any filters - sql = """ - SELECT * - FROM test_kb - """ - df = self.run_sql(sql) - assert df.shape[0] == 3 - - # select from the knowledge base with an id filter - sql = """ - SELECT * - FROM test_kb - WHERE id = 'id1' - """ - df = self.run_sql(sql) - assert df.shape[0] == 1 - - # select from the knowledge base with a metadata filter - sql = """ - SELECT * - FROM test_kb - WHERE - `metadata.some_field` = 'some_value' - """ - df = self.run_sql(sql) - assert df.shape[0] == 1 - - # select with a search query - sql = """ - SELECT * - FROM test_kb - WHERE - content = 'some query' - LIMIT 1 - """ - df = self.run_sql(sql) - assert df.shape[0] == 1 - - @pytest.mark.skip(reason="need to cleanly mock embedding model predict") - def test_insert_into_kb(self): - # create the knowledge base - sql = f""" - CREATE KNOWLEDGE BASE test_kb - USING - MODEL = {self.embedding_model_name}, - STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - """ - self.run_sql(sql) - - # insert into the knowledge base using values - sql = """ - INSERT INTO test_kb (id, content, embeddings, metadata) - VALUES ( - 'id4', - 'content4', - '[4, 5, 6]', - '{"d": 4}' - ) - - """ - self.run_sql(sql) - - # verify the knowledge base is updated - sql = """ - SELECT * - FROM test_kb - WHERE id = 'id4' - """ - df = self.run_sql(sql) - assert df.shape[0] == 1 - - # insert into the knowledge base using a select - sql = """ - INSERT INTO test_kb - SELECT - content, metadata - FROM files.df - """ - self.run_sql(sql) - - # verify the knowledge base is updated - sql = """ - SELECT * - FROM test_kb - """ - - df = self.run_sql(sql) - assert df.shape[0] == 7 - - @pytest.mark.skip(reason="Not implemented") - def test_update_kb(self): - ... - - def test_delete_from_kb(self): - # create the knowledge base - sql = f""" - CREATE KNOWLEDGE BASE test_kb - USING - MODEL = {self.embedding_model_name}, - STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - """ - - self.run_sql(sql) - - # delete with id filter - sql = """ - DELETE FROM test_kb - WHERE id = 'id1' - """ - self.run_sql(sql) - - # verify the knowledge base is updated - sql = """ - SELECT * - FROM test_kb - WHERE id = 'id1' - """ - df = self.run_sql(sql) - assert df.shape[0] == 0 - - # delete with metadata filter - sql = """ - DELETE FROM test_kb - WHERE `metadata.datasource` = 'web' - """ - self.run_sql(sql) - - # verify the knowledge base is updated - sql = """ - SELECT * - FROM test_kb - WHERE id = 'id2' - """ - df = self.run_sql(sql) - assert df.shape[0] == 0 - - # delete from the knowledge base without any filters is not allowed - sql = """ - DELETE FROM test_kb - """ - with pytest.raises(Exception): - self.run_sql(sql) - - def test_show_knowledge_bases(self): - # create the knowledge base - sql = f""" - CREATE KNOWLEDGE BASE test_kb - USING - MODEL = {self.embedding_model_name}, - STORAGE = {self.vector_database_name}.{self.vector_database_table_name} - """ - self.run_sql(sql) - - # show knowledge bases - sql = """ - SHOW KNOWLEDGE BASES - """ - df = self.run_sql(sql) - assert df.shape[0] == 1 - - @pytest.mark.skip(reason="need to cleanly mock embedding model predict") - def test_kb_params(self): - - df = pd.DataFrame([ - {'id': 1, 'ticket': 'NFLX', 'value': 532, 'created_at': '2020-01-01', 'ma': 100}, - {'id': 2, 'ticket': 'MSFT', 'value': 311, 'created_at': '2020-01-02', 'ma': 200}, - ]) - - self.save_file('stock', df) - - # ---- default ---- - self.run_sql(f'create knowledge base kb_test USING MODEL = {self.embedding_model_name}') - self.run_sql('INSERT INTO kb_test select * from files.stock') - ret = self.run_sql("select * from kb_test where content='msft'") - self.run_sql('drop knowledge base kb_test') # have to drop KB with model and vector sore before assertions - - # second row is the result, all columns in content - content = ret.content[0] - assert 'MSFT' in content and 'created_at' in content and '311' in content and '200' in content - - # metadata is empty - assert ret.metadata[0] is None - - # id = 2 - assert ret.id[0] == '2' - - # ---- choose content ---- - self.run_sql(''' - create knowledge base kb_test - using content_columns = ['ticket', 'value'] - ''') - self.run_sql('INSERT INTO kb_test select * from files.stock') - ret = self.run_sql("select * from kb_test where content='msft'") - self.run_sql('drop knowledge base kb_test') - - metadata = ret.metadata[0] - content = ret.content[0] - # ticket and value in content - assert 'MSFT' in content and '311' in content - # created and ma in metadata - assert 'created_at' in metadata and 'ma' in metadata - - # ---- choose metadata ---- - self.run_sql(''' - create knowledge base kb_test - using metadata_columns = ['created_at', 'value'] - ''') - self.run_sql('INSERT INTO kb_test select * from files.stock') - ret = self.run_sql("select * from kb_test where content='msft'") - self.run_sql('drop knowledge base kb_test') - - metadata = ret.metadata[0] - content = ret.content[0] - # ticket and ma in content - assert 'MSFT' in content and '200' in content - # created and value in metadata - assert 'created_at' in metadata and 'value' in metadata - - # ---- choose id ---- - self.run_sql(''' - create knowledge base kb_test - using id_column='ma' - ''') - self.run_sql('INSERT INTO kb_test select * from files.stock') - ret = self.run_sql("select * from kb_test where content='msft'") - self.run_sql('drop knowledge base kb_test') - - # id = 200 - assert ret.id[0] == '200' diff --git a/tests/unused/unit/broken/test_map_reduce_summarizer_chain.py b/tests/unused/unit/broken/test_map_reduce_summarizer_chain.py deleted file mode 100644 index 2953a94ad6f..00000000000 --- a/tests/unused/unit/broken/test_map_reduce_summarizer_chain.py +++ /dev/null @@ -1,76 +0,0 @@ -from unittest.mock import AsyncMock, MagicMock - -import pandas as pd -from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain -from langchain_core.documents import Document - -from mindsdb.integrations.libs.vectordatabase_handler import VectorStoreHandler -from mindsdb.integrations.utilities.rag.chains.map_reduce_summarizer_chain import MapReduceSummarizerChain -from mindsdb.integrations.utilities.rag.settings import SummarizationConfig -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator - - -class TestMapReduceSummarizerChain: - def test_summarizes_documents(self): - mock_vector_store_handler = MagicMock(spec=VectorStoreHandler, wraps=VectorStoreHandler) - mock_vector_store_handler.select.side_effect = [ - pd.DataFrame.from_records([ - {'content': 'Chunk 1'}, - {'content': 'Chunk 2'}, - ]), - pd.DataFrame.from_records([ - {'content': 'Chunk 3'} - ]) - ] - mock_map_reduce_documents_chain = AsyncMock(spec=MapReduceDocumentsChain, wraps=MapReduceDocumentsChain) - mock_map_reduce_documents_chain.ainvoke.side_effect = [{'output_text': 'Final summary 1'}, {'output_text': 'Final summary 2'}] - test_summarizer_chain = MapReduceSummarizerChain( - vector_store_handler=mock_vector_store_handler, - map_reduce_documents_chain=mock_map_reduce_documents_chain, - summarization_config=SummarizationConfig() - ) - - chain_input = { - 'context': [ - Document(page_content='Chunk 1', metadata={'original_row_id': '1'}), - Document(page_content='Chunk 2', metadata={'original_row_id': '1'}), - Document(page_content='Chunk 3', metadata={'original_row_id': '2'}) - ], - 'question': 'What is the answer to life?', - } - actual_chain_output = test_summarizer_chain.invoke(chain_input) - - # Make sure we select from the vector store correctly. - mock_vector_store_handler.select.assert_any_call( - 'embeddings', - columns=['content', 'metadata'], - conditions=[FilterCondition( - "metadata->>'original_row_id'", - FilterOperator.EQUAL, - '1' - )] - ) - mock_vector_store_handler.select.assert_any_call( - 'embeddings', - columns=['content', 'metadata'], - conditions=[FilterCondition( - "metadata->>'original_row_id'", - FilterOperator.EQUAL, - '2' - )] - ) - - # Make sure we are calling the summarization chain with the right chunks. - mock_map_reduce_documents_chain.ainvoke.assert_awaited() - - # Make sure the summary is actually added to the context. - expected_chain_output = { - 'context': [ - Document(page_content='Chunk 1', metadata={'original_row_id': '1', 'summary': 'Final summary 1'}), - Document(page_content='Chunk 2', metadata={'original_row_id': '1', 'summary': 'Final summary 1'}), - Document(page_content='Chunk 3', metadata={'original_row_id': '2', 'summary': 'Final summary 2'}) - ], - 'question': 'What is the answer to life?', - } - - assert actual_chain_output == expected_chain_output diff --git a/tests/unused/unit/broken/test_sql_retriever.py b/tests/unused/unit/broken/test_sql_retriever.py deleted file mode 100644 index 7734739540b..00000000000 --- a/tests/unused/unit/broken/test_sql_retriever.py +++ /dev/null @@ -1,313 +0,0 @@ -from unittest.mock import MagicMock - -import pandas as pd -from langchain_core.documents import Document -from langchain_core.embeddings import Embeddings -from langchain_core.outputs.generation import Generation -from langchain_core.outputs.llm_result import LLMResult -from langchain_core.retrievers import BaseRetriever -from langchain_openai.chat_models.base import ChatOpenAI - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse -from mindsdb.integrations.libs.vectordatabase_handler import DistanceFunction, VectorStoreHandler -from mindsdb.integrations.utilities.rag.retrievers.sql_retriever import SQLRetriever -from mindsdb.integrations.utilities.rag.settings import DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE, DEFAULT_SEMANTIC_PROMPT_TEMPLATE, ColumnSchema, MetadataSchema, SearchKwargs - - -class TestSQLRetriever: - def test_basic(self): - llm = MagicMock(spec=ChatOpenAI, wraps=ChatOpenAI) - llm_result = MagicMock(spec=LLMResult, wraps=LLMResult) - llm_result.generations = [ - [ - Generation( - text='''```json -{ - "filters": [ - { - "attribute": "ContributorName", - "comparator": "=", - "value": "Alfred" - } - ] -} -```''' - ) - ] - ] - llm.generate_prompt.return_value = llm_result - vector_db_mock = MagicMock(spec=VectorStoreHandler, wraps=VectorStoreHandler) - series = pd.Series( - [0, 'Chunk1', '[1.0, 2.0, 3.0]', {'key1': 'value1'}, 0, 1], - index=['id', 'content', 'embeddings', 'metadata', 'Id', 'Type'] - ) - df = pd.DataFrame([series]) - vector_db_mock.native_query.return_value = HandlerResponse( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - embeddings_mock = MagicMock(spec=Embeddings, wraps=Embeddings) - embeddings_mock.embed_query.return_value = list(range(768)) - - source_schema = MetadataSchema( - table='test_source_table', - description='Contains source documents', - columns=[ - ColumnSchema(name='Id', type='int', description='Unique ID as primary key of doc'), - ColumnSchema(name='Type', type='int', description='Document Type', values={1: 'Unknown', 2: 'Site Audit'}) - ] - ) - unit_schema = MetadataSchema( - table='unit', - description='Contains information about specific units of power plants. Several units can be part of a single plant.', - columns=[ - ColumnSchema(name='UnitKey', type='int', description='Unique ID of the unit'), - ColumnSchema(name='PlantKey', type='int', description='ID of the plant the unit belongs to') - ] - ) - plant_schema = MetadataSchema( - table='plant', - description='Contains information about specific power plants', - columns=[ - ColumnSchema(name='PlantKey', type='int', description='The unique ID of the plant'), - ColumnSchema(name='PlantName', type='str', description='The name of the plant') - ] - ) - document_unit_schema = MetadataSchema( - table='document_unit', - description='Links documents to the power plant they are relevant to', - columns=[ - ColumnSchema(name='DocumentId', type='int', description='The ID of the document associated with the unit'), - ColumnSchema(name='UnitKey', type='int', description='The ID of the unit the documnet is associated with') - ] - ) - all_schemas = [source_schema, unit_schema, plant_schema, document_unit_schema] - fallback_retriever = MagicMock(spec=BaseRetriever, wraps=BaseRetriever) - sql_retriever = SQLRetriever( - fallback_retriever=fallback_retriever, - vector_store_handler=vector_db_mock, - metadata_schemas=all_schemas, - embeddings_model=embeddings_mock, - metadata_filters_prompt_template=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE, - rewrite_prompt_template=DEFAULT_SEMANTIC_PROMPT_TEMPLATE, - num_retries=2, - embeddings_table='test_embeddings_table', - source_table='test_source_table', - distance_function=DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE, - search_kwargs=SearchKwargs(k=5), - llm=llm - ) - - docs = sql_retriever.invoke('What are Beaver Valley plant documents for nuclear fuel waste?') - # Make sure right doc was retrieved. - assert len(docs) == 1 - assert docs[0].page_content == 'Chunk1' - assert docs[0].metadata == {'key1': 'value1'} - - def test_retries(self): - llm = MagicMock(spec=ChatOpenAI, wraps=ChatOpenAI) - llm_result = MagicMock(spec=LLMResult, wraps=LLMResult) - llm_result.generations = [ - [ - Generation( - text='''```json -{ - "filters": [ - { - "attribute": "ContributorName", - "comparator": "=", - "value": "Alfred" - } - ] -} -```''' - ) - ] - ] - llm.generate_prompt.return_value = llm_result - vector_db_mock = MagicMock(spec=VectorStoreHandler, wraps=VectorStoreHandler) - series = pd.Series( - [0, 'Chunk1', '[1.0, 2.0, 3.0]', {'key1': 'value1'}, 0, 1], - index=['id', 'content', 'embeddings', 'metadata', 'Id', 'Type'] - ) - df = pd.DataFrame([series]) - vector_db_mock.native_query.side_effect = [ - HandlerResponse( - RESPONSE_TYPE.ERROR, - error_message='Something went wrong I am in absolute shambles' - ), - HandlerResponse( - RESPONSE_TYPE.ERROR, - error_message='Something went wrong I am in absolute shambles' - ), - HandlerResponse( - RESPONSE_TYPE.TABLE, - data_frame=df - ) - ] - embeddings_mock = MagicMock(spec=Embeddings, wraps=Embeddings) - embeddings_mock.embed_query.return_value = list(range(768)) - - source_schema = MetadataSchema( - table='test_source_table', - description='Contains source documents', - columns=[ - ColumnSchema(name='Id', type='int', description='Unique ID as primary key of doc'), - ColumnSchema(name='Type', type='int', description='Document Type', values={1: 'Unknown', 2: 'Site Audit'}) - ] - ) - unit_schema = MetadataSchema( - table='unit', - description='Contains information about specific units of power plants. Several units can be part of a single plant.', - columns=[ - ColumnSchema(name='UnitKey', type='int', description='Unique ID of the unit'), - ColumnSchema(name='PlantKey', type='int', description='ID of the plant the unit belongs to') - ] - ) - plant_schema = MetadataSchema( - table='plant', - description='Contains information about specific power plants', - columns=[ - ColumnSchema(name='PlantKey', type='int', description='The unique ID of the plant'), - ColumnSchema(name='PlantName', type='str', description='The name of the plant') - ] - ) - document_unit_schema = MetadataSchema( - table='document_unit', - description='Links documents to the power plant they are relevant to', - columns=[ - ColumnSchema(name='DocumentId', type='int', description='The ID of the document associated with the unit'), - ColumnSchema(name='UnitKey', type='int', description='The ID of the unit the documnet is associated with') - ] - ) - all_schemas = [source_schema, unit_schema, plant_schema, document_unit_schema] - fallback_retriever = MagicMock(spec=BaseRetriever, wraps=BaseRetriever) - sql_retriever = SQLRetriever( - fallback_retriever=fallback_retriever, - vector_store_handler=vector_db_mock, - metadata_schemas=all_schemas, - embeddings_model=embeddings_mock, - metadata_filters_prompt_template=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE, - rewrite_prompt_template=DEFAULT_SEMANTIC_PROMPT_TEMPLATE, - num_retries=3, - embeddings_table='test_embeddings_table', - source_table='test_source_table', - distance_function=DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE, - search_kwargs=SearchKwargs(k=5), - llm=llm - ) - - docs = sql_retriever.invoke('What are Beaver Valley plant documents for nuclear fuel waste?') - # Make sure we retried. - assert len(vector_db_mock.native_query.mock_calls) == 3 - # Make sure right doc was retrieved. - assert len(docs) == 1 - assert docs[0].page_content == 'Chunk1' - assert docs[0].metadata == {'key1': 'value1'} - - def test_fallback(self): - llm = MagicMock(spec=ChatOpenAI, wraps=ChatOpenAI) - llm_result = MagicMock(spec=LLMResult, wraps=LLMResult) - llm_result.generations = [ - [ - Generation( - text='''```json -{ - "filters": [ - { - "attribute": "ContributorName", - "comparator": "=", - "value": "Alfred" - } - ] -} -```''' - ) - ] - ] - llm.generate_prompt.return_value = llm_result - vector_db_mock = MagicMock(spec=VectorStoreHandler, wraps=VectorStoreHandler) - vector_db_mock.native_query.side_effect = [ - HandlerResponse( - RESPONSE_TYPE.ERROR, - error_message='Something went wrong I am in absolute shambles' - ), - HandlerResponse( - RESPONSE_TYPE.ERROR, - error_message='Something went wrong I am in absolute shambles' - ), - HandlerResponse( - RESPONSE_TYPE.ERROR, - error_message='Something went wrong I am in absolute shambles' - ), - ] - embeddings_mock = MagicMock(spec=Embeddings, wraps=Embeddings) - embeddings_mock.embed_query.return_value = list(range(768)) - - source_schema = MetadataSchema( - table='test_source_table', - description='Contains source documents', - columns=[ - ColumnSchema(name='Id', type='int', description='Unique ID as primary key of doc'), - ColumnSchema(name='Type', type='int', description='Document Type', values={1: 'Unknown', 2: 'Site Audit'}) - ] - ) - unit_schema = MetadataSchema( - table='unit', - description='Contains information about specific units of power plants. Several units can be part of a single plant.', - columns=[ - ColumnSchema(name='UnitKey', type='int', description='Unique ID of the unit'), - ColumnSchema(name='PlantKey', type='int', description='ID of the plant the unit belongs to') - ] - ) - plant_schema = MetadataSchema( - table='plant', - description='Contains information about specific power plants', - columns=[ - ColumnSchema(name='PlantKey', type='int', description='The unique ID of the plant'), - ColumnSchema(name='PlantName', type='str', description='The name of the plant') - ] - ) - document_unit_schema = MetadataSchema( - table='document_unit', - description='Links documents to the power plant they are relevant to', - columns=[ - ColumnSchema(name='DocumentId', type='int', description='The ID of the document associated with the unit'), - ColumnSchema(name='UnitKey', type='int', description='The ID of the unit the documnet is associated with') - ] - ) - all_schemas = [source_schema, unit_schema, plant_schema, document_unit_schema] - fallback_retriever = MagicMock(spec=BaseRetriever, wraps=BaseRetriever) - fallback_retriever._get_relevant_documents.return_value = [ - Document( - page_content='Chunk1', - metadata={ - 'key1': 'value1' - } - ) - ] - sql_retriever = SQLRetriever( - fallback_retriever=fallback_retriever, - vector_store_handler=vector_db_mock, - metadata_schemas=all_schemas, - embeddings_model=embeddings_mock, - metadata_filters_prompt_template=DEFAULT_METADATA_FILTERS_PROMPT_TEMPLATE, - rewrite_prompt_template=DEFAULT_SEMANTIC_PROMPT_TEMPLATE, - num_retries=2, - embeddings_table='test_embeddings_table', - source_table='test_source_table', - distance_function=DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE, - search_kwargs=SearchKwargs(k=5), - llm=llm - ) - - docs = sql_retriever.invoke('What are Beaver Valley plant documents for nuclear fuel waste?') - # Make sure we retried. - assert len(vector_db_mock.native_query.mock_calls) == 3 - # Make sure we falled back. - assert len(fallback_retriever._get_relevant_documents.mock_calls) == 1 - # Make sure right doc was retrieved. - assert len(docs) == 1 - assert docs[0].page_content == 'Chunk1' - assert docs[0].metadata == {'key1': 'value1'} diff --git a/tests/unused/unit/handler_tests/__init__.py b/tests/unused/unit/handler_tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unused/unit/handler_tests/data/pgvector/seed.sql b/tests/unused/unit/handler_tests/data/pgvector/seed.sql deleted file mode 100644 index 0b1739b555d..00000000000 --- a/tests/unused/unit/handler_tests/data/pgvector/seed.sql +++ /dev/null @@ -1,20 +0,0 @@ --- Make sure pgvector extension is enabled -DROP EXTENSION IF EXISTS vector; -CREATE EXTENSION vector; - --- Create the table -CREATE TABLE items ( - id bigserial PRIMARY KEY, - content text NOT NULL, - embeddings vector(3) NOT NULL, -- 3 dimensions - metadata jsonb -); - --- Dummy data -INSERT INTO items (content, embeddings, metadata) VALUES - ('a fat cat sat on a mat and ate a fat rat', '[1, 2, 3]', '{"location": "Wonderland", "author": "Taishan"}'), - ('a fat dog sat on a mat and ate a fat rat', '[4, 5, 6]', '{"location": "Wonderland", "author": "Taishan"}'), - ('a thin cat sat on a mat and ate a thin rat', '[7, 8, 9]', '{"location": "Zimbabwe", "author": "Taishan"}'), - ('a thin dog sat on a mat and ate a thin rat', '[10, 11, 12]', '{"location": "Springfield", "author": "Muhammad"}'); - -COMMIT; diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/auto_retriever.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/auto_retriever.yml deleted file mode 100644 index c3dada1b80b..00000000000 --- a/tests/unused/unit/handler_tests/data/rag_pipelines/auto_retriever.yml +++ /dev/null @@ -1 +0,0 @@ -retriever_type: auto diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_both.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_both.yml deleted file mode 100644 index 0974bdc14de..00000000000 --- a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_both.yml +++ /dev/null @@ -1,3 +0,0 @@ -retriever_type: multi -multi_retriever_mode: both - diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_split.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_split.yml deleted file mode 100644 index 9daf6c19f88..00000000000 --- a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_split.yml +++ /dev/null @@ -1,3 +0,0 @@ -retriever_type: multi -multi_retriever_mode: split - diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_summarize.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_summarize.yml deleted file mode 100644 index 9379c365ec2..00000000000 --- a/tests/unused/unit/handler_tests/data/rag_pipelines/multi_retriever_summarize.yml +++ /dev/null @@ -1,3 +0,0 @@ -retriever_type: multi -multi_retriever_mode: summarize - diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_chroma.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_chroma.yml deleted file mode 100644 index bc35cd3e998..00000000000 --- a/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_chroma.yml +++ /dev/null @@ -1,3 +0,0 @@ -retriever_type: vector_store -vector_store_config: - vector_store_type: chroma diff --git a/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_pgvector.yml b/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_pgvector.yml deleted file mode 100644 index a75a9b33b71..00000000000 --- a/tests/unused/unit/handler_tests/data/rag_pipelines/vector_retriever_pgvector.yml +++ /dev/null @@ -1,3 +0,0 @@ -retriever_type: vector_store -vector_store_config: - vector_store_type: pgvector \ No newline at end of file diff --git a/tests/unused/unit/handler_tests/test_apache_doris_handler.py b/tests/unused/unit/handler_tests/test_apache_doris_handler.py deleted file mode 100644 index f4b1f3ac861..00000000000 --- a/tests/unused/unit/handler_tests/test_apache_doris_handler.py +++ /dev/null @@ -1,104 +0,0 @@ -import pytest - -from mindsdb.integrations.handlers.apache_doris_handler.apache_doris_handler import ApacheDorisHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - -HANDLER_KWARGS = { - "connection_data": { - "host": "127.0.0.1", - "port": 9030, - "user": "root", - "password": "password", - "database": "mindstest" - } -} - -HANDLER_NAME = 'test_doris_handler' - - -@pytest.fixture(scope="module") -def handler(request): - handler = ApacheDorisHandler(HANDLER_NAME, **HANDLER_KWARGS) - yield handler - - -class TestMySQLHandler: - - def check_valid_response(self, res): - if res.resp_type == RESPONSE_TYPE.TABLE: - assert res.data_frame is not None, "expected to have some data, but got None" - assert res.error_code == 0, f"expected to have zero error_code, but got {res.error_code}" - assert res.error_message is None, f"expected to have None in error message, but got {res.error_message}" - - def get_table_names(self, handler): - res = handler.get_tables() - tables = res.data_frame - assert tables is not None, "expected to have some tables in the db, but got None" - assert 'table_name' in tables, f"expected to get 'table_name' column in the response:\n{tables}" - return list(tables['table_name']) - - def test_connect(self, handler): - handler.connect() - assert handler.is_connected, "connection error" - - def test_check_connection(self, handler): - res = handler.check_connection() - assert res.success, res.error_message - - def test_native_query_show_dbs(self, handler): - dbs = handler.native_query("SHOW DATABASES;") - dbs = dbs.data_frame - assert dbs is not None, "expected to get some data, but got None" - assert 'Database' in dbs, f"expected to get 'Database' column in response:\n{dbs}" - dbs = list(dbs["Database"]) - expected_db = HANDLER_KWARGS["connection_data"]["database"] - assert expected_db in dbs, f"expected to have {expected_db} db in response: {dbs}" - - def test_get_tables(self, handler): - tables = self.get_table_names(handler) - assert "example_tbl" in tables, f"expected to have 'example_tbl' table in the db but got: {tables}" - - def test_describe_table(self, handler): - described = handler.get_columns("example_tbl") - describe_data = described.data_frame - self.check_valid_response(described) - got_columns = list(describe_data.iloc[:, 0]) - want_columns = ["user_id", "date", "city", "age", "sex", "last_visit_date", "cost", "max_dwell_time", "min_dwell_time"] - assert got_columns == want_columns, f"expected to have next columns in table:\n{want_columns}\nbut got:\n{got_columns}" - - def test_create_table(self, handler): - new_table = "test_mdb" - res = handler.native_query(f""" - CREATE TABLE IF NOT EXISTS {new_table} (test_col INT) - DISTRIBUTED BY HASH(test_col) BUCKETS 1 - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" - ); - """) - self.check_valid_response(res) - tables = self.get_table_names(handler) - assert new_table in tables, f"expected to have {new_table} in database, but got: {tables}" - - def test_drop_table(self, handler): - drop_table = "test_mdb" - res = handler.native_query(f"DROP TABLE IF EXISTS {drop_table}") - self.check_valid_response(res) - tables = self.get_table_names(handler) - assert drop_table not in tables - - def test_select_query(self, handler): - limit = 3 - query = f"SELECT * FROM example_tbl LIMIT {limit}" - res = handler.query(query) - self.check_valid_response(res) - got_rows = res.data_frame.shape[0] - want_rows = limit - assert got_rows == want_rows, f"expected to have {want_rows} rows in response but got: {got_rows}" - - def test_select_where_query(self, handler): - want_rows = 5 - query = "SELECT * FROM example_tbl WHERE sex = 0" - res = handler.query(query) - self.check_valid_response(res) - got_rows = res.data_frame.shape[0] - assert got_rows == want_rows, f"expected to have {want_rows} rows in response but got: {got_rows}" diff --git a/tests/unused/unit/handler_tests/test_aqicn_handler.py b/tests/unused/unit/handler_tests/test_aqicn_handler.py deleted file mode 100644 index 674e4f04799..00000000000 --- a/tests/unused/unit/handler_tests/test_aqicn_handler.py +++ /dev/null @@ -1,50 +0,0 @@ -import importlib -import os -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("requests") - REQUESTS_INSTALLED = True -except ImportError: - REQUESTS_INSTALLED = False - - -@pytest.mark.skipif(not REQUESTS_INSTALLED, reason="requests package is not installed") -class TestAQICNHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.api_key = os.environ.get("AQICN_KEY") - self.run_sql(f""" - CREATE DATABASE mindsdb_aqicn - WITH ENGINE = 'aqicn', - PARAMETERS = { - "api_key": '{self.api_key}' - }; - """) - - def test_basic_select_from(self): - sql = 'SELECT * FROM mindsdb_aqicn.air_quality_city where city="Bangalore";' - assert self.run_sql(sql).shape[0] == 1 - - sql = 'SELECT * FROM mindsdb_aqicn.air_quality_lat_lng where lat="12.938539" AND lng="77.5901";' - assert self.run_sql(sql).shape[0] == 1 - - sql = 'SELECT * FROM mindsdb_aqicn.air_quality_user_location;' - assert self.run_sql(sql).shape[0] == 1 - - def test_complex_select(self): - sql = 'SELECT data.city.url, data.city.name FROM mindsdb_aqicn.air_quality_lat_lng where lat="12.938539" AND lng="77.5901";' - assert self.run_sql(sql).shape[1] == 2 - - sql = 'SELECT * FROM mindsdb_aqicn.air_quality_user_location LIMIT 1;' - assert self.run_sql(sql).shape[0] == 1 diff --git a/tests/unused/unit/handler_tests/test_binance_handler.py b/tests/unused/unit/handler_tests/test_binance_handler.py deleted file mode 100644 index 456b9d7345b..00000000000 --- a/tests/unused/unit/handler_tests/test_binance_handler.py +++ /dev/null @@ -1,110 +0,0 @@ -from mindsdb.integrations.handlers.binance_handler.binance_tables import BinanceAggregatedTradesTable -from mindsdb.integrations.handlers.binance_handler.binance_handler import BinanceHandler -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast.select.star import Star -from mindsdb_sql_parser.ast.select.identifier import Identifier - -from unittest.mock import Mock - -import pandas as pd -import unittest - - -class BinanceAggregatedTradesTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(BinanceHandler) - trades_table = BinanceAggregatedTradesTable(api_handler) - # Order matters. - expected_columns = [ - 'symbol', - 'open_time', - 'open_price', - 'high_price', - 'low_price', - 'close_price', - 'volume', - 'close_time', - 'quote_asset_volume', - 'number_of_trades', - 'taker_buy_base_asset_volume', - 'taker_buy_quote_asset_volume' - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_all_columns(self): - api_handler = Mock(BinanceHandler) - api_handler.call_binance_api.return_value = pd.DataFrame([ - [ - 'symbol', # Symbol - 1499040000000, # Kline open time - '0.01634790', # Open price - '0.80000000', # High price - '0.01575800', # Low price - '0.01577100', # Close price - '148976.11427815', # Volume - 1499644799999, # Kline Close time - '2434.19055334', # Quote asset volume - 308, # Number of trades - '1756.87402397', # Taker buy base asset volume - '28.46694368', # Taker buy quote asset volume - ] - ]) - trades_table = BinanceAggregatedTradesTable(api_handler) - - select_all = ast.Select( - targets=[Star()], - from_table='aggregated_trade_data', - where='aggregated_trade_data.symbol = "symbol"' - ) - - all_trade_data = trades_table.select(select_all) - first_trade_data = all_trade_data.iloc[0] - - self.assertEqual(all_trade_data.shape[1], 12) - self.assertEqual(first_trade_data['symbol'], 'symbol') - self.assertEqual(first_trade_data['open_time'], 1499040000000) - self.assertEqual(first_trade_data['open_price'], '0.01634790') - self.assertEqual(first_trade_data['high_price'], '0.80000000') - self.assertEqual(first_trade_data['low_price'], '0.01575800') - self.assertEqual(first_trade_data['close_price'], '0.01577100') - self.assertEqual(first_trade_data['volume'], '148976.11427815') - self.assertEqual(first_trade_data['close_time'], 1499644799999) - self.assertEqual(first_trade_data['quote_asset_volume'], '2434.19055334') - self.assertEqual(first_trade_data['number_of_trades'], 308) - self.assertEqual(first_trade_data['taker_buy_base_asset_volume'], '1756.87402397') - self.assertEqual(first_trade_data['taker_buy_quote_asset_volume'], '28.46694368') - - def test_select_returns_only_selected_columns(self): - api_handler = Mock(BinanceHandler) - api_handler.call_binance_api.return_value = pd.DataFrame([ - [ - 'symbol', # Symbol - 1499040000000, # Kline open time - '0.01634790', # Open price - '0.80000000', # High price - '0.01575800', # Low price - '0.01577100', # Close price - '148976.11427815', # Volume - 1499644799999, # Kline Close time - '2434.19055334', # Quote asset volume - 308, # Number of trades - '1756.87402397', # Taker buy base asset volume - '28.46694368', # Taker buy quote asset volume - ] - ]) - trades_table = BinanceAggregatedTradesTable(api_handler) - - open_time_identifier = Identifier(path_str='open_time') - close_time_identifier = Identifier(path_str='close_time') - select_times = ast.Select( - targets=[open_time_identifier, close_time_identifier], - from_table='aggregated_trade_data', - where='aggregated_trade_data.symbol = "symbol"' - ) - - all_trade_data = trades_table.select(select_times) - first_trade_data = all_trade_data.iloc[0] - - self.assertEqual(all_trade_data.shape[1], 2) - self.assertEqual(first_trade_data['open_time'], 1499040000000) - self.assertEqual(first_trade_data['close_time'], 1499644799999) diff --git a/tests/unused/unit/handler_tests/test_coinbase_handler.py b/tests/unused/unit/handler_tests/test_coinbase_handler.py deleted file mode 100644 index 0a7b3067b82..00000000000 --- a/tests/unused/unit/handler_tests/test_coinbase_handler.py +++ /dev/null @@ -1,94 +0,0 @@ -from mindsdb.integrations.handlers.coinbase_handler.coinbase_tables import CoinBaseAggregatedTradesTable -from mindsdb.integrations.handlers.coinbase_handler.coinbase_handler import CoinBaseHandler -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast.select.star import Star -from mindsdb_sql_parser.ast.select.identifier import Identifier - -from unittest.mock import Mock - -import pandas as pd -import unittest - - -class CoinBaseAggregatedTradesTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(CoinBaseHandler) - trades_table = CoinBaseAggregatedTradesTable(api_handler) - # Order matters. - expected_columns = [ - 'symbol', - 'low', - 'high', - 'open', - 'close', - 'volume', - 'timestamp', - 'timestamp_iso' - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_all_columns(self): - api_handler = Mock(CoinBaseHandler) - api_handler.call_coinbase_api.return_value = pd.DataFrame([ - [ - 'BTC-USD', # symbol - 34330.01, # low - 34623.21, # high - 34493.51, # open - 34349.16, # close - 719.064133, # volume - 1698710400, # timestamp - "2023-10-30T20:00:00-04:00" # timestamp_iso - ] - ]) - trades_table = CoinBaseAggregatedTradesTable(api_handler) - - select_all = ast.Select( - targets=[Star()], - from_table='coinbase_candle_data', - where='coinbase_candle_data.symbol = "BTC-USD"' - ) - - all_trade_data = trades_table.select(select_all) - first_trade_data = all_trade_data.iloc[0] - - self.assertEqual(all_trade_data.shape[1], 8) - self.assertEqual(first_trade_data['symbol'], 'BTC-USD') - self.assertEqual(first_trade_data['low'], 34330.01) - self.assertEqual(first_trade_data['high'], 34623.21) - self.assertEqual(first_trade_data['open'], 34493.51) - self.assertEqual(first_trade_data['close'], 34349.16) - self.assertEqual(first_trade_data['volume'], 719.064133) - self.assertEqual(first_trade_data['timestamp'], 1698710400) - self.assertEqual(first_trade_data['timestamp_iso'], '2023-10-30T20:00:00-04:00') - - def test_select_returns_only_selected_columns(self): - api_handler = Mock(CoinBaseHandler) - api_handler.call_coinbase_api.return_value = pd.DataFrame([ - [ - 'BTC-USD', # symbol - 34330.01, # low - 34623.21, # high - 34493.51, # open - 34349.16, # close - 719.064133, # volume - 1698710400, # timestamp - "2023-10-30T20:00:00-04:00" # timestamp_iso - ] - ]) - trades_table = CoinBaseAggregatedTradesTable(api_handler) - - open_time_identifier = Identifier(path_str='open') - close_time_identifier = Identifier(path_str='close') - select_times = ast.Select( - targets=[open_time_identifier, close_time_identifier], - from_table='coinbase_candle_data', - where='coinbase_candle_data.symbol = "BTC-USD"' - ) - - all_trade_data = trades_table.select(select_times) - first_trade_data = all_trade_data.iloc[0] - - self.assertEqual(all_trade_data.shape[1], 2) - self.assertEqual(first_trade_data['open'], 34493.51) - self.assertEqual(first_trade_data['close'], 34349.16) diff --git a/tests/unused/unit/handler_tests/test_eventbrite_handler.py b/tests/unused/unit/handler_tests/test_eventbrite_handler.py deleted file mode 100644 index b07e3f09cdd..00000000000 --- a/tests/unused/unit/handler_tests/test_eventbrite_handler.py +++ /dev/null @@ -1,405 +0,0 @@ -from mindsdb.integrations.handlers.eventbrite_handler.eventbrite_handler import ( - EventbriteHandler, -) - -from mindsdb.integrations.handlers.eventbrite_handler.eventbrite_handler import ( - CategoryInfoTable, - EventDetailsTable, -) - -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast.select.identifier import Identifier - -from unittest.mock import Mock - -import pandas as pd -import unittest - - -class CategoryInfoTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(EventbriteHandler) - trades_table = CategoryInfoTable(api_handler) - # Order matters. - expected_columns = [ - "resource_uri", - "id", - "name", - "name_localized", - "short_name", - "short_name_localized", - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_some_columns(self): - api_handler = Mock() - api_handler.api.list_categories.return_value = pd.DataFrame( - { - "locale": "en_US", - "categories": [ - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/103/", - "id": "103", - "name": "Music", - "name_localized": "Music", - "short_name": "Music", - "short_name_localized": "Music", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/101/", - "id": "101", - "name": "Business & Professional", - "name_localized": "Business & Professional", - "short_name": "Business", - "short_name_localized": "Business", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/110/", - "id": "110", - "name": "Food & Drink", - "name_localized": "Food & Drink", - "short_name": "Food & Drink", - "short_name_localized": "Food & Drink", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/113/", - "id": "113", - "name": "Community & Culture", - "name_localized": "Community & Culture", - "short_name": "Community", - "short_name_localized": "Community", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/105/", - "id": "105", - "name": "Performing & Visual Arts", - "name_localized": "Performing & Visual Arts", - "short_name": "Arts", - "short_name_localized": "Arts", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/104/", - "id": "104", - "name": "Film, Media & Entertainment", - "name_localized": "Film, Media & Entertainment", - "short_name": "Film & Media", - "short_name_localized": "Film & Media", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/108/", - "id": "108", - "name": "Sports & Fitness", - "name_localized": "Sports & Fitness", - "short_name": "Sports & Fitness", - "short_name_localized": "Sports & Fitness", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/107/", - "id": "107", - "name": "Health & Wellness", - "name_localized": "Health & Wellness", - "short_name": "Health", - "short_name_localized": "Health", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/102/", - "id": "102", - "name": "Science & Technology", - "name_localized": "Science & Technology", - "short_name": "Science & Tech", - "short_name_localized": "Science & Tech", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/109/", - "id": "109", - "name": "Travel & Outdoor", - "name_localized": "Travel & Outdoor", - "short_name": "Travel & Outdoor", - "short_name_localized": "Travel & Outdoor", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/111/", - "id": "111", - "name": "Charity & Causes", - "name_localized": "Charity & Causes", - "short_name": "Charity & Causes", - "short_name_localized": "Charity & Causes", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/114/", - "id": "114", - "name": "Religion & Spirituality", - "name_localized": "Religion & Spirituality", - "short_name": "Spirituality", - "short_name_localized": "Spirituality", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/115/", - "id": "115", - "name": "Family & Education", - "name_localized": "Family & Education", - "short_name": "Family & Education", - "short_name_localized": "Family & Education", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/116/", - "id": "116", - "name": "Seasonal & Holiday", - "name_localized": "Seasonal & Holiday", - "short_name": "Holiday", - "short_name_localized": "Holiday", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/112/", - "id": "112", - "name": "Government & Politics", - "name_localized": "Government & Politics", - "short_name": "Government", - "short_name_localized": "Government", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/106/", - "id": "106", - "name": "Fashion & Beauty", - "name_localized": "Fashion & Beauty", - "short_name": "Fashion", - "short_name_localized": "Fashion", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/117/", - "id": "117", - "name": "Home & Lifestyle", - "name_localized": "Home & Lifestyle", - "short_name": "Home & Lifestyle", - "short_name_localized": "Home & Lifestyle", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/118/", - "id": "118", - "name": "Auto, Boat & Air", - "name_localized": "Auto, Boat & Air", - "short_name": "Auto, Boat & Air", - "short_name_localized": "Auto, Boat & Air", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/119/", - "id": "119", - "name": "Hobbies & Special Interest", - "name_localized": "Hobbies & Special Interest", - "short_name": "Hobbies", - "short_name_localized": "Hobbies", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/199/", - "id": "199", - "name": "Other", - "name_localized": "Other", - "short_name": "Other", - "short_name_localized": "Other", - }, - { - "resource_uri": "https://www.eventbriteapi.com/v3/categories/120/", - "id": "120", - "name": "School Activities", - "name_localized": "School Activities", - "short_name": "School Activities", - "short_name_localized": "School Activities", - }, - ], - } - ) - eventbrite_table = CategoryInfoTable(api_handler) - - resource_uri_identifier = Identifier(path_str="resource_uri") - id_identifier = Identifier(path_str="id") - name_identifier = Identifier(path_str="name") - name_localized_identifier = Identifier(path_str="name_localized") - - select_all = ast.Select( - targets=[ - resource_uri_identifier, - id_identifier, - name_identifier, - name_localized_identifier, - ], - from_table="categoryInfoTable", - ) - - all_category_data = eventbrite_table.select(select_all) - - self.assertEqual(all_category_data.shape[0], 20) - self.assertEqual(all_category_data.shape[1], 4) - - -class EventDetailsTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(EventbriteHandler) - trades_table = EventDetailsTable(api_handler) - # Order matters. - expected_columns = [ - "name_text", - "name_html", - "description_text", - "description_html", - "url", - "start_timezone", - "start_local", - "start_utc", - "end_timezone", - "end_local", - "end_utc", - "organization_id", - "created", - "changed", - "published", - "capacity", - "capacity_is_custom", - "status", - "currency", - "listed", - "shareable", - "online_event", - "tx_time_limit", - "hide_start_date", - "hide_end_date", - "locale", - "is_locked", - "privacy_setting", - "is_series", - "is_series_parent", - "inventory_type", - "is_reserved_seating", - "show_pick_a_seat", - "show_seatmap_thumbnail", - "show_colors_in_seatmap_thumbnail", - "source", - "is_free", - "version", - "summary", - "facebook_event_id", - "logo_id", - "organizer_id", - "venue_id", - "category_id", - "subcategory_id", - "format_id", - "id", - "resource_uri", - "is_externally_ticketed", - "logo_crop_mask", - "logo_original", - "logo_id", - "logo_url", - "logo_aspect_ratio", - "logo_edge_color", - "logo_edge_color_set", - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_some_columns(self): - api_handler = Mock() - api_handler.api.get_event.return_value = pd.DataFrame( - { - "name": { - "text": "AI Forum: Can AI Fix Climate Change?", - "html": "AI Forum: Can AI Fix Climate Change?", - }, - "description": { - "text": "The third in a series of lunchtime presentations by King's researchers at Science Gallery London", - "html": "The third in a series of lunchtime presentations by King's researchers at Science Gallery London", - }, - "url": "https://www.eventbrite.co.uk/e/ai-forum-can-ai-fix-climate-change-tickets-717926867587", - "start": { - "timezone": "Europe/London", - "local": "2023-11-01T13:15:00", - "utc": "2023-11-01T13:15:00Z", - }, - "end": { - "timezone": "Europe/London", - "local": "2023-11-01T14:00:00", - "utc": "2023-11-01T14:00:00Z", - }, - "organization_id": "112948679745", - "created": "2023-09-12T15:38:12Z", - "changed": "2023-10-11T20:01:50Z", - "published": "2023-09-12T15:44:26Z", - "capacity": None, - "capacity_is_custom": None, - "status": "live", - "currency": "GBP", - "listed": True, - "shareable": True, - "online_event": False, - "tx_time_limit": 1200, - "hide_start_date": False, - "hide_end_date": False, - "locale": "en_GB", - "is_locked": False, - "privacy_setting": "unlocked", - "is_series": False, - "is_series_parent": False, - "inventory_type": "limited", - "is_reserved_seating": False, - "show_pick_a_seat": False, - "show_seatmap_thumbnail": False, - "show_colors_in_seatmap_thumbnail": False, - "source": "coyote", - "is_free": True, - "version": None, - "summary": "The third in a series of lunchtime presentations by King's researchers at Science Gallery London", - "facebook_event_id": None, - "logo_id": "596060689", - "organizer_id": "7201076133", - "venue_id": "173614819", - "category_id": "102", - "subcategory_id": None, - "format_id": "2", - "id": "717926867587", - "resource_uri": "https://www.eventbriteapi.com/v3/events/717926867587/", - "is_externally_ticketed": False, - "logo": { - "crop_mask": { - "top_left": {"x": 0, "y": 0}, - "width": 2160, - "height": 1080, - }, - "original": { - "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F596060689%2F112948679745%2F1%2Foriginal.20230912-154003?auto=format%2Ccompress&q=75&sharp=10&s=0bb5e81d009275e956f98c418a9a3025", - "width": 2160, - "height": 1080, - }, - "id": "596060689", - "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F596060689%2F112948679745%2F1%2Foriginal.20230912-154003?h=200&w=450&auto=format%2Ccompress&q=75&sharp=10&rect=0%2C0%2C2160%2C1080&s=282ef80475d468edff954cb243435432", - "aspect_ratio": "2", - "edge_color": "#d6b4be", - "edge_color_set": True, - }, - } - ) - eventbrite_table = EventDetailsTable(api_handler) - - name_text_identifier = Identifier(path_str="name_text") - description_text_identifier = Identifier(path_str="description_text") - url_identifier = Identifier(path_str="url") - - select_all = ast.Select( - targets=[name_text_identifier, description_text_identifier, url_identifier], - from_table="eventDetailsTable", - where='locationId = "717926867587"', - ) - - review_data = eventbrite_table.select(select_all) - first_data = review_data.iloc[0] - - self.assertEqual(review_data.shape[1], 3) - self.assertEqual( - first_data["name_text"], "AI Forum: Can AI Fix Climate Change?" - ) - self.assertEqual( - first_data["description_text"], - "The third in a series of lunchtime presentations by King's researchers at Science Gallery London", - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unused/unit/handler_tests/test_gmail_handler.py b/tests/unused/unit/handler_tests/test_gmail_handler.py deleted file mode 100644 index 7e8fe86758d..00000000000 --- a/tests/unused/unit/handler_tests/test_gmail_handler.py +++ /dev/null @@ -1,240 +0,0 @@ -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.integrations.handlers.gmail_handler.gmail_handler import GmailHandler -from mindsdb.integrations.handlers.gmail_handler.gmail_handler import EmailsTable -from google.oauth2.credentials import Credentials -from mindsdb_sql_parser import parse_sql -import unittest -from unittest.mock import Mock, patch -from unittest import mock - - -class GmailHandlerTest(unittest.TestCase): - def setUp(self) -> None: - self.credentials_file = 'test1_credentials.json' - self.credentials_url = 's3://your-bucket/test_credentials.json' - self.handler = GmailHandler(connection_data={ - 'credentials_file': self.credentials_file, - 'credentials_url': self.credentials_url - }) - - @patch('mindsdb.integrations.handlers.gmail_handler.gmail_handler.requests.get') # Patching the requests.get method - def test_has_creds_file_with_valid_s3_link(self, mock_get): - # Configure the mock behavior - mock_response = mock_get.return_value - mock_response.status_code = 200 - mock_response.text = 'Mocked credentials file content' - - result = self.handler._has_creds_file(self.credentials_file) - # Assert that the requests.get method was called with the correct URL - mock_get.assert_called_once_with(self.credentials_url) - # Assert that the method returns True - self.assertTrue(result) - - @patch('mindsdb.integrations.handlers.gmail_handler.gmail_handler.requests.get') # Patching the requests.get method - def test_has_creds_file_with_invalid_s3_link(self, mock_get): - # Test when invalid S3 credentials file is provided - mock_response = mock.Mock() - mock_response.status_code = 404 - mock_get.return_value = mock_response - - # TODO this will be broken now that we don't use global loggers anymore - with patch('mindsdb.utilities.log.logger') as mock_logger: - result = self.handler._has_creds_file(self.credentials_file) - # Assert that the requests.get method was called with the correct URL - self.assertFalse(result) - # Assert that the error message is logged - mock_logger.error.assert_called_once_with("Failed to get credentials from S3", 404) - - def test_create_connection_with_mocked_token(self): - with mock.patch('google.oauth2.credentials.Credentials.from_authorized_user_file') as mock_credentials: - mock_credentials.return_value = Credentials('token.json') - with mock.patch('os.path.isfile') as mock_isfile: - mock_isfile.return_value = True - result = self.handler.create_connection() - self.assertIsNotNone(result) - - def test_create_connection_with_mocked_credentials_file(self): - with mock.patch('google.oauth2.credentials.Credentials.from_authorized_user_file') as mock_credentials: - mock_credentials.is_valid.return_value = False - with mock.patch('os.path.isfile') as mock_isfile: - mock_isfile.return_value = True - result = self.handler.create_connection() - self.assertIsNotNone(result) - - def test_create_connection_with_mocked_credentials_file_and_s3(self): - with mock.patch('google.oauth2.credentials.Credentials.from_authorized_user_file') as mock_credentials: - mock_credentials.is_valid.return_value = False - with mock.patch('os.path.isfile') as mock_isfile: - mock_isfile.return_value = True - with mock.patch( - 'mindsdb.integrations.handlers.gmail_handler.gmail_handler.GmailHandler._has_creds_file') \ - as mock_has_creds_file: - mock_has_creds_file.return_value = True - result = self.handler.create_connection() - self.assertIsNotNone(result) - - def test_parse_parts_with_multipart_mime_type(self): - email_parts = [ - { - 'mimeType': 'multipart/mixed', - 'parts': [ - { - 'mimeType': 'multipart/alternative', - 'parts': [ - { - 'mimeType': 'text/plain', - 'body': { - 'data': 'VGhpcyBpcyB0aGUgcGxhaW4gdGV4dCBib2R5IG9mIHRoZSBlbWFpbC4=' - } - }, - { - 'mimeType': 'text/html', - 'body': { - 'data': 'PGh0bWw+CiAgICA8Ym9keT4KICAgICAgPHA+V' - 'GhpcyBpcyB0aGUgSFRNTCBib2R5IG9mIHRoZSBlbWFpbC4' - 'gPC9wPgogICAgPC9ib2R5PjwvaHRtbD4=' - } - } - ] - }, - { - 'mimeType': 'application/pdf', - 'filename': 'example.pdf', - 'body': { - 'attachmentId': '<>' - } - } - ] - } - - ] - attachments = [] - email_body = self.handler._parse_parts(email_parts, attachments) - expected_body = "This is the plain text body of the email." - expected_attachments = [ - { - 'filename': 'example.pdf', - 'mimeType': 'application/pdf', - 'attachmentId': '<>' - } - ] - self.assertEqual(email_body, expected_body) - self.assertEqual(attachments, expected_attachments) - - def test_parse_parts_with_multipart_mime_type_and_no_parts(self): - email_parts = [ - { - 'mimeType': 'multipart/mixed', - 'parts': [] - } - ] - attachments = [] - email_body = self.handler._parse_parts(email_parts, attachments) - expected_body = "" - expected_attachments = [] - self.assertEqual(email_body, expected_body) - self.assertEqual(attachments, expected_attachments) - - def test_parse_parts_with_multiple_attachments(self): - email_parts = [ - { - 'mimeType': 'multipart/mixed', - 'parts': [ - { - 'mimeType': 'multipart/alternative', - 'parts': [ - { - 'mimeType': 'text/plain', - 'body': { - 'data': 'VGhpcyBpcyB0aGUgcGxhaW4gdGV4dCBib2R5IG9mIHRoZSBlbWFpbC4=' - } - }, - { - 'mimeType': 'text/html', - 'body': { - 'data': 'PGh0bWw+CiAgICA8Ym9keT4KICAgICAgPHA+' - 'VGhpcyBpcyB0aGUgSFRNTCBib2R5IG9mIHRoZSBlbWFpb' - 'C4gPC9wPgogICAgPC9ib2R5PjwvaHRtbD4=' - } - } - ] - }, - { - 'mimeType': 'application/pdf', - 'filename': 'example.pdf', - 'body': { - 'attachmentId': '<>' - } - }, - { - 'mimeType': 'application/pdf', - 'filename': 'example2.pdf', - 'body': { - 'attachmentId': '<>' - } - } - ] - } - ] - attachments = [] - email_body = self.handler._parse_parts(email_parts, attachments) - expected_body = "This is the plain text body of the email." - expected_attachments = [ - { - 'filename': 'example.pdf', - 'mimeType': 'application/pdf', - 'attachmentId': '<>' - }, - { - 'filename': 'example2.pdf', - 'mimeType': 'application/pdf', - 'attachmentId': '<>' - } - ] - self.assertEqual(email_body, expected_body) - self.assertEqual(attachments, expected_attachments) - - -class EmailsTableTest(unittest.TestCase): - - def test_get_tables(self): - handler = Mock(GmailHandler) - tables = handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_get_columns_returns_all_columns(self): - gmail_handler = Mock(GmailHandler) - gmail_table = EmailsTable(gmail_handler) - expected_columns = [ - 'id', - 'message_id', - 'thread_id', - 'label_ids', - 'sender', - 'to', - 'date', - 'subject', - 'snippet', - 'history_id', - 'size_estimate', - 'body', - 'attachments' - - ] - self.assertListEqual(gmail_table.get_columns(), expected_columns) - - def test_delete_method(self): - gmail_handler = Mock(GmailHandler) - gmail_table = EmailsTable(gmail_handler) - query = parse_sql('delete from gmail where id=1') - gmail_table.delete(query) - gmail_handler.call_gmail_api.assert_called_once_with('delete_message', {'id': 1}) - - def test_update_method(self): - gmail_handler = Mock(GmailHandler) - gmail_table = EmailsTable(gmail_handler) - query = parse_sql('update gmail set addLabel="test1",removeLabel = "test" where id=1') - gmail_table.update(query) - gmail_handler.call_gmail_api.assert_called_once_with('modify_message', {'id': 1, - 'body': {'addLabelIds': ['test1'], - 'removeLabelIds': ['test']}}) diff --git a/tests/unused/unit/handler_tests/test_instatus_handler.py b/tests/unused/unit/handler_tests/test_instatus_handler.py deleted file mode 100644 index f42932f281d..00000000000 --- a/tests/unused/unit/handler_tests/test_instatus_handler.py +++ /dev/null @@ -1,144 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.instatus_handler.instatus_handler import InstatusHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -import pandas as pd -import os - - -class InstatusHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.handler = InstatusHandler(name='mindsdb_instatus', connection_data={'api_key': os.environ.get('INSTATUS_API_KEY')}) - - def setUp(self): - self.pageId = self.handler.call_instatus_api(endpoint='/v2/pages')['id'][0] - self.componentId = self.handler.call_instatus_api(endpoint=f'/v1/{self.pageId}/components')['id'][0] - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_call_instatus_api(self): - self.assertIsInstance(self.handler.call_instatus_api(endpoint='/v2/pages'), pd.DataFrame) - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_3_get_columns(self): - status_pages_columns = self.handler.get_columns(table_name='status_pages') - components_columns = self.handler.get_columns(table_name='components') - assert type(status_pages_columns) is not RESPONSE_TYPE.ERROR - assert type(components_columns) is not RESPONSE_TYPE.ERROR - - def test_4_select_status_pages(self): - query = '''SELECT * - FROM mindsdb_instatus.status_pages''' - self.assertTrue(self.handler.native_query(query)) - - def test_5_select_status_pages_by_conditions(self): - query = '''SELECT name, status, subdomain - FROM mindsdb_instatus.status_pages - WHERE id = "clo3xshsk1114842hkn377y3lrap"''' - self.assertTrue(self.handler.native_query(query)) - - def test_6_insert_status_pages(self): - query = f'''INSERT INTO mindsdb_instatus.status_pages (email, name, subdomain, components, logoUrl) VALUES ('{os.environ.get('EMAIL_ID')}', 'mindsdb', 'somtirtha-roy', '["Website", "App", "API"]', 'https://instatus.com/sample.png')''' - try: - self.assertTrue(self.handler.native_query(query)) - except Exception as e: - error_message = str(e) - if "This subdomain is taken by another status page" in error_message: - print("Subdomain is already taken. Choose a different one.") - - def test_7_update_status_pages(self): - # update the row with the id obtained - query = f'''UPDATE mindsdb_instatus.status_pages - SET logoUrl = 'https://instatus.com/sample.png', - faviconUrl = 'https://instatus.com/favicon-32x32.png', - websiteUrl = 'https://instatus.com', - language = 'en', - useLargeHeader = true, - brandColor = '#111', - okColor = '#33B17E', - disruptedColor = '#FF8C03', - degradedColor = '#ECC94B', - downColor = '#DC123D', - noticeColor = '#70808F', - unknownColor = '#DFE0E1', - googleAnalytics = 'UA-00000000-1', - subscribeBySms = true, - smsService = 'twilio', - twilioSid = 'YOUR_TWILIO_SID', - twilioToken = 'YOUR_TWILIO_TOKEN', - twilioSender = 'YOUR_TWILIO_SENDER', - nexmoKey = null, - nexmoSecret = null, - nexmoSender = null, - htmlInMeta = null, - htmlAboveHeader = null, - htmlBelowHeader = null, - htmlAboveFooter = null, - htmlBelowFooter = null, - htmlBelowSummary = null, - cssGlobal = null, - launchDate = null, - dateFormat = 'MMMMMM d, yyyy', - dateFormatShort = 'MMM yyyy', - timeFormat = 'p', - private = false, - useAllowList = false, - translations = '{{ - "name": {{ - "fr": "nasa" - }} - }}' - WHERE id = "{self.pageId}"''' - self.assertTrue(self.handler.native_query(query)) - - def test_8_select_components(self): - query = f'''SELECT * - FROM mindsdb_instatus.components - WHERE page_id = '{self.pageId}';''' - self.assertTrue(self.handler.native_query(query)) - - def test_9_select_components_by_conditions(self): - query = f'''SELECT * - FROM mindsdb_instatus.components - WHERE page_id = '{self.pageId}' - AND component_id = '{self.componentId}';''' - self.assertTrue(self.handler.native_query(query)) - - def test_10_insert_components(self): - query = f'''INSERT INTO mindsdb_instatus.components (page_id, name, description, status, order, showUptime, grouped, translations_name_in_fr, translations_desc_in_fr) - VALUES ( - '{self.pageId}', - 'Test component', - 'Testing', - 'OPERATIONAL', - 6, - true, - false, - "Composant de test", - "En test" - );''' - self.assertTrue(self.handler.native_query(query)) - - def test_11_update_components(self): - query = f'''UPDATE mindsdb_instatus.components - SET - name = 'Test component 4', - description = 'Test test test', - status = 'OPERATIONAL', - order = 6, - showUptime = true, - grouped = false, - translations_name_in_fr = "Composant de test 4", - translations_desc_in_fr = "Test test test" - WHERE page_id = '{self.pageId}' - AND component_id = '{self.componentId}';''' - self.assertTrue(self.handler.native_query(query)) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/unused/unit/handler_tests/test_intercom_handler.py b/tests/unused/unit/handler_tests/test_intercom_handler.py deleted file mode 100644 index cee50735d13..00000000000 --- a/tests/unused/unit/handler_tests/test_intercom_handler.py +++ /dev/null @@ -1,62 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.intercom_handler.intercom_handler import IntercomHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -import pandas as pd -import os - - -class InstatusHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.handler = IntercomHandler(name="mindsdb_intercon", connection_data={'access_token': os.environ.get('INTERCOM_ACCESS_TOKEN')}) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_connect(self): - assert self.handler.connect() - - def test_2_call_instatus_api(self): - self.assertIsInstance(self.handler.call_intercom_api(endpoint='/articles'), pd.DataFrame) - - def test_3_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_4_get_columns(self): - columns = self.handler.get_columns(table_name='articles') - assert type(columns) is not RESPONSE_TYPE.ERROR - - def test_5_select_articles(self): - query = "SELECT * FROM articles" - self.assertTrue(self.handler.native_query(query=query)) - - def test_6_select_articles_by_condition(self): - query = "SELECT * FROM articles WHERE id = '8553922'" - self.assertTrue(self.handler.native_query(query=query)) - - def test_7_insert_article(self): - query = '''INSERT INTO myintercom.articles (title, description, body, author_id, state, parent_id, parent_type) - VALUES ('Thanks for everything', - 'Description of the Article', - 'Body of the Article', - 6840572, - 'published', - 6801839, - 'collection' - );''' - self.assertTrue(self.handler.native_query(query=query)) - - def test_8_update_article(self): - df = pd.DataFrame(self.handler.call_intercom_api(endpoint='/articles', params={'page': 1, 'per_page': 1})['data'][0]) - _id = df['id'][0] - query = f'''UPDATE myintercom.articles - SET title = 'Christmas is here!', - body = '

New gifts in store for the jolly season

' - WHERE id = {_id};''' - self.assertTrue(self.handler.native_query(query=query)) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/unused/unit/handler_tests/test_lightdash_handler.py b/tests/unused/unit/handler_tests/test_lightdash_handler.py deleted file mode 100644 index 84cba655bd4..00000000000 --- a/tests/unused/unit/handler_tests/test_lightdash_handler.py +++ /dev/null @@ -1,146 +0,0 @@ -import importlib -import os - -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("requests") - REQUESTS_INSTALLED = True -except ImportError: - REQUESTS_INSTALLED = False - - -@pytest.mark.skipif(not REQUESTS_INSTALLED, reason="requests package is not installed") -class TestLightdashHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.api_key = os.environ.get("LIGHTDASH_API_KEY") - self.base_url = os.environ.get("LIGHTDASH_BASE_URL") - self.project_uuid = os.environ.get("LIGHTDASH_PROJECT_UUID") - self.space_uuid = os.environ.get("LIGHTDASH_SPACE_UUID") - self.chart_uuid = os.environ.get("LIGHTDASH_CHART_UUID") - self.chart_version_uuid = os.environ.get("LIGHTDASH_CHART_VERSION_UUID") - self.scheduler_uuid = os.environ.get("LIGHTDASH_SCHEDULER_UUID") - self.job_id = os.environ.get("LIGHTDASH_JOB_ID") - self.run_sql(f""" - CREATE DATABASE lightdash_datasource - WITH ENGINE = "lightdash", - PARAMETERS = {{ - "api_key": '{self.api_key}', - "base_url": '{self.base_url}' - }}; - """) - - def test_basic_select_from(self): - sql = """ - SELECT * FROM lightdash_datasource.user; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM lightdash_datasource.user_ability; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM lightdash_datasource.org; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM lightdash_datasource.org_projects; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM lightdash_datasource.org_members; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.project_table WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.warehouse_connection WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.dbt_connection WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.dbt_env_vars WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.charts WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.spaces WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.access WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.validation WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.dashboards WHERE project_uuid='{self.project_uuid}' AND space_uuid='{self.space_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.queries WHERE project_uuid='{self.project_uuid}' AND space_uuid='{self.space_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.chart_history WHERE chart_uuid='{self.chart_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.chart_config WHERE chart_uuid='{self.chart_uuid}' AND version_uuid='{self.chart_version_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.chart_additional_metrics WHERE chart_uuid='{self.chart_uuid}' AND version_uuid='{self.chart_version_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.chart_table_calculations WHERE chart_uuid='{self.chart_uuid}' AND version_uuid='{self.chart_version_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.scheduler_logs WHERE project_uuid='{self.project_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.scheduler WHERE scheduler_uuid='{self.scheduler_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.scheduler_jobs WHERE scheduler_uuid='{self.scheduler_uuid}'; - """ - self.run_sql(sql) - sql = f""" - SELECT * FROM lightdash_datasource.scheduler_job_status WHERE job_id='{self.job_id}'; - """ - self.run_sql(sql) - - def test_complex_select(self): - sql = """ - SELECT firstName, lastName FROM lightdash_datasource.user; - """ - assert self.run_sql(sql).shape[1] == 2 - sql = f""" - SELECT name FROM lightdash_datasource.project_table WHERE project_uuid='{self.project_uuid}'; - """ - assert self.run_sql(sql).shape[1] == 1 diff --git a/tests/unused/unit/handler_tests/test_luma_handler.py b/tests/unused/unit/handler_tests/test_luma_handler.py deleted file mode 100644 index 9bf31289dc9..00000000000 --- a/tests/unused/unit/handler_tests/test_luma_handler.py +++ /dev/null @@ -1,40 +0,0 @@ -import importlib -import os -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("requests") - REQUESTS_INSTALLED = True -except ImportError: - REQUESTS_INSTALLED = False - - -@pytest.mark.skipif(not REQUESTS_INSTALLED, reason="requests package is not installed") -class TestLumaHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.api_key = os.environ.get("LUMA_API_KEY") - self.run_sql(f""" - CREATE DATABASE mindsdb_luma - WITH ENGINE = 'luma', - PARAMETERS = { - "api_key": '{self.api_key}' - }; - """) - - def test_basic_select_from(self): - sql = "SELECT * FROM mindsdb_luma.events;" - self.run_sql(sql) - - sql = 'SELECT * FROM mindsdb_luma.events where event_id = "evt-HQ36IFDwncocuGy";' - assert self.run_sql(sql).shape[0] == 1 diff --git a/tests/unused/unit/handler_tests/test_milvus_handler.py b/tests/unused/unit/handler_tests/test_milvus_handler.py deleted file mode 100644 index 0e2170ba57c..00000000000 --- a/tests/unused/unit/handler_tests/test_milvus_handler.py +++ /dev/null @@ -1,460 +0,0 @@ -import importlib -from unittest.mock import patch - -import pandas as pd -import pytest -import time -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - pymilvus = importlib.import_module("pymilvus") - MILVUS_INSTALLED = True -except ImportError: - MILVUS_INSTALLED = False - - -@pytest.mark.skipif(not MILVUS_INSTALLED, reason="pymilvus is not installed") -class TestMilvusHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - # create a milvus database - self.run_sql(""" - CREATE DATABASE milvus_test - WITH - ENGINE = 'milvus', - PARAMETERS = { - "uri": "./milvus.db", - "create_embedding_dim": 3 - }; - """) - self.run_sql(""" - CREATE DATABASE milvus_test_auto_id - WITH - ENGINE = 'milvus', - PARAMETERS = { - "uri": "./milvus.db", - "create_embedding_dim": 3, - "create_auto_id": true - }; - """) - - def drop_table(self, table_name): - pymilvus.connections.connect( - uri="./milvus.db", - ) - pymilvus.utility.drop_collection(table_name) - - @pytest.mark.xfail(reason="create table for vectordatabase is not well supported") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_table(self, postgres_handler_mock): - # create an empty table - sql = """ - CREATE TABLE milvus_test.testable; - """ - self.run_sql(sql) - # create a table with the schema definition is not allowed - sql = """ - CREATE TABLE milvus_test.testable ( - id int, - metadata text, - embedding float[] - ); - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_with_select(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - - self.set_handler(postgres_handler_mock, "pg", tables={"df": df}) - - self.drop_table("testable") - - sql = """ - CREATE TABLE milvus_test.testable ( - SELECT * FROM pg.df - ) - """ - # this should work - self.run_sql(sql) - - sql = """ - CREATE TABLE milvus_test.testable ( - SELECT * FROM pg.df - ) - """ - # this should work - self.run_sql(sql) - - self.drop_table("testable") - - @pytest.mark.xfail(reason="drop table for vectordatabase is not working") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_drop_table(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testable": df}) - - self.drop_table("testable") - - # create a table - sql = """ - CREATE TABLE milvus_test.testable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # drop a table - sql = """ - DROP TABLE milvus_test.testable; - """ - self.run_sql(sql) - - # drop a non existent table will raise an error - sql = """ - DROP TABLE milvus_test.test_table2; - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_insert_into(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2", "id3"], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - df2 = pd.DataFrame( - { - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [ - [1.0, 2.0, 4.0], - [1.0, 2.0, 5.0], - [1.0, 2.0, 3.0], - ], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"df": df, "df2": df2}) - - self.drop_table("testable") - self.drop_table("testableauto") - - # create tables - sql = """ - CREATE TABLE milvus_test.testable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - sql = """ - CREATE TABLE milvus_test_auto_id.testableauto ( - SELECT * FROM pg.df2 - ) - """ - self.run_sql(sql) - - # insert into a table with values - sql = """ - INSERT INTO milvus_test.testable ( - id,content,metadata,embeddings - ) - VALUES ( - "id4", 'this is a test', '{"test": "test"}', '[1.0, 8.0, 9.0]' - ) - """ - self.run_sql(sql) - - time.sleep(1) # wait for milvus to load the data asynchronously - # check if the data is inserted - sql = """ - SELECT * FROM milvus_test.testable - WHERE id = "id4" - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # insert without specifying id should work in autoid one - sql = """ - INSERT INTO milvus_test_auto_id.testableauto ( - content,metadata,embeddings - ) - VALUES ( - 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - self.run_sql(sql) - - time.sleep(1) # wait for milvus to load the data asynchronously - # check if the data is inserted - sql = """ - SELECT * FROM milvus_test_auto_id.testableauto - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 4 - - # insert into a table with a select statement - sql = """ - INSERT INTO milvus_test_auto_id.testableauto (content,metadata,embeddings) - SELECT * FROM pg.df2 - """ - self.run_sql(sql) - - time.sleep(1) # wait for milvus to load the data asynchronously - # check if the data is inserted - sql = """ - SELECT * FROM milvus_test.testableauto - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 7 - - self.drop_table("testable") - self.drop_table("testableauto") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_select_from(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testable": df}) - - self.drop_table("testable") - - # create a table - sql = """ - CREATE TABLE milvus_test.testable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - time.sleep(1) # wait for milvus to load the data asynchronously - # query a table without any filters - sql = """ - SELECT * FROM milvus_test.testable - """ - self.run_sql(sql) - - # query a table with id - sql = """ - SELECT * FROM milvus_test.testable - WHERE id = 'id1' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a search vector, without limit - sql = """ - SELECT * FROM milvus_test.testable - WHERE search_vector = '[1.0, 2.0, 3.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a search vector, with limit - sql = """ - SELECT * FROM milvus_test.testable - WHERE search_vector = '[1.0, 2.0, 3.0]' - LIMIT 1 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a metadata filter - sql = """ - SELECT * FROM milvus_test.testable - WHERE test = 'test' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a metadata filter and a search vector - sql = """ - SELECT * FROM milvus_test.testable - WHERE test = 'test' - AND search_vector = '[1.0, 2.0, 3.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - self.drop_table("testable") - - @pytest.mark.xfail(reason="update for vectordatabase is not implemented") - def test_update(self): - # update a table with a metadata filter - sql = """ - UPDATE milvus_test.testable - SET test = 'test2' - WHERE test = 'test' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM milvus_test.testable - WHERE test = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # update the embeddings - sql = """ - UPDATE milvus_test.testable - SET embedding = [3.0, 2.0, 1.0] - WHERE test = 'test2' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM milvus_test.testable - WHERE test = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - assert ret.embedding[0] == [3.0, 2.0, 1.0] - - # update multiple columns - sql = """ - UPDATE milvus_test.testable - SET test = 'test3', - embedding = [1.0, 2.0, 3.0] - content = 'this is a test' - WHERE test = 'test2' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM milvus_test.testable - WHERE test = 'test3' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - assert ret.embedding[0] == [1.0, 2.0, 3.0] - assert ret.content[0] == "this is a test" - - # update a table with a search vector filter is not allowed - sql = """ - UPDATE milvus_test.testable - SET `metadata.test = 'test2' - WHERE search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # update a table without any filters is allowed - sql = """ - UPDATE milvus_test.testable - SET metadata.test = 'test3' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM milvus_test.testable - WHERE `metadata.test` = 'test3' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # update a table with a search vector filter and a metadata filter is not allowed - sql = """ - UPDATE milvus_test.testable - SET metadata.test = 'test3' - WHERE metadata.test = 'test2' - AND search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_delete(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2", "id3"], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - - self.drop_table("testable") - - self.set_handler(postgres_handler_mock, "pg", tables={"testable": df}) - - # create a table - sql = """ - CREATE TABLE milvus_test.testable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - time.sleep(1) # wait for milvus to load the data asynchronously - # delete by id - sql = """ - DELETE FROM milvus_test.testable - WHERE id IN ('id1') - """ - self.run_sql(sql) - - time.sleep(1) # wait for milvus to load the data asynchronously - # check if the data is deleted - sql = """ - SELECT * FROM milvus_test.testable - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # delete by multiple ids - sql = """ - DELETE FROM milvus_test.testable - WHERE id IN ('id2', 'id3') - """ - self.run_sql(sql) - - time.sleep(1) # wait for milvus to load the data asynchronously - # check if the data is deleted - sql = """ - SELECT * FROM milvus_test.testable - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # delete from a table without any filters is not allowed - sql = """ - DELETE FROM milvus_test.testable - """ - with pytest.raises(Exception): - self.run_sql(sql) - - self.drop_table("testable") diff --git a/tests/unused/unit/handler_tests/test_ms_teams_handler.py b/tests/unused/unit/handler_tests/test_ms_teams_handler.py deleted file mode 100644 index f12e6e3de9d..00000000000 --- a/tests/unused/unit/handler_tests/test_ms_teams_handler.py +++ /dev/null @@ -1,1050 +0,0 @@ -import unittest -from unittest.mock import Mock, patch - -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast import Constant, BinaryOperation - -from mindsdb_sql_parser.ast.select.star import Star -from mindsdb_sql_parser.ast.select.identifier import Identifier - -from mindsdb.integrations.handlers.ms_teams_handler.ms_teams_handler import MSTeamsHandler -from mindsdb.integrations.handlers.ms_teams_handler.settings import ms_teams_handler_config -from mindsdb.integrations.handlers.ms_teams_handler.ms_graph_api_teams_client import MSGraphAPITeamsClient -from mindsdb.integrations.handlers.ms_teams_handler.ms_teams_tables import ChatsTable, ChatMessagesTable, ChannelsTable, ChannelMessagesTable - - -class TestMSGraphAPITeamsClient(unittest.TestCase): - @classmethod - def setUpClass(cls): - """ - Set up the tests. - """ - - # mock the api client with a dummy access_token parameter (calls to the API that use this parameter will be mocked) - cls.api_client = MSGraphAPITeamsClient("test_access_token") - - @patch('requests.get') - def test_get_chat_returns_chat_data(self, mock_get): - """ - Test that get_chat returns chat data. - """ - - # configure the mock to return a response with 'status_code' 200 - mock_get.return_value = Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value=ms_teams_handler_config.TEST_CHAT_DATA) - ) - - chat_data = self.api_client.get_chat("test_id") - - # assert the requests.get call was made with the expected arguments - mock_get.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/chats/test_id/', - headers={'Authorization': 'Bearer test_access_token'}, - params={'$expand': 'lastMessagePreview'} - ) - - self.assertEqual(chat_data["id"], "test_id") - self.assertEqual(chat_data["chatType"], "oneOnOne") - - @patch('requests.get') - def test_get_chats_returns_chats_data(self, mock_get): - """ - Test that get_chats returns chats data. - """ - - # configure the mock to return a response with 'status_code' 200 - mock_get.return_value = Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value={'value': ms_teams_handler_config.TEST_CHATS_DATA}) - ) - - chats_data = self.api_client.get_chats() - - # assert the requests.get call was made with the expected arguments - mock_get.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/chats/', - headers={'Authorization': 'Bearer test_access_token'}, - params={'$expand': 'lastMessagePreview', '$top': 20} - ) - - self.assertEqual(chats_data[0]["id"], "test_id") - self.assertEqual(chats_data[0]["chatType"], "oneOnOne") - - @patch('requests.get') - def test_get_chat_message_returns_chat_message_data(self, mock_get): - """ - Test that get_chat_message returns chat message data. - """ - - # configure the mock to return a response with 'status_code' 200 - mock_get.return_value = Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value=ms_teams_handler_config.TEST_CHAT_MESSAGE_DATA) - ) - - chat_message_data = self.api_client.get_chat_message("test_chat_id", "test_id") - - # assert the requests.get call was made with the expected arguments - mock_get.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/chats/test_chat_id/messages/test_id/', - headers={'Authorization': 'Bearer test_access_token'}, - params=None - ) - - self.assertEqual(chat_message_data["id"], "test_id") - self.assertEqual(chat_message_data["messageType"], "message") - self.assertEqual(chat_message_data["chatId"], "test_chat_id") - - @patch('requests.get') - def test_get_chat_messages_returns_chat_messages_data(self, mock_get): - """ - Test that get_chat_messages returns chat messages data. - """ - - # configure the mock to return a response with 'status_code' 200 - mock_get.return_value = Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value={'value': ms_teams_handler_config.TEST_CHAT_MESSAGES_DATA}) - ) - - chat_messages_data = self.api_client.get_chat_messages("test_chat_id") - - # assert the requests.get call was made with the expected arguments - mock_get.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/chats/test_chat_id/messages/', - headers={'Authorization': 'Bearer test_access_token'}, - params={'$top': 20} - ) - - self.assertEqual(chat_messages_data[0]["id"], "test_id") - self.assertEqual(chat_messages_data[0]["messageType"], "message") - self.assertEqual(chat_messages_data[0]["chatId"], "test_chat_id") - - @patch('requests.get') - def test_get_all_chat_messages_returns_all_chat_messages_data(self, mock_get): - """ - Test that get_all_chat_messages returns all chat messages data. - """ - - # configure the mock to return a response with 'status_code' 200 - mock_get.side_effect = [ - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value={'value': ms_teams_handler_config.TEST_CHATS_DATA}) - ), - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value={'value': ms_teams_handler_config.TEST_CHAT_MESSAGES_DATA}) - ) - ] - - chat_messages_data = self.api_client.get_all_chat_messages() - - # assert the requests.get calls were made with the expected arguments - mock_get.assert_any_call( - 'https://graph.microsoft.com/v1.0/chats/', - headers={'Authorization': 'Bearer test_access_token'}, - params={'$expand': 'lastMessagePreview', '$top': 20} - ) - - mock_get.assert_any_call( - 'https://graph.microsoft.com/v1.0/chats/test_id/messages/', - headers={'Authorization': 'Bearer test_access_token'}, - params={'$top': 20} - ) - - self.assertEqual(chat_messages_data[0]["id"], "test_id") - self.assertEqual(chat_messages_data[0]["messageType"], "message") - self.assertEqual(chat_messages_data[0]["chatId"], "test_chat_id") - - @patch('requests.post') - def test_send_chat_message_sends_correct_request(self, mock_post): - """ - Test that send_chat_message sends a chat message. - """ - - # configure the mock to return a response with 'status_code' 201 - mock_post.return_value = Mock( - status_code=201, - headers={'Content-Type': 'application/json'}, - ) - - self.api_client.send_chat_message("test_chat_id", "test_message", "test_subject") - - # assert the requests.post call was made with the expected arguments - mock_post.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/chats/test_chat_id/messages/', - headers={'Authorization': 'Bearer test_access_token'}, - json={'subject': 'test_subject', 'body': {'content': 'test_message'}} - ) - - @patch('requests.get') - def test_get_channel_returns_channel_data(self, mock_get): - """ - Test that get_channel returns channel data. - """ - - # configure the mock to return a response with 'status_code' 200 - mock_get.return_value = Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value=ms_teams_handler_config.TEST_CHANNEL_DATA) - ) - - channel_data = self.api_client.get_channel("test_team_id", "test_id") - - # assert the requests.get call was made with the expected arguments - mock_get.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/teams/test_team_id/channels/test_id/', - headers={'Authorization': 'Bearer test_access_token'}, - params=None - ) - - self.assertEqual(channel_data["id"], "test_id") - self.assertEqual(channel_data["displayName"], "test_display_name") - self.assertEqual(channel_data["teamId"], "test_team_id") - - @patch('requests.get') - def test_get_channels_returns_channels_data(self, mock_get): - """ - Test that get_channels returns channels data. - """ - - # check if the group_ids parameter in the API client is set - is_group_ids_set = True if self.api_client._group_ids is not None else False - - # configure the mock to return a response with 'status_code' 200 - # if the group_ids parameter is not set, the mock will return the group data first, then the channels data - if not is_group_ids_set: - mock_get.side_effect = [ - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value=ms_teams_handler_config.TEST_GROUP_DATA) - ), - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value={'value': ms_teams_handler_config.TEST_CHANNELS_DATA}) - ) - ] - - # if the group_ids parameter is set, the mock will only return the channels data - else: - mock_get.return_value = Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value={'value': ms_teams_handler_config.TEST_CHANNELS_DATA}) - ) - - channels_data = self.api_client.get_channels() - - # assert the requests.get calls were made with the expected arguments - # if the group_ids parameter is not set, the mock will check the calls to both the groups and channels endpoints - if not is_group_ids_set: - mock_get.assert_any_call( - 'https://graph.microsoft.com/v1.0/groups/', - headers={'Authorization': 'Bearer test_access_token'}, - params={'$select': 'id,resourceProvisioningOptions'} - ) - - mock_get.assert_any_call( - 'https://graph.microsoft.com/v1.0/teams/test_team_id/channels/', - headers={'Authorization': 'Bearer test_access_token'}, - params=None - ) - - # if the group_ids parameter is set, the mock will only check the call to the channels endpoint - else: - mock_get.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/teams/test_team_id/channels/', - headers={'Authorization': 'Bearer test_access_token'}, - params=None - ) - - self.assertEqual(channels_data[0]["id"], "test_id") - self.assertEqual(channels_data[0]["displayName"], "test_display_name") - self.assertEqual(channels_data[0]["teamId"], "test_team_id") - - @patch('requests.get') - def test_get_channel_message_returns_channel_message_data(self, mock_get): - """ - Test that get_channel_message returns channel message data. - """ - - # configure the mock to return a response with 'status_code' 200 - mock_get.return_value = Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value=ms_teams_handler_config.TEST_CHANNEL_MESSAGE_DATA) - ) - - channel_message_data = self.api_client.get_channel_message("test_team_id", "test_channel_id", "test_id") - - # assert the requests.get call was made with the expected arguments - mock_get.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/teams/test_team_id/channels/test_channel_id/messages/test_id/', - headers={'Authorization': 'Bearer test_access_token'}, - params=None - ) - - self.assertEqual(channel_message_data["id"], "test_id") - self.assertEqual(channel_message_data["messageType"], "message") - self.assertEqual(channel_message_data["channelIdentity"]["channelId"], "test_channel_id") - self.assertEqual(channel_message_data["channelIdentity"]["teamId"], "test_team_id") - - @patch('requests.get') - def test_get_channel_messages_returns_channel_messages_data(self, mock_get): - """ - Test that get_channel_messages returns channel messages data. - """ - - # check if the group_ids parameter in the API client is set - is_group_ids_set = True if self.api_client._group_ids is not None else False - - # configure the mocks to return a response with 'status_code' 200 - # if the group_ids parameter is not set, the mocks will return the group data first, then the channel ID data, then the channel messages data - if not is_group_ids_set: - mock_get.side_effect = [ - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value=ms_teams_handler_config.TEST_GROUP_DATA) - ), - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value=ms_teams_handler_config.TEST_CHANNEL_ID_DATA) - ), - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value={'value': ms_teams_handler_config.TEST_CHANNEL_MESSAGES_DATA}) - ), - ] - - # if the group_ids parameter is set, the mocks will only return the channel ID data, then the channel messages data - else: - mock_get.side_effect = [ - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value=ms_teams_handler_config.TEST_CHANNEL_ID_DATA) - ), - Mock( - status_code=200, - headers={'Content-Type': 'application/json'}, - json=Mock(return_value={'value': ms_teams_handler_config.TEST_CHANNEL_MESSAGES_DATA}) - ), - ] - - channel_messages_data = self.api_client.get_channel_messages() - - # assert the requests.get calls were made with the expected arguments - # if the group_ids parameter is not set, check the calls to the groups endpoint - if not is_group_ids_set: - mock_get.assert_any_call( - 'https://graph.microsoft.com/v1.0/groups/', - headers={'Authorization': 'Bearer test_access_token'}, - params={'$select': 'id,resourceProvisioningOptions'} - ) - - mock_get.assert_any_call( - 'https://graph.microsoft.com/v1.0/teams/test_team_id/channels/', - headers={'Authorization': 'Bearer test_access_token'}, - params=None - ) - - mock_get.assert_any_call( - 'https://graph.microsoft.com/v1.0/teams/test_team_id/channels/test_channel_id/messages/', - headers={'Authorization': 'Bearer test_access_token'}, - params={'$top': 20} - ) - - self.assertEqual(channel_messages_data[0]["id"], "test_id") - self.assertEqual(channel_messages_data[0]["messageType"], "message") - self.assertEqual(channel_messages_data[0]["channelIdentity"]["channelId"], "test_channel_id") - self.assertEqual(channel_messages_data[0]["channelIdentity"]["teamId"], "test_team_id") - - @patch('requests.post') - def test_send_channel_message_sends_correct_request(self, mock_post): - """ - Test that send_channel_message sends a channel message. - """ - - # configure the mock to return a response with 'status_code' 201 - mock_post.return_value = Mock( - status_code=201, - headers={'Content-Type': 'application/json'}, - ) - - self.api_client.send_channel_message("test_team_id", "test_channel_id", "test_message", "test_subject") - - # assert the requests.post call was made with the expected arguments - mock_post.assert_called_once_with( - 'https://graph.microsoft.com/v1.0/teams/test_team_id/channels/test_channel_id/messages/', - headers={'Authorization': 'Bearer test_access_token'}, - json={'subject': 'test_subject', 'body': {'content': 'test_message'}} - ) - - -class TestChatsTable(unittest.TestCase): - """ - Tests for the ChatsTable class. - """ - - @classmethod - def setUpClass(cls): - """ - Set up the tests. - """ - - # mock the api handler - cls.api_handler = Mock(MSTeamsHandler) - - def test_get_columns_returns_all_columns(self): - """ - Test that get_columns returns all columns. - """ - - chats_table = ChatsTable(self.api_handler) - - self.assertListEqual(chats_table.get_columns(), ms_teams_handler_config.CHATS_TABLE_COLUMNS) - - def test_select_star_for_single_chat_returns_all_columns(self): - # patch the api handler to return the chat data - with patch.object(self.api_handler.connect(), 'get_chat', return_value=ms_teams_handler_config.TEST_CHAT_DATA): - chats_table = ChatsTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="chats", - where=BinaryOperation( - op='=', - args=[ - Identifier('id'), - Constant("test_id") - ] - ) - ) - - all_chats = chats_table.select(select_all) - first_chat = all_chats.iloc[0] - - self.assertEqual(all_chats.shape[1], len(ms_teams_handler_config.CHATS_TABLE_COLUMNS)) - self.assertEqual(first_chat["id"], "test_id") - self.assertEqual(first_chat["chatType"], "oneOnOne") - - def test_select_star_for_all_chats_returns_all_columns(self): - # patch the api handler to return the chat data - with patch.object(self.api_handler.connect(), 'get_chats', return_value=ms_teams_handler_config.TEST_CHATS_DATA): - chats_table = ChatsTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="chats", - ) - - all_chats = chats_table.select(select_all) - first_chat = all_chats.iloc[0] - - self.assertEqual(all_chats.shape[1], len(ms_teams_handler_config.CHATS_TABLE_COLUMNS)) - self.assertEqual(first_chat["id"], "test_id") - self.assertEqual(first_chat["chatType"], "oneOnOne") - - def test_select_for_single_chat_returns_only_selected_columns(self): - # patch the api handler to return the chat data - with patch.object(self.api_handler.connect(), 'get_chat', return_value=ms_teams_handler_config.TEST_CHAT_DATA): - chats_table = ChatsTable(self.api_handler) - - select_all = ast.Select( - # select only the id and chatType columns - targets=[ - Identifier('id'), - Identifier('chatType'), - ], - from_table="chats", - where=BinaryOperation( - op='=', - args=[ - Identifier('id'), - Constant("test_id") - ] - ) - ) - - all_chats = chats_table.select(select_all) - first_chat = all_chats.iloc[0] - - self.assertEqual(all_chats.shape[1], 2) - self.assertEqual(first_chat["id"], "test_id") - self.assertEqual(first_chat["chatType"], "oneOnOne") - - def test_select_for_all_chats_returns_only_selected_columns(self): - # patch the api handler to return the chat data - with patch.object(self.api_handler.connect(), 'get_chats', return_value=ms_teams_handler_config.TEST_CHATS_DATA): - chats_table = ChatsTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[ - Identifier('id'), - Identifier('chatType'), - ], - from_table="chats", - ) - - all_chats = chats_table.select(select_all) - first_chat = all_chats.iloc[0] - - self.assertEqual(all_chats.shape[1], 2) - self.assertEqual(first_chat["id"], "test_id") - self.assertEqual(first_chat["chatType"], "oneOnOne") - - -class TestChatMessagesTable(unittest.TestCase): - """ - Tests for the ChatMessagesTable class. - """ - - @classmethod - def setUpClass(cls): - """ - Set up the tests. - """ - - # mock the api handler - cls.api_handler = Mock(MSTeamsHandler) - - def test_get_columns_returns_all_columns(self): - """ - Test that get_columns returns all columns. - """ - - chat_messages_table = ChatMessagesTable(self.api_handler) - - self.assertListEqual(chat_messages_table.get_columns(), ms_teams_handler_config.CHAT_MESSAGES_TABLE_COLUMNS) - - def test_select_star_for_single_chat_returns_all_columns(self): - # patch the api handler to return the chat message data - with patch.object(self.api_handler.connect(), 'get_chat_message', return_value=ms_teams_handler_config.TEST_CHAT_MESSAGE_DATA): - chat_messages_table = ChatMessagesTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="chat_messages", - where=[ - BinaryOperation( - op='=', - args=[ - Identifier('id'), - Constant("test_id") - ] - ), - BinaryOperation( - op='=', - args=[ - Identifier('chatId'), - Constant("test_chat_id") - ] - ) - ] - ) - - all_chat_messages = chat_messages_table.select(select_all) - first_chat_message = all_chat_messages.iloc[0] - - self.assertEqual(all_chat_messages.shape[1], len(ms_teams_handler_config.CHAT_MESSAGES_TABLE_COLUMNS)) - self.assertEqual(first_chat_message["id"], "test_id") - self.assertEqual(first_chat_message["messageType"], "message") - - def test_select_star_for_multiple_chats_returns_all_columns(self): - # patch the api handler to return the chat message data - with patch.object(self.api_handler.connect(), 'get_chat_messages', return_value=ms_teams_handler_config.TEST_CHAT_MESSAGES_DATA): - chat_messages_table = ChatMessagesTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="chat_messages", - where=[ - BinaryOperation( - op='=', - args=[ - Identifier('chatId'), - Constant("test_chat_id") - ] - ) - ] - ) - - all_chat_messages = chat_messages_table.select(select_all) - first_chat_message = all_chat_messages.iloc[0] - - self.assertEqual(all_chat_messages.shape[1], len(ms_teams_handler_config.CHAT_MESSAGES_TABLE_COLUMNS)) - self.assertEqual(first_chat_message["id"], "test_id") - self.assertEqual(first_chat_message["messageType"], "message") - - def test_select_star_for_all_chats_returns_all_columns(self): - # patch the api handler to return the chat message data - with patch.object(self.api_handler.connect(), 'get_all_chat_messages', return_value=ms_teams_handler_config.TEST_CHAT_MESSAGES_DATA): - chat_messages_table = ChatMessagesTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="chat_messages", - ) - - all_chat_messages = chat_messages_table.select(select_all) - first_chat_message = all_chat_messages.iloc[0] - - self.assertEqual(all_chat_messages.shape[1], len(ms_teams_handler_config.CHAT_MESSAGES_TABLE_COLUMNS)) - self.assertEqual(first_chat_message["id"], "test_id") - self.assertEqual(first_chat_message["messageType"], "message") - - def test_select_for_single_chat_returns_only_selected_columns(self): - # patch the api handler to return the chat message data - with patch.object(self.api_handler.connect(), 'get_chat_message', return_value=ms_teams_handler_config.TEST_CHAT_MESSAGE_DATA): - chat_messages_table = ChatMessagesTable(self.api_handler) - - select_all = ast.Select( - # select only the id and messageType columns - targets=[ - Identifier('id'), - Identifier('messageType'), - ], - from_table="chat_messages", - where=[ - BinaryOperation( - op='=', - args=[ - Identifier('id'), - Constant("test_id") - ] - ), - BinaryOperation( - op='=', - args=[ - Identifier('chatId'), - Constant("test_chat_id") - ] - ) - ] - ) - - all_chat_messages = chat_messages_table.select(select_all) - first_chat_message = all_chat_messages.iloc[0] - - self.assertEqual(all_chat_messages.shape[1], 2) - self.assertEqual(first_chat_message["id"], "test_id") - self.assertEqual(first_chat_message["messageType"], "message") - - def test_select_for_multiple_chats_returns_only_selected_columns(self): - # patch the api handler to return the chat message data - with patch.object(self.api_handler.connect(), 'get_chat_messages', return_value=ms_teams_handler_config.TEST_CHAT_MESSAGES_DATA): - chat_messages_table = ChatMessagesTable(self.api_handler) - - select_all = ast.Select( - # select only the id and messageType columns - targets=[ - Identifier('id'), - Identifier('messageType'), - ], - from_table="chat_messages", - where=[ - BinaryOperation( - op='=', - args=[ - Identifier('chatId'), - Constant("test_chat_id") - ] - ) - ] - ) - - all_chat_messages = chat_messages_table.select(select_all) - first_chat_message = all_chat_messages.iloc[0] - - self.assertEqual(all_chat_messages.shape[1], 2) - self.assertEqual(first_chat_message["id"], "test_id") - self.assertEqual(first_chat_message["messageType"], "message") - - def test_select_for_all_chats_returns_only_selected_columns(self): - # patch the api handler to return the chat message data - with patch.object(self.api_handler.connect(), 'get_all_chat_messages', return_value=ms_teams_handler_config.TEST_CHAT_MESSAGES_DATA): - chat_messages_table = ChatMessagesTable(self.api_handler) - - select_all = ast.Select( - # select only the id and messageType columns - targets=[ - Identifier('id'), - Identifier('messageType'), - ], - from_table="chat_messages", - ) - - all_chat_messages = chat_messages_table.select(select_all) - first_chat_message = all_chat_messages.iloc[0] - - self.assertEqual(all_chat_messages.shape[1], 2) - self.assertEqual(first_chat_message["id"], "test_id") - self.assertEqual(first_chat_message["messageType"], "message") - - def test_insert_chat_message_calls_correct_method_in_client(self): - """ - Test that send_chat_message sends a chat message. - """ - - # patch the api handler to return the chat message data - with patch.object(self.api_handler.connect(), 'send_chat_message', return_value=None) as mock_send_chat_message: - chat_messages_table = ChatMessagesTable(self.api_handler) - - insert = ast.Insert( - table="chat_messages", - columns=[ - Identifier('chatId'), - Identifier('body_content'), - Identifier('subject'), - ], - values=[ - ( - "test_chat_id", - "test_message", - "test_subject" - ) - ] - ) - - chat_messages_table.insert(insert) - - # assert the api handler's send_chat_message method was called with the expected arguments - mock_send_chat_message.assert_called_once_with(chat_id='test_chat_id', message='test_message', subject='test_subject') - - -class TestChannelsTable(unittest.TestCase): - """ - Tests for the ChannelsTable class. - """ - - @classmethod - def setUpClass(cls): - """ - Set up the tests. - """ - - # mock the api handler - cls.api_handler = Mock(MSTeamsHandler) - - def test_get_columns_returns_all_columns(self): - """ - Test that get_columns returns all columns. - """ - - channels_table = ChannelsTable(self.api_handler) - - self.assertListEqual(channels_table.get_columns(), ms_teams_handler_config.CHANNELS_TABLE_COLUMNS) - - def test_select_star_for_single_channel_returns_all_columns(self): - # patch the api handler to return the channel data - with patch.object(self.api_handler.connect(), 'get_channel', return_value=ms_teams_handler_config.TEST_CHANNEL_DATA): - channels_table = ChannelsTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="channels", - where=[ - BinaryOperation( - op='=', - args=[ - Identifier('id'), - Constant("test_id") - ] - ), - BinaryOperation( - op='=', - args=[ - Identifier('teamId'), - Constant("test_team_id") - ] - ) - ] - ) - - all_channels = channels_table.select(select_all) - first_channel = all_channels.iloc[0] - - self.assertEqual(all_channels.shape[1], len(ms_teams_handler_config.CHANNELS_TABLE_COLUMNS)) - self.assertEqual(first_channel["id"], "test_id") - self.assertEqual(first_channel["displayName"], "test_display_name") - - def test_select_star_for_all_channels_returns_all_columns(self): - # patch the api handler to return the channel data - with patch.object(self.api_handler.connect(), 'get_channels', return_value=ms_teams_handler_config.TEST_CHANNELS_DATA): - channels_table = ChannelsTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="channels", - ) - - all_channels = channels_table.select(select_all) - first_channel = all_channels.iloc[0] - - self.assertEqual(all_channels.shape[1], len(ms_teams_handler_config.CHANNELS_TABLE_COLUMNS)) - self.assertEqual(first_channel["id"], "test_id") - self.assertEqual(first_channel["displayName"], "test_display_name") - - def test_select_for_single_channel_returns_only_selected_columns(self): - # patch the api handler to return the channel data - with patch.object(self.api_handler.connect(), 'get_channel', return_value=ms_teams_handler_config.TEST_CHANNEL_DATA): - channels_table = ChannelsTable(self.api_handler) - - select_all = ast.Select( - # select only the id and displayName columns - targets=[ - Identifier('id'), - Identifier('displayName'), - ], - from_table="channels", - where=[ - BinaryOperation( - op='=', - args=[ - Identifier('id'), - Constant("test_id") - ] - ), - BinaryOperation( - op='=', - args=[ - Identifier('teamId'), - Constant("test_team_id") - ] - ) - ] - ) - - all_channels = channels_table.select(select_all) - first_channel = all_channels.iloc[0] - - self.assertEqual(all_channels.shape[1], 2) - self.assertEqual(first_channel["id"], "test_id") - self.assertEqual(first_channel["displayName"], "test_display_name") - - def test_select_for_all_channels_returns_only_selected_columns(self): - # patch the api handler to return the channel data - with patch.object(self.api_handler.connect(), 'get_channels', return_value=ms_teams_handler_config.TEST_CHANNELS_DATA): - channels_table = ChannelsTable(self.api_handler) - - select_all = ast.Select( - # select only the id and displayName columns - targets=[ - Identifier('id'), - Identifier('displayName'), - ], - from_table="channels", - ) - - all_channels = channels_table.select(select_all) - first_channel = all_channels.iloc[0] - - self.assertEqual(all_channels.shape[1], 2) - self.assertEqual(first_channel["id"], "test_id") - self.assertEqual(first_channel["displayName"], "test_display_name") - - -class TestChannelMessagesTable(unittest.TestCase): - """ - Tests for the ChannelMessagesTable class. - """ - - @classmethod - def setUpClass(cls): - """ - Set up the tests. - """ - - # mock the api handler - cls.api_handler = Mock(MSTeamsHandler) - - def test_get_columns_returns_all_columns(self): - """ - Test that get_columns returns all columns. - """ - - channel_messages_table = ChannelMessagesTable(self.api_handler) - - self.assertListEqual(channel_messages_table.get_columns(), ms_teams_handler_config.CHANNEL_MESSAGES_TABLE_COLUMNS) - - def test_select_star_for_single_channel_message_returns_all_columns(self): - # patch the api handler to return the channel message data - with patch.object(self.api_handler.connect(), 'get_channel_message', return_value=ms_teams_handler_config.TEST_CHANNEL_MESSAGE_DATA): - channel_messages_table = ChannelMessagesTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="channel_messages", - where=[ - BinaryOperation( - op='=', - args=[ - Identifier('id'), - Constant("test_id") - ] - ), - BinaryOperation( - op='=', - args=[ - Identifier('channelIdentity_channelId'), - Constant("test_channel_id") - ] - ), - BinaryOperation( - op='=', - args=[ - Identifier('channelIdentity_teamId'), - Constant("test_team_id") - ] - ) - ] - ) - - all_channel_messages = channel_messages_table.select(select_all) - first_channel_message = all_channel_messages.iloc[0] - - self.assertEqual(all_channel_messages.shape[1], len(ms_teams_handler_config.CHANNEL_MESSAGES_TABLE_COLUMNS)) - self.assertEqual(first_channel_message["id"], "test_id") - self.assertEqual(first_channel_message["messageType"], "message") - - def test_select_star_for_all_channel_messages_returns_all_columns(self): - # patch the api handler to return the channel message data - with patch.object(self.api_handler.connect(), 'get_channel_messages', return_value=ms_teams_handler_config.TEST_CHANNEL_MESSAGES_DATA): - channel_messages_table = ChannelMessagesTable(self.api_handler) - - select_all = ast.Select( - # select all columns - targets=[Star()], - from_table="channel_messages", - ) - - all_channel_messages = channel_messages_table.select(select_all) - first_channel_message = all_channel_messages.iloc[0] - - self.assertEqual(all_channel_messages.shape[1], len(ms_teams_handler_config.CHANNEL_MESSAGES_TABLE_COLUMNS)) - self.assertEqual(first_channel_message["id"], "test_id") - self.assertEqual(first_channel_message["messageType"], "message") - - def test_select_for_single_channel_message_returns_only_selected_columns(self): - # patch the api handler to return the channel message data - with patch.object(self.api_handler.connect(), 'get_channel_message', return_value=ms_teams_handler_config.TEST_CHANNEL_MESSAGE_DATA): - channel_messages_table = ChannelMessagesTable(self.api_handler) - - select_all = ast.Select( - # select only the id and messageType columns - targets=[ - Identifier('id'), - Identifier('messageType'), - ], - from_table="channel_messages", - where=[ - BinaryOperation( - op='=', - args=[ - Identifier('id'), - Constant("test_id") - ] - ), - BinaryOperation( - op='=', - args=[ - Identifier('channelIdentity_channelId'), - Constant("test_channel_id") - ] - ), - BinaryOperation( - op='=', - args=[ - Identifier('channelIdentity_teamId'), - Constant("test_team_id") - ] - ) - ] - ) - - all_channel_messages = channel_messages_table.select(select_all) - first_channel_message = all_channel_messages.iloc[0] - - self.assertEqual(all_channel_messages.shape[1], 2) - self.assertEqual(first_channel_message["id"], "test_id") - self.assertEqual(first_channel_message["messageType"], "message") - - def test_select_for_all_channel_messages_returns_only_selected_columns(self): - # patch the api handler to return the channel message data - with patch.object(self.api_handler.connect(), 'get_channel_messages', return_value=ms_teams_handler_config.TEST_CHANNEL_MESSAGES_DATA): - channel_messages_table = ChannelMessagesTable(self.api_handler) - - select_all = ast.Select( - # select only the id and messageType columns - targets=[ - Identifier('id'), - Identifier('messageType'), - ], - from_table="channel_messages", - ) - - all_channel_messages = channel_messages_table.select(select_all) - first_channel_message = all_channel_messages.iloc[0] - - self.assertEqual(all_channel_messages.shape[1], 2) - self.assertEqual(first_channel_message["id"], "test_id") - self.assertEqual(first_channel_message["messageType"], "message") - - def test_insert_channel_message_calls_correct_method_in_client(self): - """ - Test that insert_channel_message calls the correct method in the client. - """ - - # patch the api handler to return the channel message data - with patch.object(self.api_handler.connect(), 'send_channel_message', return_value=None) as mock_send_channel_message: - channel_messages_table = ChannelMessagesTable(self.api_handler) - - insert = ast.Insert( - table="channel_messages", - columns=[ - Identifier('channelIdentity_teamId'), - Identifier('channelIdentity_channelId'), - Identifier('body_content'), - Identifier('subject'), - ], - values=[ - ( - "test_team_id", - "test_channel_id", - "test_message", - "test_subject" - ) - ] - ) - - channel_messages_table.insert(insert) - - # assert the api handler's send_channel_message method was called with the expected arguments - mock_send_channel_message.assert_called_once_with(group_id='test_team_id', channel_id='test_channel_id', message='test_message', subject='test_subject') - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unused/unit/handler_tests/test_npm_handler.py b/tests/unused/unit/handler_tests/test_npm_handler.py deleted file mode 100644 index 44fffb13f4f..00000000000 --- a/tests/unused/unit/handler_tests/test_npm_handler.py +++ /dev/null @@ -1,86 +0,0 @@ -import importlib - -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("requests") - REQUESTS_INSTALLED = True -except ImportError: - REQUESTS_INSTALLED = False - - -@pytest.mark.skipif(not REQUESTS_INSTALLED, reason="requests package is not installed") -class TestNPMHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.test_package_name = "handlebars" - self.run_sql(""" - CREATE DATABASE npm_test - WITH ENGINE = "npm"; - """) - - def test_basic_select_from(self): - # Select from metadata table - sql = f""" - SELECT * FROM npm_test.metadata WHERE package='{self.test_package_name}'; - """ - assert self.run_sql(sql).shape[0] == 1 - - # Select from maintainers table - sql = f""" - SELECT * FROM npm_test.maintainers WHERE package='{self.test_package_name}'; - """ - self.run_sql(sql) - - # Select from keywords table - sql = f""" - SELECT * FROM npm_test.keywords WHERE package='{self.test_package_name}'; - """ - self.run_sql(sql) - - # Select from dependencies table - sql = f""" - SELECT * FROM npm_test.dependencies WHERE package='{self.test_package_name}'; - """ - self.run_sql(sql) - - # Select from dev_dependencies table - sql = f""" - SELECT * FROM npm_test.dev_dependencies WHERE package='{self.test_package_name}'; - """ - self.run_sql(sql) - - # Select from optional_dependencies table - sql = f""" - SELECT * FROM npm_test.optional_dependencies WHERE package='{self.test_package_name}'; - """ - self.run_sql(sql) - - # Select from github_stats table - sql = f""" - SELECT * FROM npm_test.github_stats WHERE package='{self.test_package_name}'; - """ - assert self.run_sql(sql).shape[0] == 1 - - def test_complex_select(self): - # Select email maintainers table - sql = f""" - SELECT email FROM npm_test.maintainers WHERE package='{self.test_package_name}'; - """ - assert self.run_sql(sql).shape[1] == 1 - - # Select single dependency - sql = f""" - SELECT * FROM npm_test.dependencies WHERE package='{self.test_package_name}' LIMIT 1; - """ - assert self.run_sql(sql).shape[0] == 1 diff --git a/tests/unused/unit/handler_tests/test_oilpriceapi_handler.py b/tests/unused/unit/handler_tests/test_oilpriceapi_handler.py deleted file mode 100644 index 907a171cab0..00000000000 --- a/tests/unused/unit/handler_tests/test_oilpriceapi_handler.py +++ /dev/null @@ -1,47 +0,0 @@ -import importlib -import os -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("requests") - REQUESTS_INSTALLED = True -except ImportError: - REQUESTS_INSTALLED = False - - -@pytest.mark.skipif(not REQUESTS_INSTALLED, reason="requests package is not installed") -class TestOilPriceAPIHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.api_key = os.environ.get("OILPRICEAPI_KEY") - self.run_sql(f""" - CREATE DATABASE mindsdb_oilpriceapi - WITH ENGINE = 'oilpriceapi', - PARAMETERS = { - "api_key": '{self.api_key}' - }; - """) - - def test_basic_select_from(self): - sql = "SELECT * FROM mindsdb_oilpriceapi.latest_price;" - assert self.run_sql(sql).shape[0] == 1 - - sql = "SELECT * FROM mindsdb_oilpriceapi.past_day_price;" - assert self.run_sql(sql).shape[0] == 20 - - def test_complex_select(self): - sql = 'SELECT price FROM mindsdb_oilpriceapi.latest_price where by_type="daily_average_price" and by_code="WTI_USD";' - assert self.run_sql(sql).shape[1] == 1 - - sql = "SELECT * FROM npm_test.past_day_price LIMIT 1;" - assert self.run_sql(sql).shape[0] == 1 diff --git a/tests/unused/unit/handler_tests/test_paypal_handler.py b/tests/unused/unit/handler_tests/test_paypal_handler.py deleted file mode 100644 index cc02e8bb676..00000000000 --- a/tests/unused/unit/handler_tests/test_paypal_handler.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import unittest - -from mindsdb.integrations.handlers.paypal_handler.paypal_handler import PayPalHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class PayPalHandlerTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.kwargs = { - "connection_data": { - "mode": os.environ.get('MODE'), - "client_id": os.environ.get('CLIENT_ID'), - "client_secret": os.environ.get('CLIENT_SECRET'), - } - } - cls.handler = PayPalHandler('test_paypal_handler', **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_2_select_payments_query(self): - query = "SELECT * FROM test_paypal_handler.payments" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_3_select_invoices_query(self): - query = "SELECT * FROM test_paypal_handler.invoices" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_select_subscriptions_query(self): - query = "SELECT * FROM test_paypal_handler.subscriptions" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_5_select_orders_query(self): - query = "SELECT * FROM test_paypal_handler.orders WHERE ids = ('')" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE diff --git a/tests/unused/unit/handler_tests/test_pgvector_handler.py b/tests/unused/unit/handler_tests/test_pgvector_handler.py index cc520bc4ee8..3eaa5316317 100644 --- a/tests/unused/unit/handler_tests/test_pgvector_handler.py +++ b/tests/unused/unit/handler_tests/test_pgvector_handler.py @@ -82,53 +82,3 @@ def test_select(self, handler): assert not result.empty for col in COLUMN_NAMES: assert col in result.columns - - def test_hybrid_search_with_keywords(self, handler): - result = handler.hybrid_search( - TEST_TABLE_NAME, - # Embeddings (semantic) search. - [7.0, 8.0, 9.0], - # Keyword search. - query='cat rat' - ) - # Top result is an exact embeddings match. - assert result.iloc[0]['embeddings'].tolist() == [7.0, 8.0, 9.0] - # Top result should include both keywords. - assert 'cat' in result.iloc[0]['content'] - assert 'rat' in result.iloc[0]['content'] - - def test_hybrid_search_with_metadata(self, handler): - result = handler.hybrid_search( - TEST_TABLE_NAME, - # Embeddings (semantic) search. - [4.0, 5.0, 6.0], - # Metadata filters. - metadata={'location': 'Wonderland', 'author': 'Taishan'} - ) - # Only two items match metadata filters. - assert len(result.index) == 2 - # Top result is an exact embeddings match. - assert result.iloc[0]['embeddings'].tolist() == [4.0, 5.0, 6.0] - - def test_hybrid_search_with_keywords_and_metadata(self, handler): - result = handler.hybrid_search( - TEST_TABLE_NAME, - # Embeddings (semantic) search. - [4.0, 5.0, 6.0], - # Keyword search. - query='fat cat', - # Metadata filters. - metadata={'location': 'Wonderland', 'author': 'Taishan'} - ) - # Only two items match metadata filters. - assert len(result.index) == 2 - # Top result is actually a keyword match because embeddings are close. - assert result.iloc[0]['embeddings'].tolist() == [1.0, 2.0, 3.0] - - def test_hybrid_search_no_query_or_metadata(self, handler): - with pytest.raises(ValueError): - _ = handler.hybrid_search( - TEST_TABLE_NAME, - # Embeddings (semantic) search. - [4.0, 5.0, 6.0], - ) diff --git a/tests/unused/unit/handler_tests/test_pinecone_handler.py b/tests/unused/unit/handler_tests/test_pinecone_handler.py deleted file mode 100644 index 46def1b070a..00000000000 --- a/tests/unused/unit/handler_tests/test_pinecone_handler.py +++ /dev/null @@ -1,274 +0,0 @@ -# check if chroma_db is installed -import importlib -from unittest.mock import patch -import os - -import pandas as pd -import pytest -from mindsdb_sql_parser import parse_sql - -from tests.unit.executor_test_base import BaseExecutorTest - -try: - pinecone = importlib.import_module("pinecone") - PINECONE_CLIENT_INSTALLED = True -except ImportError: - PINECONE_CLIENT_INSTALLED = False - - -# NOTE: These tests might fail since pinecone is eventually consistent. Some queries return wrong result when tested - -@pytest.mark.skipif(not PINECONE_CLIENT_INSTALLED, reason="pinecone client is not installed") -class TestPineconeHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - # Replace with your pinecone key - self.api_key = os.environ['PINECONE_API_KEY'] - self.environment = os.environ['PINECONE_ENV'] - self.run_sql(f""" - CREATE DATABASE pinecone_test - WITH ENGINE = "pinecone", - PARAMETERS = {{ - "api_key": "{self.api_key}", - "environment": "{self.environment}" - }} - """) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_with_select(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [1.0, 2.0, 3.0, 5.0, 6.0, 8.0, 9.0, 3.0]], - } - ) - - self.set_handler(postgres_handler_mock, "pg", tables={"df": df}) - sql = """ - CREATE TABLE pinecone_test.testtable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_select_from(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [1.0, 2.0, 3.0, 5.0, 6.0, 8.0, 9.0, 3.0]], - } - ) - - self.set_handler(postgres_handler_mock, "pg", tables={"testtable": df}) - sql = """ - CREATE TABLE pinecone_test.testtable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # query a table with id - sql = """ - SELECT * FROM pinecone_test.testtable - WHERE id = 'id1' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a search vector, with out limit - sql = """ - SELECT * FROM pinecone_test.testtable - WHERE search_vector = '[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a search vector, with limit - sql = """ - SELECT * FROM pinecone_test.testtable - WHERE search_vector = '[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]' - LIMIT 1 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a metadata filter and a search vector - sql = """ - SELECT * FROM pinecone_test.testtable - WHERE testable.metadata.test = 'test' - AND search_vector = '[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_insert_into(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [1.0, 2.0, 3.0, 5.0, 6.0, 8.0, 9.0, 3.0]], - } - ) - - df2 = pd.DataFrame( - { - "id": ["id1", "id2", "id3"], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [ - [1.0, 2.0, 3.0, 4.0, 3.0, 5.0, 2.0, 8.1], - [4.0, 2.0, 7.0, 4.0, 2.0, 5.0, 2.0, 9.0], - [5.0, 2.0, 3.0, 2.0, 3.0, 3.0, 2.0, 7.0], - ], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"df": df, "df2": df2}) - - sql = """ - CREATE TABLE pinecone_test.testtable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - sql = """ - INSERT INTO pinecone_test.testtable ( - id,content,metadata,embeddings - ) - VALUES ( - 'some_unique_id', 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]' - ) - """ - self.run_sql(sql) - # check if the data is inserted - sql = """ - SELECT * FROM pinecone_test.testtable - WHERE id = 'some_unique_id' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 3 - - # insert into a table with existing id, shall work - sql = """ - INSERT INTO pinecone_test.testtable ( - id,content,metadata,embeddings - ) - VALUES ( - 'id1', 'this is a test', '{"test": "tester"}', '[1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 8.0, 9.0]' - ) - """ - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_delete(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}], - "embeddings": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [1.0, 2.0, 3.0, 5.0, 6.0, 8.0, 9.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testtable": df}) - - # create a table - sql = """ - CREATE TABLE pinecone_test.testtable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # delete from a table with a metadata filter - sql = """ - DELETE FROM pinecone_test.testtable - WHERE testtable.metadata.test = 'test1' - """ - self.run_sql(sql) - - # delete by id - sql = """ - DELETE FROM pinecone_test.testtable - WHERE id = 'id2' - """ - self.run_sql(sql) - - # delete from a table without any filters is not allowed - sql = """ - DELETE FROM pinecone_test.testtable - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @pytest.mark.xfail(reason="update for pinecone is not implemented, use insert") - def test_update(self): - # update a table with a metadata filter - sql = """ - UPDATE pinecone_test.testtable - SET metadata.test = 'test2' - WHERE metadata.test = 'test' - """ - # sql shoudl fail - with pytest.raises(Exception): - self.run_sql(sql) - - @pytest.mark.xfail(reason="create table for vectordatabase is not well supported") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_table(self, postgres_handler_mock): - # create an empty table - sql = """ - CREATE TABLE pinecone_test.testtable; - """ - self.run_sql(sql) - # create a table with the schema definition is not allowed - sql = """ - CREATE TABLE pinecone_test.testtable ( - id int, - metadata text, - embedding float[] - ); - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @pytest.mark.xfail(reason="drop table for vectordatabase is not working") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_drop_table(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [1.0, 2.0, 3.0, 5.0, 6.0, 8.0, 9.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testtable": df}) - sql = """ - CREATE TABLE piecone_test.testtable( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - sql = """ - DROP TABLE pinecone_test.testtable; - """ - self.run_sql(sql) - sql = """ - DROP TABLE pinecone_test.testtable2; - """ - with pytest.raises(Exception): - self.run_sql(sql) diff --git a/tests/unused/unit/handler_tests/test_qdrant_handler.py b/tests/unused/unit/handler_tests/test_qdrant_handler.py deleted file mode 100644 index 205fb9db392..00000000000 --- a/tests/unused/unit/handler_tests/test_qdrant_handler.py +++ /dev/null @@ -1,442 +0,0 @@ -from unittest.mock import patch - -import pytest -import pandas as pd -from mindsdb_sql_parser import parse_sql - -from tests.unit.executor_test_base import BaseExecutorTest - - -class TestQdrantHandler(BaseExecutorTest): - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - # connect to a Qdrant instance and create a database - self.run_sql( - """ - CREATE DATABASE qtest - WITH ENGINE = "qdrant", - PARAMETERS = { - "location": ":memory:", - "collection_config": { - "size": 3, - "distance": "Cosine" - } - } - """ - ) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_with_select(self, postgres_handler_mock): - df = pd.DataFrame( - { - - "id": [1, 2], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - - sql = """ - CREATE TABLE qtest.test_table_1 ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - @pytest.mark.xfail(reason="drop table for vectordatabase is not working") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_drop_table(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [32, 13], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - - # create a table - sql = """ - CREATE TABLE qtest.test_table_2 ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - sql = """ - DROP TABLE qtest.test_table_2; - """ - self.run_sql(sql) - - sql = """ - DROP TABLE qtest.test_table_22; - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_insert_into(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [81, 24, 33], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - df2 = pd.DataFrame( - { - "id": [1, 2, 3], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [ - [1.0, 2.0, 3.0, 4.0], - [1.0, 2.0], - [1.0, 2.0, 3.0], - ], # different dimensions - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"df": df, "df2": df2}) - num_record = df.shape[0] - - # create a table - sql = """ - CREATE TABLE qtest.test_table_3 ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # insert into a table with values - sql = """ - INSERT INTO qtest.test_table_3 ( - id,content,metadata,embeddings - ) - VALUES ( - 4, 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - self.run_sql(sql) - # check if the data is inserted - sql = """ - SELECT * FROM qtest.test_table_3 - WHERE id = 4 AND search_vector = '[1.0, 2.0, 3.0]' AND search_vector= '[1.0, 2.0, 4.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # insert without ids should auto-generate ids - sql = """ - INSERT INTO qtest.test_table_3 ( - content,metadata,embeddings - ) - VALUES ( - 'this is a test without ID', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - self.run_sql(sql) - - # check if the data is inserted - sql = """ - SELECT * FROM qtest.test_table_3 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == num_record + 2 - - # insert into a table with a select statement - sql = """ - INSERT INTO qtest.test_table_3 ( - content,metadata,embeddings - ) - SELECT - content,metadata,embeddings - FROM - pg.df - """ - self.run_sql(sql) - - # check if the data is inserted - sql = """ - SELECT * FROM qtest.test_table_3 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == num_record * 2 + 2 - - # insert into a table with a select statement, but wrong columns - sql = """ - INSERT INTO qtest.test_table_3 - SELECT - content,metadata,embeddings as wrong_column - FROM - pg.df - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # insert into a table with a select statement, missing metadata column - sql = """ - INSERT INTO qtest.test_table_3 - SELECT - content,embeddings - FROM - pg.df - """ - self.run_sql(sql) - - # insert into a table with a select statement, missing embedding column, shall raise an error - sql = """ - INSERT INTO qtest.test_table_3 - SELECT - content,metadata - FROM - pg.df - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # insert into a table with a select statement, with different embedding dimensions, shall raise an error - sql = """ - INSERT INTO qtest.test_table_3 - SELECT - content,metadata,embeddings - FROM - pg.df2 - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # insert into a table with existing id overwrites the existing record - sql = """ - INSERT INTO qtest.test_table_3 ( - id,content,metadata,embeddings - ) - VALUES ( - 4, 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_select_from(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [32, 33], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "Info"}, {"test": "Info"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - # create a table - sql = """ - CREATE TABLE qtest.test_table_4 ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # query a table without any filters - sql = """ - SELECT * FROM qtest.test_table_4 - """ - self.run_sql(sql) - - # query a table with id - sql = """ - SELECT * FROM qtest.test_table_4 - WHERE id = 32 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a search vector, without limit - sql = """ - SELECT * FROM qtest.test_table_4 - WHERE search_vector = '[1.0, 2.0, 3.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a search vector, with limit - sql = """ - SELECT * FROM qtest.test_table_4 - WHERE search_vector = '[1.0, 2.0, 3.0]' - LIMIT 1 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a metadata filter - sql = """ - SELECT * FROM qtest.test_table_4 - WHERE `metadata.test` = 'Info'; - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - sql = """ - SELECT * FROM qtest.test_table_4 - WHERE `metadata.test` = 'Info' - AND search_vector = '[1.0, 2.0, 3.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - @pytest.mark.xfail(reason="upsert for vectordatabase is not implemented") - def test_update(self): - # update a table with a metadata filter - sql = """ - UPDATE qtest.test_table_5 - SET `metadata.test` = 'test2' - WHERE `metadata.test` = 'test' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM qtest.test_table_5 - WHERE `metadata.test` = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # update the embeddings - sql = """ - UPDATE qtest.test_table_5 - SET embedding = [3.0, 2.0, 1.0] - WHERE `metadata.test` = 'test2' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM qtest.test_table_5 - WHERE `metadata.test` = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - assert ret.embedding[0] == [3.0, 2.0, 1.0] - - # update multiple columns - sql = """ - UPDATE qtest.test_table_5 - SET `metadata.test` = 'test3', - embedding = [1.0, 2.0, 3.0] - content = 'this is a test' - WHERE `metadata.test` = 'test2' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM qtest.test_table_5 - WHERE `metadata.test` = 'test3' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - assert ret.embedding[0] == [1.0, 2.0, 3.0] - assert ret.content[0] == "this is a test" - - # update a table with a search vector filter is not allowed - sql = """ - UPDATE qtest.test_table_5 - SET `metadata.test = 'test2' - WHERE search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # update a table without any filters is allowed - sql = """ - UPDATE qtest.test_table_5 - SET metadata.test = 'test3' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM qtest.test_table_5 - WHERE `metadata.test` = 'test3' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # update a table with a search vector filter and a metadata filter is not allowed - sql = """ - UPDATE qtest.test_table_5 - SET metadata.test = 'test3' - WHERE metadata.test = 'test2' - AND search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_delete(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [1, 2], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - - # create a table - sql = """ - CREATE TABLE qtest.test_table_6 ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # delete from a table with a metadata filter - sql = """ - DELETE FROM qtest.test_table_6 - WHERE `metadata.test` = 'test1' - """ - self.run_sql(sql) - # check if the data is deleted - sql = """ - SELECT * FROM qtest.test_table_6 - WHERE `metadata.test` = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # delete by id - sql = """ - DELETE FROM qtest.test_table_6 - WHERE id = 2 - """ - self.run_sql(sql) - # check if the data is deleted - sql = """ - SELECT * FROM qtest.test_table_6 - WHERE id = 2 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 0 - - # delete from a table with a search vector filter is not allowed - sql = """ - DELETE FROM qtest.test_table_6 - WHERE search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # delete from a table without any filters is not allowed - sql = """ - DELETE FROM qtest.test_table_6 - """ - with pytest.raises(Exception): - self.run_sql(sql) diff --git a/tests/unused/unit/handler_tests/test_rag_pipelines.py b/tests/unused/unit/handler_tests/test_rag_pipelines.py deleted file mode 100644 index d2f884b8322..00000000000 --- a/tests/unused/unit/handler_tests/test_rag_pipelines.py +++ /dev/null @@ -1,53 +0,0 @@ -import pytest -import yaml -from langchain_core.documents import Document -from langchain_openai import ChatOpenAI, OpenAIEmbeddings - -from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG -from pathlib import Path - -from mindsdb.integrations.utilities.rag.settings import DEFAULT_LLM_MODEL, RAGPipelineModel - -DEFAULT_LLM = ChatOpenAI(model_name=DEFAULT_LLM_MODEL, temperature=0) -DEFAULT_EMBEDDINGS = OpenAIEmbeddings() - -path = Path(__file__).parent -config_path = path / "data" / "rag_pipelines" -pipeline_configs = list(config_path.glob('*.yml')) - - -def create_test_documents(): - return [ - Document( - page_content="This is a test document", - metadata={"doc_id": "1"} - ), - Document( - page_content="This is also a test document", - metadata={"doc_id": "2"} - ), - Document( - page_content="This is another test document", - metadata={"doc_id": "3"} - ) - ] - - -@pytest.fixture(params=pipeline_configs, ids=lambda x: x.stem, scope='module') -def config(request): - with open(request.param, 'r') as file: - config = yaml.safe_load(file) - config['documents'] = create_test_documents() - config['llm'] = DEFAULT_LLM - config['embedding_model'] = DEFAULT_EMBEDDINGS - - return RAGPipelineModel(**config) - - -def test_rag_pipeline_creation(config): - rag = RAG(config) - result = rag.pipeline.invoke('test document') - - assert result is not None - assert isinstance(result, dict) - assert all(key in result for key in ['answer', 'context', 'question']) diff --git a/tests/unused/unit/handler_tests/test_rocket_chat_handler.py b/tests/unused/unit/handler_tests/test_rocket_chat_handler.py deleted file mode 100644 index 575ab17ab9a..00000000000 --- a/tests/unused/unit/handler_tests/test_rocket_chat_handler.py +++ /dev/null @@ -1,96 +0,0 @@ -from mindsdb.integrations.handlers.rocket_chat_handler.rocket_chat_tables import ( - RocketChatMessagesTable, -) -from mindsdb.integrations.handlers.rocket_chat_handler.rocket_chat_handler import ( - RocketChatHandler, -) -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast.select.star import Star -from mindsdb_sql_parser.ast.select.identifier import Identifier - -from unittest.mock import Mock - -import pandas as pd -import unittest - - -class RocketChatMessagesTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(RocketChatHandler) - messages_table = RocketChatMessagesTable(api_handler) - # Order matters. - expected_columns = [ - "id", - "room_id", - "bot_id", - "text", - "username", - "name", - "sent_at", - ] - self.assertListEqual(messages_table.get_columns(), expected_columns) - - def test_select_returns_all_columns(self): - api_handler = Mock(RocketChatHandler) - api_handler.call_rocket_chat_api.return_value = pd.DataFrame( - [ - [ - "message_id_1", # id - "GENERAL", # room_id - "bot_id_1", # bot_id - "YEWWWWW", # text - "shoresey", # username - "Shore Keeso", # name - "2023-05-05T00:31:46.825Z", # sent_at - ] - ] - ) - messages_table = RocketChatMessagesTable(api_handler) - - select_all = ast.Select( - targets=[Star()], from_table="channel_messages", where='room_id = "GENERAL"' - ) - - all_messages = messages_table.select(select_all) - first_message = all_messages.iloc[0] - - self.assertEqual(all_messages.shape[1], 7) - self.assertEqual(first_message["id"], "message_id_1") - self.assertEqual(first_message["room_id"], "GENERAL") - self.assertEqual(first_message["bot_id"], "bot_id_1") - self.assertEqual(first_message["text"], "YEWWWWW") - self.assertEqual(first_message["username"], "shoresey") - self.assertEqual(first_message["name"], "Shore Keeso") - self.assertEqual(first_message["sent_at"], "2023-05-05T00:31:46.825Z") - - def test_select_returns_only_selected_columns(self): - api_handler = Mock(RocketChatHandler) - api_handler.call_rocket_chat_api.return_value = pd.DataFrame( - [ - [ - "message_id_1", # id - "GENERAL", # room_id - "bot_id_1", # bot_id - "YEWWWWW", # text - "shoresey", # username - "Shore Keeso", # name - "2023-05-05T00:31:46.825Z", # sent_at - ] - ] - ) - messages_table = RocketChatMessagesTable(api_handler) - - room_id_identifier = Identifier(path_str="room_id") - text_identifier = Identifier(path_str="text") - select_basic = ast.Select( - targets=[room_id_identifier, text_identifier], - from_table="channel_messages", - where='room_id = "GENERAL"', - ) - - all_messages = messages_table.select(select_basic) - first_message = all_messages.iloc[0] - - self.assertEqual(all_messages.shape[1], 2) - self.assertEqual(first_message["room_id"], "GENERAL") - self.assertEqual(first_message["text"], "YEWWWWW") diff --git a/tests/unused/unit/handler_tests/test_sap_erp_handler.py b/tests/unused/unit/handler_tests/test_sap_erp_handler.py deleted file mode 100644 index 4568907c150..00000000000 --- a/tests/unused/unit/handler_tests/test_sap_erp_handler.py +++ /dev/null @@ -1,240 +0,0 @@ -import importlib -import os - -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("requests") - REQUESTS_INSTALLED = True -except ImportError: - REQUESTS_INSTALLED = False - - -@pytest.mark.skipif(not REQUESTS_INSTALLED, reason="requests package is not installed") -class TestSAPERPHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.base_url = os.environ.get("SAP_ERP_BASE_URL") - self.api_key = os.environ.get("SAP_ERP_API_KEY") - self.run_sql(f""" - CREATE DATABASE sap_datasource - WITH ENGINE = "sap_erp", - PARAMETERS = {{ - "api_key": '{self.api_key}', - "base_url": '{self.base_url}' - }}; - """) - - def test_basic_select_from(self): - sql = """ - SELECT * FROM sap_datasource.address_email_address; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.address_fax_number; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.address_home_page; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.address_phone_number; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_addr_depdnt_intl_loc_number; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_contact_to_address; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_contact_to_func_and_dept; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_credit_worthiness; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_data_controller; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_financial_services_extn; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_financial_services_reporting; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_fiscal_year_information; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bp_relationship; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bu_pa_address_usage; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bu_pa_identification; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.bu_pa_industry; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.business_partner; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.business_partner_address; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.business_partner_contact; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.business_partner_payment_card; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.business_partner_rating; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.business_partner_role; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.business_partner_tax_number; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.business_partner_address_dependent_tax_number; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.cust_addr_depdnt_ext_identifier; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_company; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_company_text; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_dunning; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_sales_area; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_sales_area_tax; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_sales_area_text; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_tax_grouping; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_text; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_unloading_point; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_withholding_tax; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_sales_partner_func; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_sales_area_addr_depdnt_info; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_sales_area_addr_depdnt_tax_info; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.customer_unloading_point_addr_depdnt_info; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier_company; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier_company_text; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier_dunning; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier_partner_func; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier_purchasing_org; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier_purchasing_org_text; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier_text; - """ - self.run_sql(sql) - sql = """ - SELECT * FROM sap_datasource.supplier_withholding_tax; - """ - self.run_sql(sql) - - def test_complex_select(self): - sql = """ - SELECT AddressID FROM sap_datasource.address_email_address; - """ - assert self.run_sql(sql).shape[1] == 1 diff --git a/tests/unused/unit/handler_tests/test_storage_handler.py b/tests/unused/unit/handler_tests/test_storage_handler.py deleted file mode 100644 index 1aba0fd5a35..00000000000 --- a/tests/unused/unit/handler_tests/test_storage_handler.py +++ /dev/null @@ -1,23 +0,0 @@ -import unittest - -from mindsdb.utilities.config import Config -from mindsdb.integrations.libs.storage_handler import RedisStorageHandler, SqliteStorageHandler - - -class StorageHandlerTest(unittest.TestCase): - def test_1_redis_storage(self): - store = RedisStorageHandler({'test_1_redis_storage': 'value'}, config={'host': 'localhost', 'port': '6379'}) - store.set('test_key', 42) - self.assertTrue(store.get('test_key') == 42) - self.assertRaises(Exception, store.get('test_key2')) - - def test_2_sqlite_storage(self): - config = Config() - name = 'test_2_sqlite_storage' - store = SqliteStorageHandler(context=name, config={ - 'path': config['paths']['root'], - 'name': name - }) - store.set('test_key', 42) - self.assertTrue(store.get('test_key') == 42) - self.assertRaises(Exception, store.get('test_key2')) diff --git a/tests/unused/unit/handler_tests/test_symbl_handler.py b/tests/unused/unit/handler_tests/test_symbl_handler.py deleted file mode 100644 index 2911d433674..00000000000 --- a/tests/unused/unit/handler_tests/test_symbl_handler.py +++ /dev/null @@ -1,57 +0,0 @@ -import importlib -import os -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("symbl") - SYMBL_INSTALLED = True -except ImportError: - SYMBL_INSTALLED = False - - -@pytest.mark.skipif(not SYMBL_INSTALLED, reason="symbl package is not installed") -class TestSymblAPIHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.app_id = os.environ.get("SYMBL_APP_ID") - self.app_secret = os.environ.get("SYMBL_APP_SECRET") - self.run_sql(f""" - CREATE DATABASE mindsdb_symbl - WITH ENGINE = 'symbl', - PARAMETERS = { - "app_id": '{self.app_id}', - "app_secret": '{self.app_secret}' - }; - """) - - def test_basic_select_from(self): - - conversation_id = "5682305049034752" - - sql = f'SELECT * FROM mindsdb_symbl.get_messages where conversation_id="{conversation_id}"' - self.run_sql(sql) - - sql = f'SELECT * FROM mindsdb_symbl.get_topics where conversation_id="{conversation_id}"' - self.run_sql(sql) - - sql = f'SELECT * FROM mindsdb_symbl.get_questions where conversation_id="{conversation_id}"' - self.run_sql(sql) - - sql = f'SELECT * FROM mindsdb_symbl.get_analytics where conversation_id="{conversation_id}"' - self.run_sql(sql) - - sql = f'SELECT * FROM mindsdb_symbl.get_action_items where conversation_id="{conversation_id}"' - self.run_sql(sql) - - sql = f'SELECT * FROM mindsdb_symbl.get_follow_ups where conversation_id="{conversation_id}"' - self.run_sql(sql) diff --git a/tests/unused/unit/handler_tests/test_tripadvisor_handler.py b/tests/unused/unit/handler_tests/test_tripadvisor_handler.py deleted file mode 100644 index 51d7a02b833..00000000000 --- a/tests/unused/unit/handler_tests/test_tripadvisor_handler.py +++ /dev/null @@ -1,324 +0,0 @@ -from mindsdb.integrations.handlers.tripadvisor_handler.tripadvisor_handler import ( - TripAdvisorHandler, -) - -from mindsdb.integrations.handlers.tripadvisor_handler.tripadvisor_table import ( - SearchLocationTable, - LocationDetailsTable, - ReviewsTable, - PhotosTable, - NearbyLocationTable, -) - -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast.select.star import Star -from mindsdb_sql_parser.ast.select.identifier import Identifier - -from unittest.mock import Mock - -import pandas as pd -import unittest - - -class SearchLocationTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(TripAdvisorHandler) - trades_table = SearchLocationTable(api_handler) - # Order matters. - expected_columns = [ - "location_id", - "name", - "distance", - "rating", - "bearing", - "street1", - "street2", - "city", - "state", - "country", - "postalcode", - "address_string", - "phone", - "latitude", - "longitude", - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_some_columns(self): - api_handler = Mock(TripAdvisorHandler) - api_handler.call_tripadvisor_searchlocation_api.return_value = pd.DataFrame( - [ - [ - "186338", # locationId - "London", # name - "United Kingdom", # country - "London England", # address_string - ] - ] - ) - tripadvisor_table = SearchLocationTable(api_handler) - - select_all = ast.Select( - targets=[Star()], - from_table="searchLocationTable", - where='searchQuery = "London"', - ) - - all_location_data = tripadvisor_table.select(select_all) - first_data = all_location_data.iloc[0] - - self.assertEqual(first_data["locationId"], "186338") - self.assertEqual(first_data["name"], "London") - self.assertEqual(first_data["country"], "United Kingdom") - self.assertEqual(first_data["address_string"], "London England") - - -class LocationDetailsTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(TripAdvisorHandler) - trades_table = LocationDetailsTable(api_handler) - # Order matters. - expected_columns = [ - "location_id", - "distance", - "name", - "description", - "web_url", - "street1", - "street2", - "city", - "state", - "country", - "postalcode", - "address_string", - "latitude", - "longitude", - "timezone", - "email", - "phone", - "website", - "write_review", - "ranking_data", - "rating", - "rating_image_url", - "num_reviews", - "photo_count", - "see_all_photos", - "price_level", - "brand", - "parent_brand", - "ancestors", - "periods", - "weekday", - "features", - "cuisines", - "amenities", - "trip_types", - "styles", - "awards", - "neighborhood_info", - "parent_brand", - "brand", - "groups", - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_some_columns(self): - api_handler = Mock(TripAdvisorHandler) - api_handler.call_tripadvisor_searchlocation_api.return_value = pd.DataFrame( - [ - [ - "23322232", # locationId - "La Polleria Alicante", # name - "We are revolutionizing Alicante! Come and try the waffle that is on everyone's mouth. Come and get your #pollofre or #pollolo, we are waiting for you!", # description - "https://www.tripadvisor.com/Restaurant_Review-g1064230-d23322232-Reviews-La_Polleria_Alicante-Alicante_Costa_Blanca_Province_of_Alicante_Valencian_Commu.html?m=66827", # web_url - ] - ] - ) - - tripadvisor_table = LocationDetailsTable(api_handler) - - select_all = ast.Select( - targets=[Star()], - from_table="locationDetailsTable", - where='searchQuery = "23322232"', - ) - - all_location_data = tripadvisor_table.select(select_all) - first_data = all_location_data.iloc[0] - - self.assertEqual(first_data["locationId"], "23322232") - self.assertEqual(first_data["name"], "La Polleria Alicante") - self.assertEqual( - first_data["description"], - "We are revolutionizing Alicante! Come and try the waffle that is on everyone's mouth. Come and get your #pollofre or #pollolo, we are waiting for you!", - ) - self.assertEqual( - first_data["web_url"], - "https://www.tripadvisor.com/Restaurant_Review-g1064230-d23322232-Reviews-La_Polleria_Alicante-Alicante_Costa_Blanca_Province_of_Alicante_Valencian_Commu.html?m=66827", - ) - - -class ReviewTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(TripAdvisorHandler) - trades_table = ReviewsTable(api_handler) - # Order matters. - expected_columns = [ - "id", - "lang", - "location_id", - "published_date", - "rating", - "helpful_votes", - "rating_image_url", - "url", - "trip_type", - "travel_date", - "text_review", - "title", - "owner_response", - "is_machine_translated", - "user", - "subratings", - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_some_columns(self): - api_handler = Mock(TripAdvisorHandler) - api_handler.call_tripadvisor_searchlocation_api.return_value = pd.DataFrame( - [ - [ - "921095426", # id - "en", # lang - "99288", # location_id - "2023-10-13T09:48:46Z", # published_date - "1", # rating - "Check in is at 4:00 - my room wasn't ready until 9:00. I had no A/C on my first night and it was too late for engineering to fix it. I was offered a $40 discount from some ridiculous hoteL fee - that was a joke on a 700/night stay. I would not recommend this place at all.", # text review - "POOR CHECK -IN - NO A/C AND NO DISCOUNT", # title - ] - ] - ) - tripadvisor_table = ReviewsTable(api_handler) - - id_identifier = Identifier(path_str='id') - lang_identifier = Identifier(path_str='en') - location_id_identifier = Identifier(path_str='location_id') - published_date_identifier = Identifier(path_str='published_date') - rating_identifier = Identifier(path_str='rating') - text_review_identifier = Identifier(path_str='room_id') - title_identifier = Identifier(path_str='text') - select_all = ast.Select( - targets=[id_identifier, lang_identifier, location_id_identifier, published_date_identifier, rating_identifier, text_review_identifier, title_identifier], - from_table="reviewsTable", - where='locationId = "99288"', - ) - - review_data = tripadvisor_table.select(select_all) - first_data = review_data.iloc[0] - - self.assertEqual(review_data.shape[1], 7) - self.assertEqual(first_data["id"], "921095426") - self.assertEqual(first_data["en"], "en") - self.assertEqual(first_data["rating"], "1") - self.assertEqual(first_data["title"], "POOR CHECK -IN - NO A/C AND NO DISCOUNT") - - -class PhotosTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(TripAdvisorHandler) - trades_table = PhotosTable(api_handler) - # Order matters. - expected_columns = [ - "id", - "is_blessed", - "album", - "caption", - "published_date", - "images", - "source", - "user", - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_some_columns(self): - api_handler = Mock(TripAdvisorHandler) - api_handler.call_tripadvisor_searchlocation_api.return_value = pd.DataFrame( - [ - [ - "673312657", # id - "false", # is_blessed - "Hotel & Grounds", # album - "{'name': 'Management', 'localized_name': 'Management'}", # source - ] - ] - ) - tripadvisor_table = PhotosTable(api_handler) - - id_identifier = Identifier(path_str='id') - is_blessed_identifier = Identifier(path_str='is_blessed') - album_identifier = Identifier(path_str='album') - source_identifier = Identifier(path_str='source') - select_all = ast.Select( - targets=[id_identifier, is_blessed_identifier, album_identifier, source_identifier], - from_table="photosTable", - where='locationId = "99288"', - ) - - all_location_data = tripadvisor_table.select(select_all) - first_data = all_location_data.iloc[0] - - self.assertEqual(all_location_data.shape[1], 4) - self.assertEqual(first_data["id"], "673312657") - self.assertEqual(first_data["is_blessed"], "false") - self.assertEqual(first_data["album"], "Hotel & Grounds") - self.assertEqual(first_data["source"], "{'name': 'Management', 'localized_name': 'Management'}") - - -class NearbySearchTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - api_handler = Mock(TripAdvisorHandler) - trades_table = NearbyLocationTable(api_handler) - # Order matters. - expected_columns = [ - "location_id", - "name", - "distance", - "rating", - "bearing", - "address_obj", - ] - self.assertListEqual(trades_table.get_columns(), expected_columns) - - def test_select_returns_some_columns(self): - api_handler = Mock(TripAdvisorHandler) - api_handler.call_tripadvisor_searchlocation_api.return_value = pd.DataFrame( - [ - [ - "210108", # location_id - "American Museum of Natural History", # name - "0.039615104835680856", # distance - "{'street1': '79th Street', 'street2': 'Central Park West', 'city': 'New York City', 'state': 'New York', 'country': 'United States', 'postalcode': '10024', 'address_string': '79th Street Central Park West, New York City, NY 10024'}", # address_obj - ] - ] - ) - tripadvisor_table = NearbyLocationTable(api_handler) - - location_id_identifier = Identifier(path_str='location_id') - name_identifier = Identifier(path_str='name') - distance_identifier = Identifier(path_str='distance') - address_obj_identifier = Identifier(path_str='address_obj') - select_all = ast.Select( - targets=[location_id_identifier, name_identifier, distance_identifier, address_obj_identifier], - from_table="nearbyLocationTable", - where='latLong = "40.780825, -73.972781"', - ) - - all_location_data = tripadvisor_table.select(select_all) - first_data = all_location_data.iloc[0] - - self.assertEqual(all_location_data.shape[1], 4) - self.assertEqual(first_data["location_id"], "210108") - self.assertEqual(first_data["name"], "American Museum of Natural History") - self.assertEqual(first_data["distance"], "0.039615104835680856") - self.assertEqual(first_data["address_obj"], "{'street1': '79th Street', 'street2': 'Central Park West', 'city': 'New York City', 'state': 'New York', 'country': 'United States', 'postalcode': '10024', 'address_string': '79th Street Central Park West, New York City, NY 10024'}") diff --git a/tests/unused/unit/handler_tests/test_vectordatabase_dispatch.py b/tests/unused/unit/handler_tests/test_vectordatabase_dispatch.py deleted file mode 100644 index 3bf6b2d6b9a..00000000000 --- a/tests/unused/unit/handler_tests/test_vectordatabase_dispatch.py +++ /dev/null @@ -1,282 +0,0 @@ -from unittest.mock import Mock - -import pandas as pd -import pytest -from mindsdb_sql_parser import parse_sql -from pandas.testing import assert_frame_equal - -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, - VectorStoreHandler, -) - - -@pytest.fixture -def vector_store_handler(): - # patch the actual "execute" methods of the handler with Mock - vector_store_handler: VectorStoreHandler = VectorStoreHandler("test") - vector_store_handler.create_table = Mock() - vector_store_handler.drop_table = Mock() - vector_store_handler.insert = Mock() - vector_store_handler.update = Mock() - vector_store_handler.delete = Mock() - vector_store_handler.select = Mock() - - return vector_store_handler - - -def test_vectordatabase_parsing(vector_store_handler): - # create a table - # due to the limitation of the parser - # we can only create a table with a select statement - sql = """ - CREATE TABLE chroma_db.test_table ( - SELECT * - FROM chroma_db.test_table - ) - """ - query = parse_sql(sql) - vector_store_handler._dispatch(query) - vector_store_handler.create_table.assert_called_once() - vector_store_handler.create_table.assert_called_with( - "test_table", if_not_exists=False - ) - - # drop a table - sql = """ - DROP TABLE chroma_db.test_table - """ - query = parse_sql(sql) - vector_store_handler._dispatch(query) - vector_store_handler.drop_table.assert_called_once() - vector_store_handler.drop_table.assert_called_with("test_table", if_exists=False) - - # insert into a table - sql = """ - INSERT INTO chroma_db.test_table ( - id,content,metadata,embeddings - ) - VALUES ( - 1, 'test', '{"some_field": "some_value"}', '[1,2,3]' - ), - ( - 2, 'test', '{"some_field": "some_value"}', '[1,2,3]' - ) - """ - - data = pd.DataFrame( - { - "id": [1, 2], - "content": ["test", "test"], - "embeddings": [[1, 2, 3], [1, 2, 3]], - "metadata": [{"some_field": "some_value"}, {"some_field": "some_value"}], - } - ) - - query = parse_sql(sql) - vector_store_handler._dispatch(query) - vector_store_handler.insert.assert_called_once() - # get the args passed to the insert method - args, kwargs = vector_store_handler.insert.call_args - # get the data passed to the insert method - assert args[0] == "test_table" - data = args[1] - # assert the data is the same - assert_frame_equal(data, data) - assert kwargs["columns"] == ["id", "content", "metadata", "embeddings"] - - # select from a table - # select without filters - sql = """ - SELECT * - FROM chroma_db.test_table - """ - query = parse_sql(sql) - vector_store_handler._dispatch(query) - vector_store_handler.select.assert_called_once() - vector_store_handler.select.assert_called_with( - "test_table", - columns=["id", "content", "embeddings", "metadata"], - conditions=None, - offset=None, - limit=None, - ) - # select with search_vector filter - sql = """ - SELECT * - FROM chroma_db.test_table - WHERE search_vector = '[1, 2, 3]' - LIMIT 10 - """ - query = parse_sql(sql) - # reset the mock - vector_store_handler.select.reset_mock() - vector_store_handler._dispatch(query) - vector_store_handler.select.assert_called_once() - vector_store_handler.select.assert_called_with( - "test_table", - columns=["id", "content", "embeddings", "metadata"], - conditions=[ - FilterCondition( - column="search_vector", op=FilterOperator.EQUAL, value=[1, 2, 3] - ) - ], - limit=10, - offset=None, - ) - - # select with limit and offset - sql = """ - SELECT * - FROM chroma_db.test_table - LIMIT 10 - OFFSET 5 - """ - query = parse_sql(sql) - # reset the mock - vector_store_handler.select.reset_mock() - vector_store_handler._dispatch(query) - vector_store_handler.select.assert_called_once() - vector_store_handler.select.assert_called_with( - "test_table", - columns=["id", "content", "embeddings", "metadata"], - conditions=None, - limit=10, - offset=5, - ) - - # select with a subset of columns - sql = """ - SELECT id, content - FROM chroma_db.test_table - """ - query = parse_sql(sql) - # reset the mock - vector_store_handler.select.reset_mock() - vector_store_handler._dispatch(query) - vector_store_handler.select.assert_called_once() - vector_store_handler.select.assert_called_with( - "test_table", - columns=["id", "content"], - conditions=None, - limit=None, - offset=None, - ) - - # select with metadata filter - sql = """ - SELECT * - FROM chroma_db.test_table - WHERE metadata.created_at = '2021-01-01' - AND metadata.some_field in ('some_value', 'some_other_value') - AND search_vector = '[1, 2, 3]' - """ - query = parse_sql(sql) - # reset the mock - vector_store_handler.select.reset_mock() - vector_store_handler._dispatch(query) - vector_store_handler.select.assert_called_once() - vector_store_handler.select.assert_called_with( - "test_table", - columns=["id", "content", "embeddings", "metadata"], - conditions=[ - FilterCondition( - column="metadata.created_at", - op=FilterOperator.EQUAL, - value="2021-01-01", - ), - FilterCondition( - column="metadata.some_field", - op=FilterOperator.IN, - value=["some_value", "some_other_value"], - ), - FilterCondition( - column="search_vector", op=FilterOperator.EQUAL, value=[1, 2, 3] - ), - ], - limit=None, - offset=None, - ) - - # delete from a table - sql = """ - DELETE FROM chroma_db.test_table - WHERE id = 1 - """ - query = parse_sql(sql) - vector_store_handler._dispatch(query) - vector_store_handler.delete.assert_called_once() - vector_store_handler.delete.assert_called_with( - "test_table", - conditions=[FilterCondition(column="id", op=FilterOperator.EQUAL, value=1)], - ) - - -def test_unsupported_ops(vector_store_handler): - # select unsupported columns - sql = """ - SELECT id, some_column_not_supported - FROM chroma_db.test_table - """ - query = parse_sql(sql) - with pytest.raises(Exception) as e: - vector_store_handler._dispatch(query) - assert "not allowed" in str(e.value) - - # insert unsupported columns - sql = """ - INSERT INTO chroma_db.test_table ( - id, some_column_not_supported - ) - VALUES ( - 1, 'test' - ) - """ - query = parse_sql(sql) - with pytest.raises(Exception) as e: - vector_store_handler._dispatch(query) - assert "not allowed" in str(e.value) - - # unsupported filter - sql = """ - SELECT * - FROM chroma_db.test_table - WHERE metadata.created_at > '2021-01-01' - AND unknown_column = 'some_value' - """ - query = parse_sql(sql) - # reset the mock - vector_store_handler.select.reset_mock() - vector_store_handler._dispatch(query) - vector_store_handler.select.assert_called_once() - vector_store_handler.select.assert_called_with( - "test_table", - columns=["id", "content", "embeddings", "metadata"], - conditions=[ - FilterCondition( - column="metadata.created_at", - op=FilterOperator.GREATER_THAN, - value="2021-01-01", - ), - FilterCondition( - column="metadata.unknown_column", # we will treat this as a metadata filter - op=FilterOperator.EQUAL, - value="some_value", - ), - ], - limit=None, - offset=None, - ) - - -@pytest.mark.xfail(reason="not implemented yet") -def test_unimplemented_yet(vector_store_handler): - # select count(*) is not implemented yet - sql = """ - SELECT count(*) - FROM chroma_db.test_table - """ - query = parse_sql(sql) - with pytest.raises(Exception): - vector_store_handler._dispatch(query) diff --git a/tests/unused/unit/handler_tests/test_weaviate_handler.py b/tests/unused/unit/handler_tests/test_weaviate_handler.py deleted file mode 100644 index cf71e1dbc7f..00000000000 --- a/tests/unused/unit/handler_tests/test_weaviate_handler.py +++ /dev/null @@ -1,503 +0,0 @@ -# check if weaviate is installed -import re -import importlib -from unittest.mock import patch -import tempfile -import psutil -import pandas as pd -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("weaviate") - WEAVIATE_INSTALLED = True -except ImportError: - WEAVIATE_INSTALLED = False - - -@pytest.mark.skipif(not WEAVIATE_INSTALLED, reason="weaviate is not installed") -class TestWeaviateHandler(BaseExecutorTest): - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - # create a weaviate database connection - tmp_directory = tempfile.mkdtemp() - self.run_sql( - f""" - CREATE DATABASE weaviate_test - WITH ENGINE = "weaviate", - PARAMETERS = {{ - "persistence_directory": "{tmp_directory}" - }} - """ - ) - - @staticmethod - def teardown_class(cls): - super().teardown_class(cls) - for proc in psutil.process_iter(): - # check whether the process name matches (kill orphan processes) - if re.search("weaviate*", proc.name()): - proc.kill() - - @pytest.mark.xfail(reason="create table for vectordatabase is not well supported") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_table(self, postgres_handler_mock): - # create an empty table - sql = """ - CREATE TABLE weaviate_test.test_table; - """ - self.run_sql(sql) - - # create a table with the schema definition is not allowed - sql = """ - CREATE TABLE weaviate_test.test_table ( - id int, - metadata text, - embedding float[] - ); - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_with_select(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [ - "6af613b6-569c-5c22-9c37-2ed93f31d3af", - "b04965e6-a9bb-591f-8f8a-1adcb2c8dc39", - ], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - - self.set_handler(postgres_handler_mock, "weaviate", tables={"test_table2": df}) - - sql = """ - CREATE TABLE weaviate_test.test_table2 ( - SELECT * FROM weaviate.df - ) - """ - # this should work - self.run_sql(sql) - - @pytest.mark.xfail(reason="drop table for vectordatabase is not working") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_drop_table(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [ - "4b166dbe-d99d-5091-abdd-95b83330ed3a", - "98123fde-012f-5ff3-8b50-881449dac91a", - ], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "weaviate", tables={"test_table3": df}) - - # create a table - sql = """ - CREATE TABLE weaviate_test.test_table3 ( - SELECT * FROM weaviate.df - ) - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # drop a table - sql = """ - DROP TABLE weaviate_test.test_table3; - """ - self.run_sql(sql) - - # drop a non existent table will raise an error - sql = """ - DROP TABLE weaviate_test.test_table4; - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_insert_into(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [ - "6ed955c6-506a-5343-9be4-2c0afae02eef", - "c8691da2-158a-5ed6-8537-0e6f140801f2", - "a6c4fc8f-6950-51de-a9ae-2c519c465071", - ], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - df2 = pd.DataFrame( - { - "id": [ - "a9f96b98-dd44-5216-ab0d-dbfc6b262edf", - "e99caacd-6c45-5906-bd9f-b79e62f25963", - "e4d80b30-151e-51b5-9f4f-18a3b82718e6", - ], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [ - [1.0, 2.0, 3.0, 4.0], - [1.0, 2.0], - [1.0, 2.0, 3.0], - ], # different dimensions - } - ) - self.set_handler( - postgres_handler_mock, "weaviate", tables={"df": df, "df2": df2} - ) - num_record = df.shape[0] - - # create a table - sql = """ - CREATE TABLE weaviate_test.test_table5 ( - SELECT * FROM weaviate.df - ) - """ - self.run_sql(sql) - - # insert into a table with values - sql = """ - INSERT INTO weaviate_test.test_table5 ( - id,content,metadata,embeddings - ) - VALUES ( - '0159d6c7-973f-5e7a-a9a0-d195d0ea6fe2', 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - self.run_sql(sql) - # check if the data is inserted - sql = """ - SELECT * FROM weaviate_test.test_table5 - WHERE id = '0159d6c7-973f-5e7a-a9a0-d195d0ea6fe2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # insert without specifying id should also work - sql = """ - INSERT INTO weaviate_test.test_table5 ( - content,metadata,embeddings - ) - VALUES ( - 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - self.run_sql(sql) - # check if the data is inserted - sql = """ - SELECT * FROM weaviate_test.test_table5 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == num_record + 2 - - # insert into a table with a select statement - sql = """ - INSERT INTO weaviate_test.test_table5 ( - content,metadata,embeddings - ) - SELECT - content,metadata,embeddings - FROM - weaviate.df - """ - self.run_sql(sql) - # check if the data is inserted - sql = """ - SELECT * FROM weaviate_test.test_table5 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == num_record * 2 + 2 - - # insert into a table with a select statement, but wrong columns - with pytest.raises(Exception): - sql = """ - INSERT INTO weaviate_test.test_table5 - SELECT - content,metadata,embeddings as wrong_column - FROM - weaviate.df - """ - self.run_sql(sql) - - # insert into a table with a select statement, missing metadata column - sql = """ - INSERT INTO weaviate_test.test_table5 - SELECT - content,embeddings - FROM - weaviate.df - """ - self.run_sql(sql) - - # insert into a table with a select statement, missing embedding column, shall raise an error - with pytest.raises(Exception): - sql = """ - INSERT INTO weaviate_test.test_table5 - SELECT - content,metadata - FROM - weaviate.df - """ - self.run_sql(sql) - - # insert into a table with a select statement, with different embedding dimensions, shall raise an error - sql = """ - INSERT INTO weaviate_test.test_table5 - SELECT - content,metadata,embeddings - FROM - weaviate.df2 - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # insert into a table with existing id, shall raise an error - sql = """ - INSERT INTO weaviate_test.test_table5 ( - id,content,metadata,embeddings - ) - VALUES ( - '6ed955c6-506a-5343-9be4-2c0afae02eef', 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_select_from(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [ - "7fef88f7-411d-5669-b42d-bf5fc7f9b58b", - "52524d6e-10dc-5261-aa36-8b2efcbaa5f0", - ], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "weaviate", tables={"test_table6": df}) - # create a table - sql = """ - CREATE TABLE weaviate_test.test_table6 ( - SELECT * FROM weaviate.df - ) - """ - self.run_sql(sql) - - # query a table without any filters - sql = """ - SELECT * FROM weaviate_test.test_table6 - """ - self.run_sql(sql) - - # query a table with id - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE id = '7fef88f7-411d-5669-b42d-bf5fc7f9b58b' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a search vector, without limit - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE search_vector = '[1.0, 2.0, 3.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a search vector, with limit - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE search_vector = '[1.0, 2.0, 3.0]' - LIMIT 1 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a metadata filter - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE `metadata.test` = 'test' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a metadata filter and a search vector - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE `metadata.test` = 'test' - AND search_vector = '[1.0, 2.0, 3.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - @pytest.mark.xfail(reason="upsert for vectordatabase is not implemented") - def test_update(self): - # update a table with a metadata filter - sql = """ - UPDATE weaviate_test.test_table6 - SET `metadata.test` = 'test2' - WHERE `metadata.test` = 'test' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE `metadata.test` = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # update the embeddings - sql = """ - UPDATE weaviate_test.test_table6 - SET embedding = [3.0, 2.0, 1.0] - WHERE `metadata.test` = 'test2' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE `metadata.test` = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - assert ret.embedding[0] == [3.0, 2.0, 1.0] - - # update multiple columns - sql = """ - UPDATE weaviate_test.test_table6 - SET `metadata.test` = 'test3', - embedding = [1.0, 2.0, 3.0] - content = 'this is a test' - WHERE `metadata.test` = 'test2' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE `metadata.test` = 'test3' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - assert ret.embedding[0] == [1.0, 2.0, 3.0] - assert ret.content[0] == "this is a test" - - # update a table with a search vector filter is not allowed - sql = """ - UPDATE weaviate_test.test_table6 - SET `metadata.test = 'test2' - WHERE search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # update a table without any filters is allowed - sql = """ - UPDATE weaviate_test.test_table6 - SET metadata.test = 'test3' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM weaviate_test.test_table6 - WHERE `metadata.test` = 'test3' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # update a table with a search vector filter and a metadata filter is not allowed - sql = """ - UPDATE weaviate_test.test_table6 - SET metadata.test = 'test3' - WHERE metadata.test = 'test2' - AND search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_delete(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": [ - "91c274f2-9a0d-5ce6-ac3d-7529f452df21", - "0ff1e264-520d-543a-87dd-181a491e667e", - ], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "weaviate", tables={"test_table7": df}) - - # create a table - sql = """ - CREATE TABLE weaviate_test.test_table7 ( - SELECT * FROM weaviate.df - ) - """ - self.run_sql(sql) - - # delete from a table with a metadata filter - sql = """ - DELETE FROM weaviate_test.test_table7 - WHERE `metadata.test` = 'test1' - """ - self.run_sql(sql) - # check if the data is deleted - sql = """ - SELECT * FROM weaviate_test.test_table7 - WHERE `metadata.test` = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # delete by id - sql = """ - DELETE FROM weaviate_test.test_table7 - WHERE id = '0ff1e264-520d-543a-87dd-181a491e667e' - """ - self.run_sql(sql) - # check if the data is deleted - sql = """ - SELECT * FROM weaviate_test.test_table7 - WHERE id = '0ff1e264-520d-543a-87dd-181a491e667e' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 0 - - # delete from a table with a search vector filter is not allowed - sql = """ - DELETE FROM weaviate_test.test_table7 - WHERE search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # delete from a table without any filters is not allowed - sql = """ - DELETE FROM weaviate_test.test_table7 - """ - with pytest.raises(Exception): - self.run_sql(sql) diff --git a/tests/unused/unit/handler_tests/test_webz_handler.py b/tests/unused/unit/handler_tests/test_webz_handler.py deleted file mode 100644 index 6cc22fc7e69..00000000000 --- a/tests/unused/unit/handler_tests/test_webz_handler.py +++ /dev/null @@ -1,227 +0,0 @@ -import unittest -from unittest.mock import Mock - -import pandas as pd -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.webz_handler.webz_handler import WebzHandler -from mindsdb.integrations.handlers.webz_handler.webz_tables import ( - WebzPostsTable, - WebzReviewsTable, -) - -COLUMNS_POST = [ - "thread__uuid", - "thread__url", - "thread__site_full", - "thread__site", - "thread__site_section", - "thread__section_title", - "thread__title", - "thread__title_full", - "thread__published", - "thread__replies_count", - "thread__participants_count", - "thread__site_type", - "thread__main_image", - "thread__country", - "thread__site_categories", - "thread__social__facebook__likes", - "thread__social__facebook__shares", - "thread__social__facebook__comments", - "thread__social__gplus__shares", - "thread__social__pinterest__shares", - "thread__social__linkedin__shares", - "thread__social__stumbledupon__shares", - "thread__social__vk__shares", - "thread__performance_score", - "thread__domain_rank", - "thread__domain_rank_updated", - "thread__reach__per_million", - "thread__reach__page_views", - "thread__reach__updated", - "uuid", - "url", - "ord_in_thread", - "parent_url", - "author", - "published", - "title", - "text", - "language", - "external_links", - "external_images", - "rating", - "entities__persons", - "entities__organizations", - "entities__locations", - "crawled", -] - - -SAMPLE_POST = { - "thread__uuid": "e893796adad8a85e6ab5202ac34b5791c8fbb017", - "thread__url": "https://www.economist.com/business/2023/06/06/generative-ai-could-radically-alter-the-practice-of-law", - "thread__site_full": "https://www.economist.com", - "thread__site": "economist.com", -} - - -class WebzPostsTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - webz_handler = Mock(WebzHandler) - posts_table = WebzPostsTable(webz_handler) - # Order matters. - expected_columns = COLUMNS_POST - self.assertListEqual(posts_table.get_columns(), expected_columns) - - def test_select_with_query_order_by_and_limit(self): - webz_handler = Mock(WebzHandler) - webz_handler.call_webz_api.return_value = pd.DataFrame([SAMPLE_POST]) - posts_table = WebzPostsTable(webz_handler) - query = parse_sql( - "SELECT * FROM posts WHERE query='language:english' ORDER BY posts.relevancy LIMIT 10", - ) - results = posts_table.select(query) - first_result = results.iloc[0] - webz_handler.call_webz_api.assert_called_once_with( - method_name="posts", - params={ - "q": "language:english", - "sort": "relevancy", - "order": "default", - "size": 10, - }, - ) - self.assertEqual(results.shape[1], len(COLUMNS_POST)) - for column in COLUMNS_POST: - self.assertEqual(first_result[column], SAMPLE_POST.get(column, None)) - - def test_select_with_targets(self): - webz_handler = Mock(WebzHandler) - webz_handler.call_webz_api.return_value = pd.DataFrame([SAMPLE_POST]) - posts_table = WebzPostsTable(webz_handler) - target_field = "thread__uuid" - query = parse_sql( - f"SELECT {target_field} FROM posts", - ) - results = posts_table.select(query) - first_result = results.iloc[0] - webz_handler.call_webz_api.assert_called_once_with( - method_name="posts", - params={}, - ) - self.assertEqual(results.shape[1], 1) - self.assertEqual(first_result[target_field], SAMPLE_POST[target_field]) - - def test_select_with_invalid_order_by_field_fails(self): - webz_handler = Mock(WebzHandler) - webz_handler.call_webz_api.return_value = pd.DataFrame([SAMPLE_POST]) - posts_table = WebzPostsTable(webz_handler) - query = parse_sql( - "SELECT thread__uuid FROM posts ORDER BY posts.invalid_field", - ) - with self.assertRaises(ValueError) as context: - posts_table.select(query) - self.assertEqual( - str(context.exception), "Order by unknown column invalid_field" - ) - - -COLUMNS_REVIEW = [ - "item__uuid", - "item__url", - "item__site_full", - "item__site", - "item__site_section", - "item__section_title", - "item__title", - "item__title_full", - "item__published", - "item__reviews_count", - "item__reviewers_count", - "item__main_image", - "item__country", - "item__site_categories", - "item__domain_rank", - "item__domain_rank_updated", - "uuid", - "url", - "ord_in_thread", - "author", - "published", - "title", - "text", - "language", - "external_links", - "rating", - "crawled", -] - - -SAMPLE_REVIEW = { - "item__uuid": "3bf76e1cee69da4e89ec88d5f0f23d0b66f4b5c6", - "item__url": "https://www.watsons.com.my/baking-soda-laundry-detergent/p/BP_56634", - "item__site_full": "www.watsons.com.my", - "uuid": "d6afd2c04c995f34d9003cea353d7f5e01fcce0b", -} - - -class WebzReviewsTableTest(unittest.TestCase): - def test_get_columns_returns_all_columns(self): - webz_handler = Mock(WebzHandler) - reviews_table = WebzReviewsTable(webz_handler) - expected_columns = COLUMNS_REVIEW - self.assertListEqual(reviews_table.get_columns(), expected_columns) - - def test_select_with_query_order_by_and_limit(self): - webz_handler = Mock(WebzHandler) - webz_handler.call_webz_api.return_value = pd.DataFrame([SAMPLE_REVIEW]) - reviews_table = WebzReviewsTable(webz_handler) - query = parse_sql( - "SELECT * FROM reviews WHERE query='language:english' ORDER BY reviews.reviews_count LIMIT 10", - ) - results = reviews_table.select(query) - first_result = results.iloc[0] - webz_handler.call_webz_api.assert_called_once_with( - method_name="reviews", - params={ - "q": "language:english", - "sort": "reviews_count", - "order": "default", - "size": 10, - }, - ) - self.assertEqual(results.shape[1], len(COLUMNS_REVIEW)) - for column in COLUMNS_REVIEW: - self.assertEqual(first_result[column], SAMPLE_REVIEW.get(column, None)) - - def test_select_with_targets(self): - webz_handler = Mock(WebzHandler) - webz_handler.call_webz_api.return_value = pd.DataFrame([SAMPLE_REVIEW]) - reviews_table = WebzReviewsTable(webz_handler) - target_field = "uuid" - query = parse_sql( - f"SELECT {target_field} FROM reviews", - ) - results = reviews_table.select(query) - first_result = results.iloc[0] - webz_handler.call_webz_api.assert_called_once_with( - method_name="reviews", - params={}, - ) - self.assertEqual(results.shape[1], 1) - self.assertEqual(first_result[target_field], SAMPLE_REVIEW[target_field]) - - def test_select_with_invalid_order_by_field_fails(self): - webz_handler = Mock(WebzHandler) - webz_handler.call_webz_api.return_value = pd.DataFrame([SAMPLE_REVIEW]) - reviews_table = WebzReviewsTable(webz_handler) - query = parse_sql( - "SELECT item__uuid FROM reviews ORDER BY reviews.invalid_field", - ) - with self.assertRaises(ValueError) as context: - reviews_table.select(query) - self.assertEqual( - str(context.exception), "Order by unknown column invalid_field" - ) diff --git a/tests/unused/unit/handler_tests/test_xata_handler.py b/tests/unused/unit/handler_tests/test_xata_handler.py deleted file mode 100644 index c0192795f5b..00000000000 --- a/tests/unused/unit/handler_tests/test_xata_handler.py +++ /dev/null @@ -1,413 +0,0 @@ -import os -import importlib -from unittest.mock import patch - -import pandas as pd -import pytest -from mindsdb_sql_parser import parse_sql - -from mindsdb.tests.unit.executor_test_base import BaseExecutorTest - -try: - xata = importlib.import_module("xata") - XATA_INSTALLED = True -except ImportError: - XATA_INSTALLED = False - - -@pytest.mark.skipif(not XATA_INSTALLED, reason="xata is not installed") -class TestXetaHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.api_key = os.environ['XATA_TESTING_API_KEY'] - self.db_url = os.environ['XATA_TESTING_DB_URL'] - self.run_sql(f""" - CREATE DATABASE xata_test - WITH - ENGINE = 'xata', - PARAMETERS = {{ - "api_key": "{self.api_key}", - "db_url": "{self.db_url}", - "dimension": 3 - }}; - """) - self._client = xata.XataClient(api_key=self.api_key, db_url=self.db_url) - - def drop_table(self, table_name): - resp = self._client.table().delete(table_name) - if not resp.is_success(): - print(f"Unable to delete {table_name}: {resp['message']}") - - def get_num_records(self, table_name): - return self._client.search_and_filter().summarize(table_name, {"columns": [], "summaries": {"total": {"count": "*"}}})["summaries"][0]["total"] - - @pytest.mark.xfail(reason="create table for vectordatabase is not well supported") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_table(self, postgres_handler_mock): - # create an empty table - sql = """CREATE TABLE xata_test.testingtable;""" - self.run_sql(sql) - # create a table with the schema definition is not allowed - sql = """ - CREATE TABLE xata_test.testingtable ( - id int, - metadata text, - embedding float[] - ); - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @pytest.mark.xfail(reason="drop table for vectordatabase is not working") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_drop_table(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testingtable": df}) - # create a table - sql = """ - CREATE TABLE xata_test.testingtable ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - # drop a table - sql = """ - DROP TABLE xata_test.testingtable; - """ - self.run_sql(sql) - - @pytest.mark.xfail(reason="update for vectordatabase is not implemented") - def test_update(self): - # update a table with a metadata filter - sql = """ - UPDATE xata_test.testingtable - SET metadata.test = 'test2' - WHERE metadata.test = 'test' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM xata_test.testingtable - WHERE metadata.test = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - # update the embeddings - sql = """ - UPDATE xata_test.testingtable - SET embedding = '[3.0, 2.0, 1.0]' - WHERE metadata.test = 'test2' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM xata_test.testingtable - WHERE metadata.test = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - assert ret.embedding[0] == [3.0, 2.0, 1.0] - # update multiple columns - sql = """ - UPDATE xata_test.testingtable - SET metadata.test = 'test3', - embedding = '[1.0, 2.0, 3.0]' - content = 'this is a test' - WHERE metadata.test = 'test2' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM xata_test.testingtable - WHERE metadata.test = 'test3' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - assert ret.embedding[0] == [1.0, 2.0, 3.0] - assert ret.content[0] == "this is a test" - # update a table with a search vector filter is not allowed - sql = """ - UPDATE xata_test.testingtable - SET metadata.test = 'test2' - WHERE search_vector = '[1.0, 2.0, 3.0]' - """ - with pytest.raises(Exception): - self.run_sql(sql) - # update a table without any filters is allowed - sql = """ - UPDATE xata_test.testingtable - SET metadata.test = 'test3' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM xata_test.testingtable - WHERE metadata.test = 'test3' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - # update a table with a search vector filter and a metadata filter is not allowed - sql = """ - UPDATE xata_test.testingtable - SET metadata.test = 'test3' - WHERE metadata.test = 'test2' - AND search_vector = '[1.0, 2.0, 3.0]' - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_with_select(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": ['{"test": "test"}', '{"test": "test"}'], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testingtable": df}) - sql = """ - CREATE TABLE xata_test.testingtable (SELECT * FROM pg.df) - """ - self.drop_table("testingtable") - # this should work - self.run_sql(sql) - assert self.get_num_records("testingtable") == 2 - self.drop_table("testingtable") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_insert_into(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2", "id3"], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": ['{"test": "test1"}', '{"test": "test2"}', '{"test": "test3"}'], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - df2 = pd.DataFrame( - { - "id": ["id4", "id5", "id6"], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [ - [1.0, 2.0, 3.0], - [1.0, 2.0, 4.0], - [5.0, 2.0, 3.0], - ], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"df": df, "df2": df2}) - # create a table - sql = """ - CREATE TABLE xata_test.testingtable (SELECT * FROM pg.df) - """ - self.drop_table("testingtable") - self.run_sql(sql) - # insert into a table with values - sql = """ - INSERT INTO xata_test.testingtable (id,content,metadata,embeddings) - VALUES ('some_unique_id', 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]') - """ - self.run_sql(sql) - self.get_num_records("testingtable") == 4 - # insert without specifying id should also work - sql = """ - INSERT INTO xata_test.testingtable (content,metadata,embeddings) - VALUES ('this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]') - """ - self.run_sql(sql) - self.get_num_records("testingtable") == 5 - # insert into a table with a select statement - sql = """ - INSERT INTO xata_test.testingtable (content,metadata,embeddings) - SELECT content,metadata,embeddings FROM pg.df2 - """ - self.run_sql(sql) - self.get_num_records("testingtable") == 8 - # insert into a table with a select statement, but wrong columns - with pytest.raises(Exception): - sql = """ - INSERT INTO xata_test.testingtable - SELECT (content,metadata,embeddings as wrong_column) FROM pg.df - """ - self.run_sql(sql) - # insert into a table with a select statement, missing metadata column - sql = """ - INSERT INTO xata_test.testingtable - SELECT content,embeddings FROM pg.df - """ - self.run_sql(sql) - self.get_num_records("testingtable") == 11 - # insert into a table with a select statement, with different embedding dimensions, shall raise an error - sql = """ - INSERT INTO xata_test.testingtable - VALUES ('this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0, 4.0]') - """ - with pytest.raises(Exception): - self.run_sql(sql) - self.drop_table("testingtable") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_general_select_queries(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2", "id3", "id4", "id5", "id6"], - "content": ["test content", "test paragraph", "toast types", "", "tast misspelled", "hello"], - "metadata": ['{"price": 10}', '{"price": 100}', '{"price": 30}', '{"test": "test1"}', '{"test": "test2"}', '{"test": "test3"}'], - "embeddings": [[1.0, 2.0, 3.0], [5.0, 2.0, 8.0], [3.0, 6.0, 3.0], [1.0, 2.0, 3.0], [3.0, 1.0, 8.0], [1.0, 3.0, 7.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testingtable": df}) - # create a table - sql = """ - CREATE TABLE xata_test.testingtable (SELECT * FROM pg.df) - """ - self.drop_table("testingtable") - self.run_sql(sql) - # query a table without any filters - sql = """ - SELECT * FROM xata_test.testingtable - """ - assert self.run_sql(sql).shape[0] == 6 - # query a table with limit - sql = """ - SELECT * FROM xata_test.testingtable - LIMIT 2 - """ - assert self.run_sql(sql).shape[0] == 2 - # query a table with id - sql = """ - SELECT * FROM xata_test.testingtable - WHERE id = 'id1' - """ - assert self.run_sql(sql).shape[0] == 1 - # query a table with a metadata filter - sql = """ - SELECT * FROM xata_test.testingtable - WHERE testingtable.metadata.test = 'test1' - """ - assert self.run_sql(sql).shape[0] == 1 - # query a table with a metadata complex filter - sql = """ - SELECT * FROM xata_test.testingtable - WHERE testingtable.metadata.price > 10 AND testingtable.metadata.price <= 100 - """ - assert self.run_sql(sql).shape[0] == 2 - # query a table with a content filter - sql = """ - SELECT * FROM xata_test.testingtable - WHERE content = 'test content' - """ - assert self.run_sql(sql).shape[0] == 1 - # query a table with a content filter - sql = """ - SELECT * FROM xata_test.testingtable - WHERE content = 'test content' - """ - assert self.run_sql(sql).shape[0] == 1 - # query a table with like operator - sql = """ - SELECT * FROM xata_test.testingtable - WHERE content LIKE 'test%' - """ - assert self.run_sql(sql).shape[0] == 2 - self.drop_table("testingtable") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_vector_search(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2", "id3", "id4", "id5", "id6"], - "content": ["test content", "test paragraph", "toast types", "", "tast misspelled", "hello"], - "metadata": ['{"price": 10}', '{"price": 100}', '{"price": 30}', '{"test": "test1"}', '{"test": "test2"}', '{"test": "test3"}'], - "embeddings": [[1.0, 2.0, 3.0], [5.0, 2.0, 8.0], [3.0, 6.0, 3.0], [1.0, 2.0, 3.0], [3.0, 1.0, 8.0], [1.0, 3.0, 7.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testingtable": df}) - # create a table - sql = """ - CREATE TABLE xata_test.testingtable ( - SELECT * FROM pg.df - ) - """ - self.drop_table("testingtable") - self.run_sql(sql) - # query a table with a search vector, without limit - sql = """ - SELECT * FROM xata_test.testingtable - WHERE search_vector = '[1.0, 2.0, 3.0]' - """ - assert self.run_sql(sql).shape[0] == 6 - # query a table with a search vector, with limit - sql = """ - SELECT * FROM xata_test.testingtable - WHERE search_vector = '[1.0, 2.0, 3.0]' - LIMIT 1 - """ - assert self.run_sql(sql).shape[0] == 1 - # query a table with a search vector, and content column - sql = """ - SELECT * FROM xata_test.testingtable - WHERE search_vector = '[1.0, 2.0, 3.0]' - AND content LIKE 'test%' - """ - assert self.run_sql(sql).shape[0] == 2 - # query a table with a metadata filter and a search vector does not work - sql = """ - SELECT * FROM xata_test.testingtable - WHERE metadata.price < 200 - AND search_vector = '[1.0, 2.0, 3.0]' - """ - with pytest.raises(Exception): - self.run_sql(sql) - self.drop_table("testingtable") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_delete(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2", "id3", "id4", "id5", "id6"], - "content": ["test content", "test paragraph", "toast types", "", "tast misspelled", "hello"], - "metadata": ['{"price": 10}', '{"price": 100}', '{"price": 30}', '{"test": "test1"}', '{"test": "test2"}', '{"test": "test3"}'], - "embeddings": [[1.0, 2.0, 3.0], [5.0, 2.0, 8.0], [3.0, 6.0, 3.0], [1.0, 2.0, 3.0], [3.0, 1.0, 8.0], [1.0, 3.0, 7.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"testingtable": df}) - # create a table - sql = """ - CREATE TABLE xata_test.testingtable ( - SELECT * FROM pg.df - ) - """ - self.drop_table("testingtable") - self.run_sql(sql) - # delete by id - sql = """ - DELETE FROM xata_test.testingtable - WHERE id = 'id2' - """ - self.run_sql(sql) - self.get_num_records("testingtable") == 5 - # delete non existant passes - sql = """ - DELETE FROM xata_test.testingtable - WHERE id = 'id9' - """ - self.run_sql(sql) - self.drop_table("testingtable") diff --git a/tests/unused/unit/handler_tests/test_zipcodebase_handler.py b/tests/unused/unit/handler_tests/test_zipcodebase_handler.py deleted file mode 100644 index fa1c72992cf..00000000000 --- a/tests/unused/unit/handler_tests/test_zipcodebase_handler.py +++ /dev/null @@ -1,47 +0,0 @@ -import importlib -import os -import pytest -from mindsdb_sql_parser import parse_sql - -from ..unit.executor_test_base import BaseExecutorTest - -try: - importlib.import_module("requests") - REQUESTS_INSTALLED = True -except ImportError: - REQUESTS_INSTALLED = False - - -@pytest.mark.skipif(not REQUESTS_INSTALLED, reason="requests package is not installed") -class TestZipCodeBaseHandler(BaseExecutorTest): - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def setup_method(self): - super().setup_method() - self.api_key = os.environ.get("ZIPCODEBASE_API_KEY") - self.run_sql(f""" - CREATE DATABASE mindsdb_zipcodebase - WITH ENGINE = 'zipcodebase', - PARAMETERS = { - "api_key": '{self.api_key}' - }; - """) - - def test_basic_select_from(self): - sql = "SELECT * FROM mindsdb_zipcodebase.code_to_location where codes='10005';" - self.run_sql(sql) - - sql = 'SELECT * FROM mindsdb_zipcodebase.codes_within_radius WHERE code="10005" AND radius="100" AND country="us";' - self.run_sql(sql) - - def test_complex_select(self): - sql = 'SELECT state FROM mindsdb_zipcodebase.codes_within_radius WHERE code="10005" AND radius="100" AND country="us";' - assert self.run_sql(sql).shape[1] == 6 - - sql = "SELECT * FROM mindsdb_zipcodebase.code_to_location where codes='10005'; LIMIT 1;" - assert self.run_sql(sql).shape[0] == 1 diff --git a/tests/unused/unit/handler_tests/test_zotero_handler.py b/tests/unused/unit/handler_tests/test_zotero_handler.py deleted file mode 100644 index ceae6780595..00000000000 --- a/tests/unused/unit/handler_tests/test_zotero_handler.py +++ /dev/null @@ -1,175 +0,0 @@ -import unittest -from unittest.mock import Mock, patch -import pandas as pd -from mindsdb_sql_parser import ast -from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.handlers.zotero_handler.zotero_handler import ZoteroHandler -from mindsdb.integrations.handlers.zotero_handler.zotero_tables import AnnotationsTable - - -class AnnotationsTableTest(unittest.TestCase): - def setUp(self): - self.api_handler = Mock(ZoteroHandler) - self.annotations_table = AnnotationsTable(self.api_handler) - - def test_get_columns_returns_all_columns(self): - expected_columns = [ - 'annotationColor', 'annotationComment', 'annotationPageLabel', 'annotationText', - 'annotationType', 'dateAdded', 'dateModified', 'key', 'parentItem', - 'relations', 'tags', 'version' - ] - self.assertListEqual(self.annotations_table.get_columns(), expected_columns) - - @patch.object(AnnotationsTable, '_get_items') - def test_select_returns_all_columns(self, mock_get_items): - mock_get_items.return_value = pd.DataFrame([ - { - 'annotationColor': 'red', - 'annotationComment': 'comment', - 'annotationPageLabel': 'page1', - 'annotationText': 'text', - 'annotationType': 'highlight', - 'dateAdded': '2023-01-01', - 'dateModified': '2023-01-02', - 'key': '12345', - 'parentItem': '67890', - 'relations': {}, - 'tags': [], - 'version': 1 - } - ]) - - select_all = ast.Select( - targets=[ast.Star()], - from_table='annotations' - ) - - result = self.annotations_table.select(select_all) - first_row = result.iloc[0] - - self.assertEqual(result.shape[1], 12) - self.assertEqual(first_row['annotationColor'], 'red') - self.assertEqual(first_row['annotationComment'], 'comment') - self.assertEqual(first_row['annotationPageLabel'], 'page1') - self.assertEqual(first_row['annotationText'], 'text') - self.assertEqual(first_row['annotationType'], 'highlight') - self.assertEqual(first_row['dateAdded'], '2023-01-01') - self.assertEqual(first_row['dateModified'], '2023-01-02') - self.assertEqual(first_row['key'], '12345') - self.assertEqual(first_row['parentItem'], '67890') - self.assertEqual(first_row['relations'], {}) - self.assertEqual(first_row['tags'], []) - self.assertEqual(first_row['version'], 1) - - @patch.object(AnnotationsTable, '_get_item') - def test_select_with_conditions_item_id(self, mock_get_item): - mock_get_item.return_value = pd.DataFrame([ - { - 'annotationColor': 'blue', - 'annotationComment': 'another comment', - 'annotationPageLabel': 'page2', - 'annotationText': 'another text', - 'annotationType': 'underline', - 'dateAdded': '2023-03-01', - 'dateModified': '2023-03-02', - 'key': '54321', - 'parentItem': '09876', - 'relations': {}, - 'tags': [], - 'version': 2 - } - ]) - - select_query = parse_sql('SELECT * FROM annotations WHERE item_id = "12345"') - - result = self.annotations_table.select(select_query) - first_row = result.iloc[0] - - self.assertEqual(result.shape[1], 12) - self.assertEqual(first_row['annotationColor'], 'blue') - self.assertEqual(first_row['annotationComment'], 'another comment') - self.assertEqual(first_row['annotationPageLabel'], 'page2') - self.assertEqual(first_row['annotationText'], 'another text') - self.assertEqual(first_row['annotationType'], 'underline') - self.assertEqual(first_row['dateAdded'], '2023-03-01') - self.assertEqual(first_row['dateModified'], '2023-03-02') - self.assertEqual(first_row['key'], '54321') - self.assertEqual(first_row['parentItem'], '09876') - self.assertEqual(first_row['relations'], {}) - self.assertEqual(first_row['tags'], []) - self.assertEqual(first_row['version'], 2) - - @patch.object(AnnotationsTable, '_get_item_children') - def test_select_with_conditions_parent_item_id(self, mock_get_item_children): - mock_get_item_children.return_value = pd.DataFrame([ - { - 'annotationColor': 'green', - 'annotationComment': 'yet another comment', - 'annotationPageLabel': 'page3', - 'annotationText': 'yet another text', - 'annotationType': 'strikeout', - 'dateAdded': '2023-05-01', - 'dateModified': '2023-05-02', - 'key': '98765', - 'parentItem': '43210', - 'relations': {}, - 'tags': [], - 'version': 3 - } - ]) - - select_query = parse_sql('SELECT * FROM annotations WHERE parent_item_id = "67890"') - - result = self.annotations_table.select(select_query) - first_row = result.iloc[0] - - self.assertEqual(result.shape[1], 12) - self.assertEqual(first_row['annotationColor'], 'green') - self.assertEqual(first_row['annotationComment'], 'yet another comment') - self.assertEqual(first_row['annotationPageLabel'], 'page3') - self.assertEqual(first_row['annotationText'], 'yet another text') - self.assertEqual(first_row['annotationType'], 'strikeout') - self.assertEqual(first_row['dateAdded'], '2023-05-01') - self.assertEqual(first_row['dateModified'], '2023-05-02') - self.assertEqual(first_row['key'], '98765') - self.assertEqual(first_row['parentItem'], '43210') - self.assertEqual(first_row['relations'], {}) - self.assertEqual(first_row['tags'], []) - self.assertEqual(first_row['version'], 3) - - -class ZoteroHandlerTest(unittest.TestCase): - - @patch('pyzotero.zotero.Zotero') - def setUp(self, mock_zotero): - self.mock_zotero = mock_zotero - connection_data = { - 'library_id': 'test_lib_id', - 'library_type': 'user', - 'api_key': 'test_api_key' - } - self.handler = ZoteroHandler(connection_data=connection_data) - self.handler.connect() - - def test_connect(self): - self.handler.connect() - self.mock_zotero.assert_called_once_with( - 'test_lib_id', 'user', 'test_api_key' - ) - self.assertTrue(self.handler.is_connected) - - def test_check_connection_success(self): - self.handler.connect = Mock(return_value=None) - self.handler.is_connected = True - response = self.handler.check_connection() - self.assertTrue(response.success) - - def test_check_connection_failure(self): - self.handler.connect = Mock(side_effect=Exception('Connection failed')) - response = self.handler.check_connection() - self.assertFalse(response.success) - self.assertIn('Error connecting to Zotero API', response.error_message) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/unused/unit/handlers/test_chromadb_handler.py b/tests/unused/unit/handlers/test_chromadb_handler.py deleted file mode 100644 index 5268c2ea441..00000000000 --- a/tests/unused/unit/handlers/test_chromadb_handler.py +++ /dev/null @@ -1,529 +0,0 @@ -import shutil -import tempfile -from unittest.mock import patch - -import pandas as pd -import pytest - -from tests.unit.executor_test_base import BaseExecutorTest - -# check if chroma_db is installed -import importlib - -try: - importlib.import_module("chromadb") - CHROMA_DB_INSTALLED = True -except ImportError: - CHROMA_DB_INSTALLED = False - - -@pytest.mark.skipif(not CHROMA_DB_INSTALLED, reason="chroma_db is not installed") -class TestChromaDBHandler(BaseExecutorTest): - - @pytest.fixture(autouse=True, scope="function") - def setup_method(self): - super().setup_method() - # create a chroma database under the tmp directory - tmp_directory = tempfile.mkdtemp() - self.run_sql( - f""" - CREATE DATABASE chroma_test - WITH ENGINE = "chromadb", - PARAMETERS = {{ - "persist_directory" : "{tmp_directory}" - }} - """ - ) - yield - # Teardown code: drop the database and remove the temporary directory - self.run_sql("DROP DATABASE chroma_test;") - shutil.rmtree(tmp_directory) - - @pytest.mark.xfail(reason="create table for vectordatabase is not well supported") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_table(self, postgres_handler_mock): - # create an empty table - sql = """ - CREATE TABLE chroma_test.test_table; - """ - self.run_sql(sql) - - # create a table with the schema definition is not allowed - - sql = """ - CREATE TABLE chroma_test.test_table ( - id int, - metadata text, - embedding float[] - ); - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_with_select(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - - sql = """ - CREATE TABLE chroma_test.test_table2 ( - SELECT * FROM pg.df - ) - """ - # this should work - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_drop_table(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - - # create a table - sql = """ - CREATE TABLE chroma_test.test_table ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # drop a table - sql = """ - DROP TABLE chroma_test.test_table; - """ - self.run_sql(sql) - - # drop a non existent table will raise an error - sql = """ - DROP TABLE chroma_test.test_table2; - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_insert_into(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2", "id3"], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - df2 = pd.DataFrame( - { - "id": ["id1", "id2", "id3"], - "content": ["this is a test", "this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}, {"test": "test3"}], - "embeddings": [ - [1.0, 2.0, 3.0, 4.0], - [1.0, 2.0], - [1.0, 2.0, 3.0], - ], # different dimensions - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"df": df, "df2": df2}) - num_record = df.shape[0] - - # create a table - sql = """ - CREATE TABLE chroma_test.test_table ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # insert into a table with values - sql = """ - INSERT INTO chroma_test.test_table ( - id,content,metadata,embeddings - ) - VALUES ( - 'some_unique_id', 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - self.run_sql(sql) - # check if the data is inserted - sql = """ - SELECT * FROM chroma_test.test_table - WHERE id = 'some_unique_id' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # insert without specifying id should also work - sql = """ - INSERT INTO chroma_test.test_table ( - content,metadata,embeddings - ) - VALUES ( - 'this is a test 0', '{"test": "test"}', '[1.0, 2.0, 3.0]' - ) - """ - self.run_sql(sql) - # check if the data is inserted - sql = """ - SELECT * FROM chroma_test.test_table - """ - ret = self.run_sql(sql) - assert ret.shape[0] == num_record + 2 - - # insert into a table with a select statement - sql = """ - INSERT INTO chroma_test.test_table ( - content,metadata,embeddings - ) - SELECT - content,metadata,embeddings - FROM - pg.df - """ - self.run_sql(sql) - # check if the data is inserted - sql = """ - SELECT * FROM chroma_test.test_table - """ - ret = self.run_sql(sql) - assert ret.shape[0] == num_record + 3 # only one unique record was added - - # insert into a table with a select statement, but wrong columns - with pytest.raises(Exception): - sql = """ - INSERT INTO chroma_test.test_table - SELECT - content,metadata,embeddings as wrong_column - FROM - pg.df - """ - self.run_sql(sql) - - # insert into a table with a select statement, missing metadata column - sql = """ - INSERT INTO chroma_test.test_table - SELECT - content,embeddings - FROM - pg.df - """ - self.run_sql(sql) - - # insert into a table with a select statement, missing embedding column, shall raise an error - with pytest.raises(Exception): - sql = """ - INSERT INTO chroma_test.test_table - SELECT - content,metadata - FROM - pg.df - """ - self.run_sql(sql) - - # insert into a table with a select statement, with different embedding dimensions, shall raise an error - sql = """ - INSERT INTO chroma_test.test_table - SELECT - content,metadata,embeddings - FROM - pg.df2 - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # TODO: this behavior is not consistent with chromadb doc - # tracked in https://github.com/chroma-core/chroma/issues/1062 - # insert into a table with existing id, shall raise an error - # sql = """ - # INSERT INTO chroma_test.test_table ( - # id,content,metadata,embeddings - # ) - # VALUES ( - # 'id1', 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0]' - # ) - # """ - # with pytest.raises(Exception): - # self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_select_from(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test"}, {"test": "test"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - # create a table - sql = """ - CREATE TABLE chroma_test.test_table ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # query a table without any filters - sql = """ - SELECT * FROM chroma_test.test_table - """ - self.run_sql(sql) - - # query a table with id - sql = """ - SELECT * FROM chroma_test.test_table - WHERE id = 'id1' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a search vector, without limit - sql = """ - SELECT * FROM chroma_test.test_table - WHERE search_vector = '[1.0, 2.0, 3.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a search vector, with limit - sql = """ - SELECT * FROM chroma_test.test_table - WHERE search_vector = '[1.0, 2.0, 3.0]' - LIMIT 1 - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # query a table with a metadata filter - sql = """ - SELECT * FROM chroma_test.test_table - WHERE `metadata.test` = 'test' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - # query a table with a metadata filter and a search vector - sql = """ - SELECT * FROM chroma_test.test_table - WHERE `metadata.test` = 'test' - AND search_vector = '[1.0, 2.0, 3.0]' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 2 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_update(self, postgres_handler_mock): - - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"ext_id": "1"}, {"ext_id": "2"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - - # create a table - sql = """ - CREATE TABLE chroma_test.test_table ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # updating the collection with only embeddings and not content is not allowed - sql = """ - UPDATE chroma_test.test_table - SET embeddings = '[3.0, 2.0, 1.0]', - id = 'id1' - """ - - with pytest.raises(Exception): - self.run_sql(sql) - - # updating the collection with only content and not embeddings is not allowed - sql = """ - UPDATE chroma_test.test_table - SET content = 'blah blah', - id = 'id1' - """ - - with pytest.raises(Exception): - self.run_sql(sql) - - # update multiple columns - sql = """ - UPDATE chroma_test.test_table - SET id = 'id1', - embeddings = '[1.0, 2.0, 3.0]', - content = 'this is a test' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM chroma_test.test_table - WHERE id = 'id1' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - assert ret.embeddings[0] == [1.0, 2.0, 3.0] - assert ret.content[0] == "this is a test" - - # update a table with a where clause is not allowed - sql = """ - UPDATE chroma_test.test_table - SET `metadata.test` = 'test2' - WHERE search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # update a table with all columns - sql = """ - UPDATE chroma_test.test_table - SET id = 'id1', - embeddings = '[1.0, 2.0, 3.0]', - content = 'this is a test', - `metadata.ext_id` = '1' - """ - self.run_sql(sql) - # check if the data is updated - sql = """ - SELECT * FROM chroma_test.test_table - WHERE id = 'id1' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - assert ret.embeddings[0] == [1.0, 2.0, 3.0] - assert ret.content[0] == "this is a test" - assert ret.metadata[0] == {"ext_id": "1"} - - # update a table without providing a id is not allowed - sql = """ - UPDATE chroma_test.test_table - SET metadata.test = 'test3' - - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_delete(self, postgres_handler_mock): - df = pd.DataFrame( - { - "id": ["id1", "id2"], - "content": ["this is a test", "this is a test"], - "metadata": [{"test": "test1"}, {"test": "test2"}], - "embeddings": [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], - } - ) - self.set_handler(postgres_handler_mock, "pg", tables={"test_table": df}) - - # create a table - sql = """ - CREATE TABLE chroma_test.test_table ( - SELECT * FROM pg.df - ) - """ - self.run_sql(sql) - - # delete from a table with a metadata filter - sql = """ - DELETE FROM chroma_test.test_table - WHERE `metadata.test` = 'test1' - """ - self.run_sql(sql) - # check if the data is deleted - sql = """ - SELECT * FROM chroma_test.test_table - WHERE `metadata.test` = 'test2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 1 - - # delete by id - sql = """ - DELETE FROM chroma_test.test_table - WHERE id = 'id2' - """ - self.run_sql(sql) - # check if the data is deleted - sql = """ - SELECT * FROM chroma_test.test_table - WHERE id = 'id2' - """ - ret = self.run_sql(sql) - assert ret.shape[0] == 0 - - # delete from a table with a search vector filter is not allowed - sql = """ - DELETE FROM chroma_test.test_table - WHERE search_vector = [1.0, 2.0, 3.0] - """ - with pytest.raises(Exception): - self.run_sql(sql) - - # delete from a table without any filters is not allowed - sql = """ - DELETE FROM chroma_test.test_table - """ - with pytest.raises(Exception): - self.run_sql(sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_insert_upsert_behavior(self, postgres_handler_mock): - # Initial data with IDs - df1 = pd.DataFrame({ - "id": ["id1", "id2"], - "content": ["content1", "content2"], - "metadata": [{"test": "test1"}, {"test": "test2"}], - "embeddings": [[1.0, 2.0], [2.0, 3.0]] - }) - - # Same IDs, different metadata - df2 = pd.DataFrame({ - "id": ["id1", "id2"], - "content": ["content1", "content2"], - "metadata": [{"test": "updated1"}, {"test": "updated2"}], - "embeddings": [[1.0, 2.0], [2.0, 3.0]] - }) - - self.set_handler(postgres_handler_mock, "pg", tables={"df1": df1, "df2": df2}) - - # Create table and insert initial data - self.run_sql(""" - CREATE TABLE chroma_test.test_table ( - SELECT * FROM pg.df1 - ) - """) - - # Verify initial insert - result = self.run_sql("SELECT * FROM chroma_test.test_table") - assert result.shape[0] == 2 - assert result.iloc[0].metadata == {"test": "test1"} - - # Insert same IDs with different metadata (should update) - self.run_sql(""" - INSERT INTO chroma_test.test_table ( - SELECT * FROM pg.df2 - ) - """) - - # Verify update - result = self.run_sql("SELECT * FROM chroma_test.test_table") - assert result.shape[0] == 2 # Should still have 2 rows - assert result.iloc[0].metadata == {"test": "updated1"} # Metadata should be updated diff --git a/tests/unused/unit/interfaces/agents/test_api_key_handling.py b/tests/unused/unit/interfaces/agents/test_api_key_handling.py index 484ba775577..8a45b931d6c 100644 --- a/tests/unused/unit/interfaces/agents/test_api_key_handling.py +++ b/tests/unused/unit/interfaces/agents/test_api_key_handling.py @@ -12,10 +12,9 @@ class TestAgentApiKeyHandling(unittest.TestCase): def setUp(self): """Set up test environment.""" # Mock environment variables - self.env_patcher = patch.dict(os.environ, { - 'OPENAI_API_KEY': 'test-env-api-key', - 'ANTHROPIC_API_KEY': 'test-env-anthropic-key' - }) + self.env_patcher = patch.dict( + os.environ, {"OPENAI_API_KEY": "test-env-api-key", "ANTHROPIC_API_KEY": "test-env-anthropic-key"} + ) self.env_patcher.start() def tearDown(self): @@ -25,43 +24,44 @@ def tearDown(self): def test_get_api_key_from_env(self): """Test retrieving API key from environment variables.""" # Test getting API key from environment variable - api_key = get_api_key('openai', {}) - self.assertEqual(api_key, 'test-env-api-key') + api_key = get_api_key("openai", {}) + self.assertEqual(api_key, "test-env-api-key") def test_get_api_key_from_args(self): """Test retrieving API key from create_args.""" # Test getting API key from create_args - api_key = get_api_key('openai', {'openai_api_key': 'test-args-api-key'}) - self.assertEqual(api_key, 'test-args-api-key') + api_key = get_api_key("openai", {"openai_api_key": "test-args-api-key"}) + self.assertEqual(api_key, "test-args-api-key") def test_get_api_key_from_params(self): """Test retrieving API key from params dictionary.""" # Test getting API key from params dictionary - api_key = get_api_key('openai', {'params': {'openai_api_key': 'test-params-api-key'}}) - self.assertEqual(api_key, 'test-params-api-key') + api_key = get_api_key("openai", {"params": {"openai_api_key": "test-params-api-key"}}) + self.assertEqual(api_key, "test-params-api-key") def test_get_api_key_priority(self): """Test API key retrieval priority.""" # Test that create_args takes priority over environment variables - api_key = get_api_key('openai', {'openai_api_key': 'test-args-api-key'}) - self.assertEqual(api_key, 'test-args-api-key') + api_key = get_api_key("openai", {"openai_api_key": "test-args-api-key"}) + self.assertEqual(api_key, "test-args-api-key") # Test that params takes priority over environment variables - api_key = get_api_key('openai', {'params': {'openai_api_key': 'test-params-api-key'}}) - self.assertEqual(api_key, 'test-params-api-key') + api_key = get_api_key("openai", {"params": {"openai_api_key": "test-params-api-key"}}) + self.assertEqual(api_key, "test-params-api-key") # Test that create_args takes priority over params - api_key = get_api_key('openai', { - 'openai_api_key': 'test-args-api-key', - 'params': {'openai_api_key': 'test-params-api-key'} - }) - self.assertEqual(api_key, 'test-args-api-key') - - @patch('mindsdb.interfaces.agents.agents_controller.AgentsController.check_model_provider') - @patch('mindsdb.interfaces.agents.agents_controller.AgentsController.get_agent') - @patch('mindsdb.interfaces.agents.agents_controller.ProjectController') - @patch('mindsdb.interfaces.storage.db.session') - def test_add_agent_with_api_key(self, mock_session, mock_project_controller, mock_get_agent, mock_check_model_provider): + api_key = get_api_key( + "openai", {"openai_api_key": "test-args-api-key", "params": {"openai_api_key": "test-params-api-key"}} + ) + self.assertEqual(api_key, "test-args-api-key") + + @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.check_model_provider") + @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.get_agent") + @patch("mindsdb.interfaces.agents.agents_controller.ProjectController") + @patch("mindsdb.interfaces.storage.db.session") + def test_add_agent_with_api_key( + self, mock_session, mock_project_controller, mock_get_agent, mock_check_model_provider + ): """Test adding an agent with an API key in params.""" # Mock project controller mock_project = MagicMock() @@ -71,36 +71,31 @@ def test_add_agent_with_api_key(self, mock_session, mock_project_controller, moc mock_get_agent.return_value = None # Mock check_model_provider to return a provider - mock_check_model_provider.return_value = (None, 'openai') + mock_check_model_provider.return_value = (None, "openai") # Create an instance of AgentsController agent_controller = AgentsController() # Test adding an agent with an API key in params - params = { - 'openai_api_key': 'test-agent-api-key', - 'other_param': 'value' - } + params = {"openai_api_key": "test-agent-api-key", "other_param": "value"} # Create a mock agent with proper params mock_agent = MagicMock() mock_agent.params = params.copy() # Set params directly # Mock db.Agents to return our prepared mock agent - with patch('mindsdb.interfaces.storage.db.Agents', return_value=mock_agent): + with patch("mindsdb.interfaces.storage.db.Agents", return_value=mock_agent): # Add the agent agent = agent_controller.add_agent( - name='test_agent', - project_name='mindsdb', - model_name='gpt-4', - skills=[], - provider='openai', - params=params + name="test_agent", + project_name="mindsdb", + model={"model_name": "gpt-4", "provider": "openai"}, + params=params, ) # Verify that the API key was preserved in the params - self.assertEqual(agent.params.get('openai_api_key'), 'test-agent-api-key') + self.assertEqual(agent.params.get("openai_api_key"), "test-agent-api-key") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unused/unit/interfaces/skills/custom/text2sql/test_mindsdb_kb_tools.py b/tests/unused/unit/interfaces/skills/custom/text2sql/test_mindsdb_kb_tools.py deleted file mode 100644 index 468756c16bc..00000000000 --- a/tests/unused/unit/interfaces/skills/custom/text2sql/test_mindsdb_kb_tools.py +++ /dev/null @@ -1,163 +0,0 @@ -import unittest -from unittest.mock import MagicMock - -from typing import List, Dict, Any - -from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import ( - KnowledgeBaseListTool, - KnowledgeBaseInfoTool, - KnowledgeBaseQueryTool -) - - -class TestKnowledgeBaseTools(unittest.TestCase): - """Test cases for the MindsDB knowledge base tools.""" - - def setUp(self) -> None: - """Set up test fixtures.""" - self.mock_db = MagicMock() - - # Sample knowledge base data - self.kb_list_data: List[Dict[str, str]] = [ - {"name": "kb1"}, - {"name": "kb2"}, - {"name": "kb3"} - ] - - self.kb_schema_data: List[Dict[str, Any]] = [ - { - "name": "kb1", - "engine": "vector", - "embedding_model": "openai", - "storage": "qdrant", - "metadata_columns": ["product", "category"] - } - ] - - self.kb_sample_data: List[Dict[str, Any]] = [ - { - "id": "A1B", - "chunk_id": "A1B_notes:1of1:0to20", - "chunk_content": "Request color: black", - "metadata": {"product": "Wireless Mouse", "category": "Electronics"}, - "distance": 0.574, - "relevance": 0.509 - }, - { - "id": "Q7P", - "chunk_id": "Q7P_notes:1of1:0to22", - "chunk_content": "Prefer aluminum finish", - "metadata": {"product": "Aluminum Laptop Stand", "category": "Accessories"}, - "distance": 0.774, - "relevance": 0.250 - } - ] - - def test_knowledge_base_list_tool(self) -> None: - """Test the KnowledgeBaseListTool.""" - # Configure mock - self.mock_db.run.return_value = self.kb_list_data - - # Create tool instance - kb_list_tool = KnowledgeBaseListTool(db=self.mock_db) - - # Test tool execution - result = kb_list_tool._run("") - - # Verify results - self.assertEqual(result, "`kb1`, `kb2`, `kb3`") - self.mock_db.run.assert_called_once_with("SHOW KNOWLEDGE_BASES;") - - def test_knowledge_base_list_tool_empty_result(self) -> None: - """Test the KnowledgeBaseListTool with empty result.""" - # Configure mock - self.mock_db.run.return_value = [] - - # Create tool instance - kb_list_tool = KnowledgeBaseListTool(db=self.mock_db) - - # Test tool execution - result = kb_list_tool._run("") - - # Verify results - self.assertEqual(result, "No knowledge bases found.") - - def test_knowledge_base_info_tool(self) -> None: - """Test the KnowledgeBaseInfoTool.""" - # Configure mock - self.mock_db.run.side_effect = [self.kb_schema_data, self.kb_sample_data] - - # Create tool instance - kb_info_tool = KnowledgeBaseInfoTool(db=self.mock_db) - - # Test tool execution - result = kb_info_tool._run("$START$ `kb1` $STOP$") - - # Verify results - self.assertIn("## Knowledge Base: `kb1`", result) - self.assertIn("### Schema Information:", result) - self.assertIn("### Sample Data:", result) - self.assertEqual(self.mock_db.run.call_count, 2) - - def test_knowledge_base_info_tool_invalid_input(self) -> None: - """Test the KnowledgeBaseInfoTool with invalid input.""" - # Create tool instance - kb_info_tool = KnowledgeBaseInfoTool(db=self.mock_db) - - # Test tool execution with invalid input - result = kb_info_tool._run("invalid input") - - # Verify results - self.assertEqual( - result, - "No valid knowledge base names provided. Please provide names enclosed in backticks between $START$ and $STOP$." - ) - - def test_knowledge_base_query_tool(self) -> None: - """Test the KnowledgeBaseQueryTool.""" - # Configure mock - self.mock_db.run.return_value = self.kb_sample_data - - # Create tool instance - kb_query_tool = KnowledgeBaseQueryTool(db=self.mock_db) - - # Test tool execution - query = "SELECT * FROM kb1 WHERE content = 'color';" - result = kb_query_tool._run(f"$START$ {query} $STOP$") - - # Verify results - self.assertIn("| id | chunk_id | chunk_content | metadata | distance | relevance |", result) - self.mock_db.run.assert_called_once_with(query) - - def test_knowledge_base_query_tool_invalid_input(self) -> None: - """Test the KnowledgeBaseQueryTool with invalid input.""" - # Create tool instance - kb_query_tool = KnowledgeBaseQueryTool(db=self.mock_db) - - # Test tool execution with invalid input - result = kb_query_tool._run("invalid input") - - # Verify results - self.assertEqual( - result, - "No valid SQL query provided. Please provide a query between $START$ and $STOP$." - ) - - def test_knowledge_base_query_tool_empty_result(self) -> None: - """Test the KnowledgeBaseQueryTool with empty result.""" - # Configure mock - self.mock_db.run.return_value = [] - - # Create tool instance - kb_query_tool = KnowledgeBaseQueryTool(db=self.mock_db) - - # Test tool execution - query = "SELECT * FROM kb1 WHERE content = 'nonexistent';" - result = kb_query_tool._run(f"$START$ {query} $STOP$") - - # Verify results - self.assertEqual(result, "Query executed successfully, but no results were returned.") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unused/unit/ml_handlers/conftest.py b/tests/unused/unit/ml_handlers/conftest.py deleted file mode 100644 index 8ffd64431cb..00000000000 --- a/tests/unused/unit/ml_handlers/conftest.py +++ /dev/null @@ -1,24 +0,0 @@ -from pathlib import Path - -import pandas as pd -import pytest - -TEST_DATA_PATH = Path(__file__).parent.resolve() / "data" - - -def get_df(file_name: str, dtype: dict = None) -> pd.DataFrame: - return pd.read_csv(TEST_DATA_PATH / file_name, dtype=dtype) - - -@pytest.fixture -def lightfm_interaction_data() -> pd.DataFrame: - return get_df( - "ratings.csv", dtype={"userId": "str", "movieId": "str", "rating": "float64"} - ) - - -@pytest.fixture -def lightfm_item_data() -> pd.DataFrame: - return get_df( - "movies.csv", dtypes={"movieId": "str", "title": "str", "genres": "str"} - ) diff --git a/tests/unused/unit/ml_handlers/data/anomaly_detection.csv b/tests/unused/unit/ml_handlers/data/anomaly_detection.csv deleted file mode 100644 index 3aaa58d2eb4..00000000000 --- a/tests/unused/unit/ml_handlers/data/anomaly_detection.csv +++ /dev/null @@ -1,7 +0,0 @@ -carat,depth,table,x,y,z,category,class -0.23,61.5,55,3.95,3.98,2.43,"a",0 -0.21,59.8,61,3.89,3.84,2.31,"b",0 -0.23,56.9,65,4.05,4.07,2.31,"c",1 -0.29,62.4,58,4.2,4.23,2.63,"a",0 -0.31,63.3,58,4.34,4.35,2.75,"b",0 -0.31,64.3,28,4.24,4.35,2.75,"c",1 diff --git a/tests/unused/unit/ml_handlers/data/house_sales.csv b/tests/unused/unit/ml_handlers/data/house_sales.csv deleted file mode 100644 index 5eaf7a7304b..00000000000 --- a/tests/unused/unit/ml_handlers/data/house_sales.csv +++ /dev/null @@ -1,348 +0,0 @@ -saledate,ma,type,bedrooms -2007-09-30,441854,house,2 -2007-12-31,441854,house,2 -2008-03-31,441854,house,2 -2008-06-30,441854,house,2 -2008-09-30,451583,house,2 -2008-12-31,440256,house,2 -2009-03-31,442566,house,2 -2009-06-30,446113,house,2 -2009-09-30,440123,house,2 -2009-12-31,442131,house,2 -2010-03-31,459222,house,2 -2010-06-30,456822,house,2 -2010-09-30,457806,house,2 -2010-12-31,459109,house,2 -2011-03-31,460758,house,2 -2011-06-30,464788,house,2 -2011-09-30,467546,house,2 -2011-12-31,470333,house,2 -2012-03-31,470365,house,2 -2012-06-30,469149,house,2 -2012-09-30,465919,house,2 -2012-12-31,463090,house,2 -2013-03-31,451077,house,2 -2013-06-30,451516,house,2 -2013-09-30,454270,house,2 -2013-12-31,456548,house,2 -2014-03-31,469920,house,2 -2014-06-30,472726,house,2 -2014-09-30,475326,house,2 -2014-12-31,478413,house,2 -2015-03-31,478398,house,2 -2015-06-30,477238,house,2 -2015-09-30,477330,house,2 -2015-12-31,479010,house,2 -2016-03-31,482440,house,2 -2016-06-30,486436,house,2 -2016-09-30,489104,house,2 -2016-12-31,491152,house,2 -2017-03-31,494544,house,2 -2017-06-30,498846,house,2 -2017-09-30,504592,house,2 -2017-12-31,506578,house,2 -2018-03-31,507248,house,2 -2018-06-30,506116,house,2 -2018-09-30,504318,house,2 -2018-12-31,506001,house,2 -2019-03-31,496133,house,2 -2019-06-30,500158,house,2 -2019-09-30,510712,house,2 -2007-03-31,421291,house,3 -2007-06-30,421291,house,3 -2007-09-30,421291,house,3 -2007-12-31,421291,house,3 -2008-03-31,416031,house,3 -2008-06-30,419628,house,3 -2008-09-30,423811,house,3 -2008-12-31,426488,house,3 -2009-03-31,437724,house,3 -2009-06-30,444351,house,3 -2009-09-30,449742,house,3 -2009-12-31,457394,house,3 -2010-03-31,466433,house,3 -2010-06-30,474590,house,3 -2010-09-30,483176,house,3 -2010-12-31,491715,house,3 -2011-03-31,498022,house,3 -2011-06-30,503891,house,3 -2011-09-30,507090,house,3 -2011-12-31,507744,house,3 -2012-03-31,507449,house,3 -2012-06-30,507014,house,3 -2012-09-30,506615,house,3 -2012-12-31,506615,house,3 -2013-03-31,506380,house,3 -2013-06-30,505739,house,3 -2013-09-30,505823,house,3 -2013-12-31,506406,house,3 -2014-03-31,508499,house,3 -2014-06-30,512374,house,3 -2014-09-30,516618,house,3 -2014-12-31,522103,house,3 -2015-03-31,528926,house,3 -2015-06-30,534927,house,3 -2015-09-30,542051,house,3 -2015-12-31,549278,house,3 -2016-03-31,556586,house,3 -2016-06-30,564267,house,3 -2016-09-30,572582,house,3 -2016-12-31,581485,house,3 -2017-03-31,590949,house,3 -2017-06-30,601041,house,3 -2017-09-30,609355,house,3 -2017-12-31,615743,house,3 -2018-03-31,619638,house,3 -2018-06-30,622466,house,3 -2018-09-30,624602,house,3 -2018-12-31,626608,house,3 -2019-03-31,628423,house,3 -2019-06-30,630814,house,3 -2019-09-30,631875,house,3 -2007-03-31,548969,house,4 -2007-06-30,548969,house,4 -2007-09-30,548969,house,4 -2007-12-31,548969,house,4 -2008-03-31,552484,house,4 -2008-06-30,559580,house,4 -2008-09-30,561852,house,4 -2008-12-31,565467,house,4 -2009-03-31,569682,house,4 -2009-06-30,574680,house,4 -2009-09-30,579369,house,4 -2009-12-31,588379,house,4 -2010-03-31,599614,house,4 -2010-06-30,608528,house,4 -2010-09-30,615603,house,4 -2010-12-31,623105,house,4 -2011-03-31,628969,house,4 -2011-06-30,634155,house,4 -2011-09-30,636582,house,4 -2011-12-31,637421,house,4 -2012-03-31,635411,house,4 -2012-06-30,633695,house,4 -2012-09-30,634803,house,4 -2012-12-31,633875,house,4 -2013-03-31,634229,house,4 -2013-06-30,635515,house,4 -2013-09-30,636687,house,4 -2013-12-31,641125,house,4 -2014-03-31,648174,house,4 -2014-06-30,655757,house,4 -2014-09-30,664635,house,4 -2014-12-31,673762,house,4 -2015-03-31,684006,house,4 -2015-06-30,694800,house,4 -2015-09-30,706711,house,4 -2015-12-31,718261,house,4 -2016-03-31,727736,house,4 -2016-06-30,737159,house,4 -2016-09-30,745430,house,4 -2016-12-31,755683,house,4 -2017-03-31,771216,house,4 -2017-06-30,789732,house,4 -2017-09-30,810694,house,4 -2017-12-31,828058,house,4 -2018-03-31,836056,house,4 -2018-06-30,837295,house,4 -2018-09-30,830727,house,4 -2018-12-31,820924,house,4 -2019-03-31,811121,house,4 -2019-06-30,803925,house,4 -2019-09-30,791446,house,4 -2007-09-30,735904,house,5 -2007-12-31,735904,house,5 -2008-03-31,735904,house,5 -2008-06-30,735904,house,5 -2008-09-30,758340,house,5 -2008-12-31,764025,house,5 -2009-03-31,770046,house,5 -2009-06-30,765555,house,5 -2009-09-30,765515,house,5 -2009-12-31,771280,house,5 -2010-03-31,773355,house,5 -2010-06-30,776325,house,5 -2010-09-30,772699,house,5 -2010-12-31,775199,house,5 -2011-03-31,778470,house,5 -2011-06-30,789627,house,5 -2011-09-30,789614,house,5 -2011-12-31,790965,house,5 -2012-03-31,794533,house,5 -2012-06-30,792171,house,5 -2012-09-30,800432,house,5 -2012-12-31,804474,house,5 -2013-03-31,807826,house,5 -2013-06-30,812224,house,5 -2013-09-30,805066,house,5 -2013-12-31,805682,house,5 -2014-03-31,811908,house,5 -2014-06-30,820368,house,5 -2014-09-30,843904,house,5 -2014-12-31,855039,house,5 -2015-03-31,866489,house,5 -2015-06-30,880625,house,5 -2015-09-30,891981,house,5 -2015-12-31,909131,house,5 -2016-03-31,923594,house,5 -2016-06-30,933589,house,5 -2016-09-30,952327,house,5 -2016-12-31,968331,house,5 -2017-03-31,980953,house,5 -2017-06-30,995349,house,5 -2017-09-30,1004117,house,5 -2017-12-31,1010848,house,5 -2018-03-31,1015529,house,5 -2018-06-30,1017752,house,5 -2018-09-30,1007114,house,5 -2018-12-31,1002323,house,5 -2019-03-31,998136,house,5 -2019-06-30,995363,house,5 -2019-09-30,970268,house,5 -2007-12-31,326076,unit,1 -2008-03-31,326076,unit,1 -2008-06-30,326076,unit,1 -2008-09-30,326076,unit,1 -2008-12-31,327321,unit,1 -2009-03-31,324712,unit,1 -2009-06-30,323556,unit,1 -2009-09-30,318922,unit,1 -2009-12-31,316914,unit,1 -2010-03-31,316751,unit,1 -2010-06-30,317711,unit,1 -2010-09-30,318695,unit,1 -2010-12-31,324778,unit,1 -2011-03-31,329856,unit,1 -2011-06-30,333049,unit,1 -2011-09-30,337144,unit,1 -2011-12-31,337400,unit,1 -2012-03-31,339125,unit,1 -2012-06-30,341807,unit,1 -2012-09-30,344793,unit,1 -2012-12-31,347754,unit,1 -2013-03-31,348491,unit,1 -2013-06-30,348512,unit,1 -2013-09-30,347962,unit,1 -2013-12-31,345573,unit,1 -2014-03-31,343298,unit,1 -2014-06-30,341289,unit,1 -2014-09-30,338293,unit,1 -2014-12-31,336520,unit,1 -2015-03-31,334488,unit,1 -2015-06-30,332703,unit,1 -2015-09-30,330278,unit,1 -2015-12-31,328300,unit,1 -2016-03-31,326476,unit,1 -2016-06-30,324725,unit,1 -2016-09-30,325127,unit,1 -2016-12-31,325521,unit,1 -2017-03-31,327870,unit,1 -2017-06-30,330319,unit,1 -2017-09-30,332481,unit,1 -2017-12-31,334804,unit,1 -2018-03-31,336637,unit,1 -2018-06-30,338105,unit,1 -2018-09-30,339220,unit,1 -2018-12-31,339350,unit,1 -2019-03-31,337838,unit,1 -2019-06-30,336551,unit,1 -2019-09-30,335449,unit,1 -2007-06-30,368817,unit,2 -2007-09-30,368817,unit,2 -2007-12-31,368817,unit,2 -2008-03-31,368817,unit,2 -2008-06-30,373482,unit,2 -2008-09-30,377481,unit,2 -2008-12-31,382010,unit,2 -2009-03-31,380810,unit,2 -2009-06-30,385791,unit,2 -2009-09-30,391161,unit,2 -2009-12-31,396448,unit,2 -2010-03-31,402898,unit,2 -2010-06-30,408608,unit,2 -2010-09-30,412509,unit,2 -2010-12-31,415991,unit,2 -2011-03-31,417970,unit,2 -2011-06-30,419777,unit,2 -2011-09-30,421158,unit,2 -2011-12-31,423144,unit,2 -2012-03-31,424673,unit,2 -2012-06-30,424249,unit,2 -2012-09-30,425453,unit,2 -2012-12-31,425922,unit,2 -2013-03-31,425751,unit,2 -2013-06-30,426621,unit,2 -2013-09-30,428398,unit,2 -2013-12-31,428365,unit,2 -2014-03-31,429283,unit,2 -2014-06-30,429361,unit,2 -2014-09-30,428911,unit,2 -2014-12-31,429832,unit,2 -2015-03-31,431567,unit,2 -2015-06-30,432730,unit,2 -2015-09-30,432791,unit,2 -2015-12-31,432801,unit,2 -2016-03-31,431418,unit,2 -2016-06-30,430880,unit,2 -2016-09-30,430654,unit,2 -2016-12-31,430308,unit,2 -2017-03-31,429897,unit,2 -2017-06-30,429059,unit,2 -2017-09-30,428878,unit,2 -2017-12-31,428532,unit,2 -2018-03-31,427856,unit,2 -2018-06-30,427623,unit,2 -2018-09-30,426970,unit,2 -2018-12-31,426936,unit,2 -2019-03-31,426669,unit,2 -2019-06-30,425659,unit,2 -2019-09-30,424412,unit,2 -2007-09-30,518911,unit,3 -2007-12-31,518911,unit,3 -2008-03-31,518911,unit,3 -2008-06-30,518911,unit,3 -2008-09-30,518911,unit,3 -2008-12-31,518911,unit,3 -2009-03-31,518911,unit,3 -2009-06-30,518911,unit,3 -2009-09-30,523285,unit,3 -2009-12-31,522862,unit,3 -2010-03-31,524008,unit,3 -2010-06-30,535063,unit,3 -2010-09-30,538694,unit,3 -2010-12-31,555117,unit,3 -2011-03-31,550851,unit,3 -2011-06-30,547981,unit,3 -2011-09-30,539828,unit,3 -2011-12-31,530987,unit,3 -2012-03-31,540344,unit,3 -2012-06-30,537592,unit,3 -2012-09-30,548326,unit,3 -2012-12-31,555644,unit,3 -2013-03-31,566706,unit,3 -2013-06-30,580696,unit,3 -2013-09-30,581428,unit,3 -2013-12-31,586470,unit,3 -2014-03-31,583883,unit,3 -2014-06-30,583370,unit,3 -2014-09-30,598512,unit,3 -2014-12-31,598812,unit,3 -2015-03-31,599507,unit,3 -2015-06-30,602877,unit,3 -2015-09-30,603343,unit,3 -2015-12-31,612295,unit,3 -2016-03-31,617363,unit,3 -2016-06-30,622045,unit,3 -2016-09-30,616198,unit,3 -2016-12-31,610618,unit,3 -2017-03-31,606935,unit,3 -2017-06-30,605273,unit,3 -2017-09-30,606850,unit,3 -2017-12-31,604413,unit,3 -2018-03-31,604293,unit,3 -2018-06-30,603434,unit,3 -2018-09-30,603281,unit,3 -2018-12-31,601167,unit,3 -2019-03-31,605637,unit,3 -2019-06-30,599339,unit,3 -2019-09-30,597884,unit,3 \ No newline at end of file diff --git a/tests/unused/unit/ml_handlers/data/movies.csv b/tests/unused/unit/ml_handlers/data/movies.csv deleted file mode 100644 index b2e75d71137..00000000000 --- a/tests/unused/unit/ml_handlers/data/movies.csv +++ /dev/null @@ -1,90 +0,0 @@ -movieId,title,genres -1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy -2,Jumanji (1995),Adventure|Children|Fantasy -3,Grumpier Old Men (1995),Comedy|Romance -4,Waiting to Exhale (1995),Comedy|Drama|Romance -5,Father of the Bride Part II (1995),Comedy -6,Heat (1995),Action|Crime|Thriller -7,Sabrina (1995),Comedy|Romance -8,Tom and Huck (1995),Adventure|Children -9,Sudden Death (1995),Action -10,GoldenEye (1995),Action|Adventure|Thriller -11,"American President, The (1995)",Comedy|Drama|Romance -12,Dracula: Dead and Loving It (1995),Comedy|Horror -13,Balto (1995),Adventure|Animation|Children -14,Nixon (1995),Drama -15,Cutthroat Island (1995),Action|Adventure|Romance -16,Casino (1995),Crime|Drama -17,Sense and Sensibility (1995),Drama|Romance -18,Four Rooms (1995),Comedy -19,Ace Ventura: When Nature Calls (1995),Comedy -20,Money Train (1995),Action|Comedy|Crime|Drama|Thriller -21,Get Shorty (1995),Comedy|Crime|Thriller -22,Copycat (1995),Crime|Drama|Horror|Mystery|Thriller -23,Assassins (1995),Action|Crime|Thriller -24,Powder (1995),Drama|Sci-Fi -25,Leaving Las Vegas (1995),Drama|Romance -26,Othello (1995),Drama -27,Now and Then (1995),Children|Drama -28,Persuasion (1995),Drama|Romance -29,"City of Lost Children, The (CitΓ© des enfants perdus, La) (1995)",Adventure|Drama|Fantasy|Mystery|Sci-Fi -30,Shanghai Triad (Yao a yao yao dao waipo qiao) (1995),Crime|Drama -31,Dangerous Minds (1995),Drama -32,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Mystery|Sci-Fi|Thriller -34,Babe (1995),Children|Drama -36,Dead Man Walking (1995),Crime|Drama -38,It Takes Two (1995),Children|Comedy -39,Clueless (1995),Comedy|Romance -40,"Cry, the Beloved Country (1995)",Drama -41,Richard III (1995),Drama|War -42,Dead Presidents (1995),Action|Crime|Drama -43,Restoration (1995),Drama -44,Mortal Kombat (1995),Action|Adventure|Fantasy -45,To Die For (1995),Comedy|Drama|Thriller -46,How to Make an American Quilt (1995),Drama|Romance -47,Seven (a.k.a. Se7en) (1995),Mystery|Thriller -48,Pocahontas (1995),Animation|Children|Drama|Musical|Romance -49,When Night Is Falling (1995),Drama|Romance -50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller -52,Mighty Aphrodite (1995),Comedy|Drama|Romance -53,Lamerica (1994),Adventure|Drama -54,"Big Green, The (1995)",Children|Comedy -55,Georgia (1995),Drama -57,Home for the Holidays (1995),Drama -58,"Postman, The (Postino, Il) (1994)",Comedy|Drama|Romance -60,"Indian in the Cupboard, The (1995)",Adventure|Children|Fantasy -61,Eye for an Eye (1996),Drama|Thriller -62,Mr. Holland's Opus (1995),Drama -63,Don't Be a Menace to South Central While Drinking Your Juice in the Hood (1996),Comedy|Crime -64,Two if by Sea (1996),Comedy|Romance -65,Bio-Dome (1996),Comedy -66,Lawnmower Man 2: Beyond Cyberspace (1996),Action|Sci-Fi|Thriller -68,French Twist (Gazon maudit) (1995),Comedy|Romance -69,Friday (1995),Comedy -70,From Dusk Till Dawn (1996),Action|Comedy|Horror|Thriller -71,Fair Game (1995),Action -72,Kicking and Screaming (1995),Comedy|Drama -73,"MisΓ©rables, Les (1995)",Drama|War -74,Bed of Roses (1996),Drama|Romance -75,Big Bully (1996),Comedy|Drama -76,Screamers (1995),Action|Sci-Fi|Thriller -77,Nico Icon (1995),Documentary -78,"Crossing Guard, The (1995)",Action|Crime|Drama|Thriller -79,"Juror, The (1996)",Drama|Thriller -80,"White Balloon, The (Badkonake sefid) (1995)",Children|Drama -81,Things to Do in Denver When You're Dead (1995),Crime|Drama|Romance -82,Antonia's Line (Antonia) (1995),Comedy|Drama -83,Once Upon a Time... When We Were Colored (1995),Drama|Romance -85,Angels and Insects (1995),Drama|Romance -86,White Squall (1996),Action|Adventure|Drama -87,Dunston Checks In (1996),Children|Comedy -88,Black Sheep (1996),Comedy -89,Nick of Time (1995),Action|Thriller -92,Mary Reilly (1996),Drama|Horror|Thriller -93,Vampire in Brooklyn (1995),Comedy|Horror|Romance -94,Beautiful Girls (1996),Comedy|Drama|Romance -95,Broken Arrow (1996),Action|Adventure|Thriller -96,In the Bleak Midwinter (1995),Comedy|Drama -97,"Hate (Haine, La) (1995)",Crime|Drama -99,Heidi Fleiss: Hollywood Madam (1995),Documentary -100,City Hall (1996),Drama|Thriller diff --git a/tests/unused/unit/ml_handlers/data/ratings.csv b/tests/unused/unit/ml_handlers/data/ratings.csv deleted file mode 100644 index eb8ce0e976d..00000000000 --- a/tests/unused/unit/ml_handlers/data/ratings.csv +++ /dev/null @@ -1,3207 +0,0 @@ -userId,movieId,rating,timestamp -1,1,4.0,964982703 -1,3,4.0,964981247 -1,6,4.0,964982224 -1,47,5.0,964983815 -1,50,5.0,964982931 -1,70,3.0,964982400 -3,31,0.5,1306463578 -4,21,3.0,986935199 -4,32,2.0,945173447 -4,45,3.0,986935047 -4,47,2.0,945173425 -4,52,3.0,964622786 -4,58,3.0,964538444 -5,1,4.0,847434962 -5,21,4.0,847435238 -5,34,4.0,847434881 -5,36,4.0,847435292 -5,39,3.0,847434961 -5,50,4.0,847434881 -5,58,5.0,847435238 -6,2,4.0,845553522 -6,3,5.0,845554296 -6,4,3.0,845554349 -6,5,5.0,845553938 -6,6,4.0,845553757 -6,7,4.0,845554264 -6,8,3.0,845555281 -6,10,3.0,845553253 -6,11,4.0,845553489 -6,13,3.0,845555588 -6,15,4.0,845554505 -6,16,4.0,845553819 -6,17,4.0,845553559 -6,19,2.0,845553382 -6,21,2.0,845553382 -6,22,5.0,845553875 -6,24,4.0,845554397 -6,25,3.0,845553695 -6,26,4.0,845555362 -6,27,3.0,845555402 -6,31,3.0,845553819 -6,32,4.0,845553426 -6,34,4.0,845553354 -6,36,5.0,845553726 -6,41,4.0,845554962 -6,43,4.0,845555607 -6,45,3.0,845553907 -6,46,4.0,845554551 -6,47,4.0,845553317 -6,50,1.0,845553381 -6,54,4.0,845555402 -6,60,4.0,845554263 -6,61,4.0,845555454 -6,62,4.0,845553660 -6,65,3.0,845555070 -6,66,3.0,845555151 -6,76,4.0,845555317 -6,79,3.0,845554907 -6,86,5.0,845556131 -6,87,3.0,845555300 -6,88,2.0,845555694 -6,89,4.0,845555124 -6,92,4.0,845555454 -6,93,4.0,845554584 -6,95,4.0,845553559 -6,100,3.0,845555151 -7,1,4.5,1106635946 -7,50,4.5,1106635993 -7,58,3.0,1106635520 -8,2,4.0,839463806 -8,10,2.0,839463509 -8,11,4.0,839463806 -8,21,4.0,839463564 -8,32,3.0,839463624 -8,34,5.0,839463546 -8,39,3.0,839463644 -8,47,4.0,839463546 -8,50,5.0,839463644 -9,41,3.0,1044656650 -11,6,5.0,902154266 -11,10,3.0,902154316 -11,36,4.0,902155135 -11,44,2.0,902154593 -11,95,3.0,902154458 -12,39,4.0,1247264471 -13,47,5.0,987895819 -14,4,3.0,835441653 -14,7,3.0,835441989 -14,19,1.0,835441168 -14,25,4.0,835441394 -14,32,4.0,835441168 -14,39,3.0,835441186 -14,47,5.0,835441133 -14,95,5.0,835441295 -15,1,2.5,1510577970 -15,44,1.0,1299424916 -15,47,3.5,1510571970 -16,47,3.5,1377477814 -16,50,4.0,1377476781 -17,1,4.5,1305696483 -17,44,3.5,1305696245 -17,47,4.0,1307262715 -17,50,4.5,1305697013 -18,1,3.5,1455209816 -18,2,3.0,1455617462 -18,6,4.0,1460138360 -18,16,4.5,1461311583 -18,32,4.0,1455209840 -18,34,2.5,1455617533 -18,36,4.0,1455617478 -18,47,4.5,1455050013 -18,50,5.0,1455049343 -18,70,3.5,1455735732 -19,1,4.0,965705637 -19,2,3.0,965704331 -19,3,3.0,965707636 -19,7,2.0,965706657 -19,10,2.0,965709556 -19,12,1.0,965705347 -19,13,3.0,965709102 -19,15,2.0,965710039 -19,19,2.0,965708339 -19,32,4.0,965703646 -19,34,4.0,965705661 -19,44,3.0,965710019 -19,47,3.0,965710720 -19,48,1.0,965709172 -19,54,2.0,965702660 -19,64,2.0,965708037 -19,65,2.0,965708217 -19,70,2.0,965704974 -19,87,2.0,965708059 -19,92,2.0,965712048 -20,2,3.0,1054038313 -20,8,1.0,1054038422 -20,13,4.0,1054038425 -20,34,4.0,1054038093 -20,48,5.0,1054038357 -21,1,3.5,1407618878 -21,2,3.5,1419795031 -21,10,5.0,1403459783 -21,19,2.5,1419795052 -21,38,2.5,1419795113 -21,44,1.0,1376822969 -21,48,3.0,1376822924 -23,6,4.0,1107342267 -23,29,4.0,1107341574 -23,32,3.5,1107341750 -23,50,4.0,1107163741 -23,58,3.0,1107164183 -24,6,4.5,1458941767 -24,32,3.5,1458942033 -24,50,4.0,1458942023 -26,10,3.0,836950403 -26,34,3.0,836950483 -26,47,4.0,836950431 -27,1,3.0,962685262 -27,2,4.0,962685711 -27,34,5.0,962685351 -27,48,4.0,962685318 -27,60,3.0,962685387 -27,62,5.0,962684909 -28,6,3.5,1234570483 -28,16,2.5,1234570375 -28,21,3.0,1242290535 -28,23,1.5,1242290862 -28,31,2.5,1234334920 -28,32,3.5,1234335329 -28,47,3.0,1235975946 -28,50,3.5,1234337966 -28,62,3.5,1242290542 -28,95,2.0,1234516201 -29,50,3.5,1308007653 -31,1,5.0,850466616 -31,5,3.0,850466642 -31,7,4.0,850466642 -31,10,4.0,850467366 -31,17,3.0,850466616 -31,25,2.0,850466616 -31,62,4.0,850466616 -32,1,3.0,856736119 -32,3,3.0,856736172 -32,6,3.0,856736172 -32,7,4.0,856736173 -32,21,4.0,856737002 -32,24,3.0,856737205 -32,25,4.0,856736119 -32,26,4.0,856736347 -32,32,4.0,856736119 -32,36,4.0,856736172 -32,39,3.0,856737205 -32,50,5.0,856737002 -32,52,3.0,856736227 -32,58,5.0,856736227 -32,62,3.0,856736119 -32,74,4.0,856736316 -32,78,4.0,856736477 -32,95,3.0,856736119 -32,100,4.0,856736290 -33,1,3.0,939647444 -33,7,1.0,939654896 -33,11,2.0,939654896 -33,17,4.0,939654828 -33,21,4.0,939715904 -33,25,3.0,939646902 -33,28,5.0,939654815 -33,32,3.0,939647370 -33,34,3.0,939646940 -33,36,5.0,939715317 -33,39,5.0,939654896 -33,43,2.0,939716739 -33,50,5.0,939646733 -33,58,4.0,939646784 -33,94,4.0,939716218 -34,10,5.0,1162048773 -34,70,3.5,1162048002 -35,11,4.0,830939577 -35,21,5.0,830939505 -35,39,3.0,830939546 -35,50,5.0,830939598 -35,60,5.0,830939912 -35,62,5.0,830939748 -36,25,2.5,1100803583 -38,11,5.0,841341447 -38,17,3.0,841341494 -38,21,3.0,841341362 -38,39,3.0,841341384 -38,48,3.0,841341570 -38,50,5.0,841341362 -38,62,3.0,841341570 -39,32,3.0,974787510 -39,47,3.0,974789632 -39,50,5.0,974788030 -40,1,5.0,832058959 -40,17,5.0,832059273 -40,19,2.0,832059137 -40,21,4.0,832059053 -40,26,4.0,832059937 -40,31,4.0,832059371 -40,34,5.0,832059080 -40,44,3.0,832059393 -40,46,4.0,832059892 -40,48,2.0,832059371 -40,60,4.0,832059509 -40,62,5.0,832059339 -40,82,5.0,832060151 -41,47,3.5,1458939486 -41,50,3.0,1459367479 -42,3,4.0,996221045 -42,7,3.0,996220162 -42,10,5.0,996215205 -42,11,5.0,996219314 -42,16,5.0,996218017 -42,19,2.0,996256258 -42,21,4.0,996219086 -42,22,5.0,996219167 -42,47,4.0,996218105 -42,50,5.0,996217838 -42,86,4.0,996220615 -42,95,3.0,996214545 -43,1,5.0,848993983 -43,3,5.0,848994405 -43,5,5.0,848994281 -43,7,5.0,848994392 -43,8,5.0,848994814 -43,10,4.0,848993747 -43,11,4.0,848993928 -43,23,5.0,848994451 -43,29,5.0,848994937 -43,34,5.0,848993816 -43,47,4.0,848993793 -43,48,5.0,848994152 -43,57,5.0,848994678 -43,60,5.0,848994424 -43,79,5.0,848994617 -43,95,4.0,848993983 -44,1,3.0,869251860 -44,3,3.0,869251910 -44,6,3.0,869251910 -44,12,1.0,869252043 -44,18,4.0,869252115 -44,36,4.0,869251910 -44,65,3.0,869252497 -44,66,3.0,869252563 -44,94,4.0,869252333 -44,95,4.0,869251861 -45,1,4.0,951170182 -45,5,3.0,959625102 -45,6,4.0,1121724608 -45,7,3.0,951170390 -45,11,3.0,951170225 -45,19,4.5,1091306129 -45,21,4.0,1091306011 -45,32,4.5,1121724503 -45,39,3.0,951170338 -45,50,5.0,951756750 -45,62,4.0,1091306047 -45,65,3.0,1020803228 -45,69,3.0,951170141 -45,70,4.0,951170563 -45,88,4.0,951170543 -46,1,5.0,834787906 -46,10,3.0,834787826 -46,32,4.0,834788094 -46,39,3.0,834788093 -46,50,5.0,834788094 -47,31,3.0,1496205717 -47,47,3.0,1496209354 -47,62,3.0,1496205312 -48,48,4.0,1127128718 -50,1,3.0,1514238116 -50,32,3.0,1523740563 -51,2,4.5,1230932741 -51,3,4.0,1230932736 -51,7,4.0,1230932700 -51,36,3.0,1230929322 -51,65,3.5,1230930652 -51,70,5.0,1230932691 -51,76,5.0,1230930644 -51,93,3.0,1230931640 -54,1,3.0,830247330 -54,17,3.0,830248246 -54,21,3.0,839921390 -54,32,3.0,830247417 -54,47,3.0,839921084 -54,50,3.0,830248246 -56,10,4.0,835799162 -56,11,4.0,835799366 -56,19,5.0,835799219 -56,39,4.0,835799274 -56,47,5.0,835799219 -56,69,4.0,835799457 -57,1,5.0,965796031 -57,6,3.0,972173446 -57,10,3.0,965798286 -57,11,3.0,965797264 -57,21,3.0,965796969 -57,32,4.0,965798685 -57,39,3.0,965796392 -57,50,5.0,965796686 -57,52,3.0,969753586 -57,65,1.0,965797815 -57,69,4.0,965797105 -57,70,1.0,965797305 -57,89,2.0,972174827 -57,95,3.0,969754664 -58,3,3.0,847719397 -58,5,4.0,847719151 -58,7,5.0,847719397 -58,19,1.0,847718718 -58,21,4.0,847718718 -58,22,3.0,847719108 -58,31,4.0,847719088 -58,32,5.0,847718745 -58,36,4.0,847718991 -58,39,5.0,847718745 -58,44,2.0,847718960 -58,47,5.0,847718657 -58,48,3.0,847719035 -58,50,5.0,847718718 -58,62,3.0,847718910 -59,10,3.0,953609378 -59,41,5.0,953609923 -60,48,3.0,1393541734 -60,50,3.0,1393542060 -60,60,3.0,1393541955 -61,16,4.0,1145531659 -61,32,4.5,1145532647 -61,50,4.5,1145532639 -62,2,4.0,1528843890 -62,6,4.5,1522190219 -62,47,4.5,1521489234 -63,1,5.0,1443199669 -63,10,3.0,1443201199 -63,32,3.0,1443199999 -63,34,3.0,1443201169 -63,47,4.0,1443201084 -63,50,5.0,1443199758 -64,1,4.0,1161520134 -64,3,3.5,1161519668 -64,6,4.5,1161520752 -64,16,5.0,1161528794 -64,19,3.5,1161521687 -64,22,3.5,1161559916 -64,25,3.5,1161521571 -64,32,4.0,1161520763 -64,34,4.5,1161520212 -64,36,4.0,1161521343 -64,39,4.0,1161520967 -64,45,3.5,1161564506 -64,47,4.5,1161520185 -64,48,3.0,1161529467 -64,50,3.0,1161528982 -64,70,4.0,1161559767 -66,1,4.0,1104643957 -66,5,4.0,1113190367 -66,18,4.0,1113190353 -66,19,3.0,1093143913 -66,21,4.0,1113190363 -66,29,4.5,1099187646 -66,32,5.0,1093747471 -66,47,5.0,1093747353 -66,50,4.0,1104644110 -68,1,2.5,1158531426 -68,2,2.5,1158532776 -68,3,2.0,1158533415 -68,5,2.0,1158533624 -68,6,4.0,1158532058 -68,7,2.0,1230498124 -68,10,4.5,1158531612 -68,11,4.5,1158531050 -68,16,3.5,1158533018 -68,17,3.5,1158531987 -68,18,2.0,1532897231 -68,19,1.5,1158532448 -68,25,3.5,1158531974 -68,26,3.0,1158534106 -68,31,3.5,1158535464 -68,34,1.0,1158531735 -68,39,4.0,1158532000 -68,44,3.0,1158534993 -68,47,4.0,1158531489 -68,48,4.0,1158533544 -68,50,3.0,1158531764 -68,62,3.5,1158531021 -68,65,1.5,1269123315 -68,70,4.0,1158534541 -68,95,3.5,1158532180 -69,50,5.0,1021646926 -70,36,5.0,1355198746 -71,1,5.0,864737933 -71,7,3.0,864738008 -71,17,1.0,864737933 -71,24,2.0,864740028 -71,62,4.0,864737933 -71,86,5.0,864738119 -71,95,4.0,864737933 -72,32,4.5,1217324825 -72,47,4.5,1217324851 -72,50,4.5,1217324720 -73,1,4.5,1464196374 -74,58,5.0,1207502891 -75,47,2.0,1158968272 -76,1,0.5,1439165548 -76,47,3.5,1439168120 -76,48,0.5,1439168949 -78,1,4.0,1252575124 -78,20,3.0,1252573422 -78,32,3.5,1252575140 -79,6,4.0,975282111 -80,32,4.0,1377308186 -80,50,4.5,1377308037 -81,10,1.0,845299756 -81,32,5.0,845299983 -81,47,3.0,845299983 -82,1,2.5,1084467729 -82,2,3.0,1084465035 -82,6,3.5,1084467865 -82,10,3.5,1084467796 -82,34,2.0,1084467775 -82,47,3.5,1084467783 -84,4,3.0,858772461 -84,5,3.0,857653289 -84,6,4.0,857653289 -84,7,3.0,857653289 -84,10,3.0,860397812 -84,11,4.0,858771861 -84,14,4.0,857653349 -84,17,5.0,857653240 -84,21,4.0,858772111 -84,25,5.0,860396793 -84,31,3.0,860398679 -84,34,4.0,858772153 -84,36,5.0,857653289 -84,45,4.0,858772239 -84,46,3.0,858771890 -84,50,4.0,857653594 -84,52,5.0,857653318 -84,55,4.0,858772543 -84,57,3.0,858771951 -84,62,4.0,857653240 -84,94,4.0,857653422 -84,95,2.0,857653240 -84,100,3.0,860396882 -85,53,5.0,889468268 -86,1,4.0,1344082549 -87,60,3.0,1270702696 -88,16,4.5,1331421395 -88,50,5.0,1331421330 -88,69,3.5,1331420307 -89,1,3.0,1520408314 -89,11,2.5,1520408449 -89,88,2.0,1520408790 -90,1,3.0,856353996 -90,7,4.0,856354037 -90,14,5.0,856354100 -90,17,5.0,856353996 -90,25,5.0,856353996 -90,32,4.0,856353996 -90,36,4.0,856354037 -90,52,5.0,856354072 -90,58,5.0,856354071 -90,68,3.0,856354249 -90,81,3.0,856354174 -90,82,5.0,856354212 -90,85,5.0,856354195 -91,1,4.0,1112713037 -91,2,3.0,1112713392 -91,3,3.0,1112712323 -91,6,5.0,1112712032 -91,10,3.5,1112713269 -91,16,4.5,1112710867 -91,19,2.0,1112713417 -91,21,4.0,1112712052 -91,22,3.5,1112712292 -91,25,2.0,1112713314 -91,29,4.0,1112711719 -91,32,4.0,1112711273 -91,36,3.0,1112712316 -91,39,1.5,1112713319 -91,41,4.0,1112716937 -91,47,4.5,1112712832 -91,50,4.5,1112712816 -91,58,2.0,1112710933 -91,88,3.0,1112716912 -91,92,2.0,1112716903 -91,95,3.0,1112711260 -93,1,3.0,942767337 -93,2,5.0,942946208 -93,10,4.0,942946208 -93,15,4.0,942946091 -93,34,5.0,942767143 -93,60,4.0,942946445 -93,86,4.0,942946013 -94,2,4.0,843406960 -94,10,3.0,843406732 -94,11,3.0,843406930 -94,17,1.0,843406942 -94,19,2.0,843406846 -94,21,3.0,843406846 -94,32,5.0,843406866 -94,34,4.0,843406765 -94,39,1.0,843406866 -94,44,1.0,843406960 -94,47,2.0,843406765 -94,95,3.0,843406942 -95,7,3.0,1043340043 -95,18,3.5,1105400752 -95,25,5.0,1044744590 -95,29,5.0,1043340522 -95,32,4.0,1043340522 -95,34,4.0,1071803856 -95,39,4.0,1043339476 -96,1,5.0,964772990 -96,34,5.0,964773008 -96,50,5.0,964773008 -98,1,4.5,1532457849 -99,10,4.0,829827514 -99,22,4.0,829828005 -99,23,4.0,829828005 -100,3,3.5,1100183804 -100,11,4.0,1100184041 -100,16,4.5,1100185959 -100,17,4.5,1100184147 -100,19,1.0,1100183757 -100,28,4.5,1100184447 -100,62,4.0,1100185952 -100,74,4.0,1100186732 -100,89,4.0,1100186731 -100,95,4.5,1100185961 -102,3,5.0,840635033 -102,6,3.0,835877535 -102,21,3.0,835876107 -102,23,3.0,835877570 -102,39,3.0,835876151 -102,47,5.0,835876045 -103,1,4.0,1431954238 -103,2,4.0,1431957270 -103,5,4.0,1431957598 -103,16,5.0,1450982615 -103,18,5.0,1431969228 -103,19,3.5,1431957278 -103,34,4.0,1431957104 -103,36,4.0,1431957252 -103,48,3.5,1431957563 -103,50,4.0,1431955220 -103,60,4.0,1431968436 -103,70,3.5,1431957632 -104,2,3.0,1114809641 -104,10,3.0,1048595683 -104,31,4.0,1049135743 -104,39,4.0,1048587379 -104,47,0.5,1053336550 -104,87,3.0,1048795372 -105,6,4.0,1446773632 -105,16,4.5,1446749417 -105,32,3.5,1446571805 -105,47,5.0,1446571714 -105,50,5.0,1446571713 -107,1,4.0,829322340 -107,2,5.0,829322340 -107,5,4.0,829322340 -107,11,5.0,829322340 -107,62,5.0,829322340 -108,25,4.0,1042838964 -108,82,5.0,1042839636 -109,6,3.0,841108665 -109,11,4.0,841108045 -109,14,4.0,841109326 -109,16,4.0,841108751 -109,18,3.0,841109921 -109,21,3.0,841107538 -109,25,5.0,841108589 -109,31,3.0,841108623 -109,32,4.0,841107643 -109,34,3.0,841107487 -109,36,5.0,841143673 -109,39,3.0,841107584 -109,45,3.0,841108914 -109,47,3.0,841107393 -109,50,4.0,841107584 -109,52,3.0,841109666 -109,62,5.0,841108564 -109,76,3.0,841151480 -110,47,4.5,1175330162 -111,5,3.5,1517441319 -111,16,4.5,1518640768 -111,24,2.5,1518640854 -111,34,2.5,1516153818 -111,39,4.0,1516153474 -111,48,3.5,1518640791 -111,65,3.5,1518640926 -112,1,3.0,1442535639 -112,2,1.5,1513989948 -112,6,4.5,1513989933 -112,16,4.5,1513990003 -112,17,0.5,1513989967 -112,19,4.0,1513989970 -112,21,2.0,1513989975 -112,25,2.5,1513989973 -112,32,5.0,1442535842 -112,36,0.5,1513989966 -112,39,0.5,1513989927 -112,47,5.0,1442535846 -112,50,5.0,1442535839 -112,62,4.0,1513989987 -112,95,1.5,1513989977 -113,11,3.0,980051838 -113,25,1.0,980051660 -113,39,3.0,980051704 -113,46,3.0,980051838 -113,64,2.0,980051988 -113,71,1.0,980051522 -113,74,3.0,980051922 -115,21,4.0,970756771 -115,32,5.0,958574098 -115,34,2.0,957760059 -116,3,3.5,1337200325 -116,47,3.5,1337199584 -117,2,3.0,844163002 -117,3,3.0,844163663 -117,5,3.0,844163754 -117,6,3.0,844163102 -117,7,4.0,844163615 -117,10,3.0,844162850 -117,11,4.0,844162970 -117,17,3.0,844163037 -117,19,2.0,844162892 -117,21,4.0,844162913 -117,26,4.0,844163712 -117,31,3.0,844163204 -117,32,3.0,844162955 -117,34,3.0,844163454 -117,36,3.0,844163102 -117,39,3.0,844162955 -117,41,4.0,844163693 -117,44,2.0,844163037 -117,47,4.0,844162892 -117,48,3.0,844163082 -117,50,4.0,844162933 -117,60,3.0,844163754 -117,62,4.0,844163122 -117,73,4.0,844163663 -118,25,2.0,944924731 -119,1,3.5,1435942468 -119,10,4.0,1435943180 -119,44,3.0,1435943918 -120,5,4.0,860070029 -120,12,3.0,860070182 -120,32,3.0,860069973 -120,52,3.0,860070065 -121,1,4.0,847656180 -121,5,3.0,847656405 -121,6,5.0,847656264 -121,11,4.0,847656100 -121,16,5.0,847656340 -121,19,2.0,847656045 -121,22,3.0,847656357 -121,25,4.0,847656224 -121,39,4.0,847656062 -121,44,1.0,847656224 -121,47,3.0,847656010 -121,62,5.0,847656203 -121,95,3.0,847656129 -122,2,4.0,1461561176 -122,19,3.5,1461562132 -122,32,5.0,1461561526 -122,47,4.5,1461561420 -122,50,5.0,1461561151 -122,70,4.5,1461562250 -123,47,4.5,1447291940 -124,1,4.0,1336584336 -124,50,4.5,1336412889 -125,2,4.0,1474311709 -125,81,3.0,1474415153 -126,34,3.0,845925854 -126,47,5.0,845925822 -129,47,3.5,1167376335 -130,1,3.0,832589610 -131,31,3.0,1349838570 -131,47,4.0,1349840567 -131,50,4.0,1349840171 -132,1,2.0,1157921785 -132,17,3.0,1157922698 -132,29,2.0,1157924165 -132,32,3.0,1329983726 -132,34,1.5,1157921395 -132,39,3.0,1157921453 -132,45,2.5,1157923125 -132,47,4.0,1157921243 -132,48,3.0,1157919960 -132,50,4.0,1157920377 -132,58,2.0,1157919923 -132,70,2.5,1157923053 -132,89,2.5,1157997580 -133,32,4.0,843491488 -133,47,4.0,843491396 -133,50,3.0,843491446 -134,1,3.0,832841103 -134,47,4.0,832841168 -134,48,3.0,832841524 -135,1,4.0,1009691859 -135,2,3.0,1009692764 -135,21,3.0,1009691915 -135,32,5.0,1009691695 -135,47,5.0,1009691144 -135,65,1.0,1009692359 -136,10,5.0,832449222 -136,15,5.0,832449934 -136,16,5.0,832449614 -136,19,3.0,832449345 -136,23,5.0,832449838 -136,34,2.0,832449299 -136,44,2.0,832449539 -136,47,5.0,832449299 -136,62,5.0,832449522 -136,93,4.0,832450076 -136,95,4.0,832449391 -137,1,4.0,1204859907 -139,19,1.0,1453924016 -140,1,3.0,942924980 -140,2,3.5,1085569813 -140,6,5.0,942843185 -140,11,4.0,949667337 -140,21,4.0,949666898 -140,22,3.0,951163606 -140,23,3.0,967049087 -140,34,4.0,942910010 -140,47,4.0,942842215 -140,50,3.0,942840991 -140,62,4.5,1055092420 -140,86,4.0,942844005 -140,95,2.5,1085569725 -141,1,4.0,1513130643 -141,19,2.0,1513130814 -141,47,3.0,1513130660 -142,34,4.0,838932132 -142,36,4.0,838933753 -142,47,4.0,838934238 -142,50,5.0,838934155 -142,95,3.0,838934522 -144,1,3.5,1137323419 -144,2,3.0,1137323803 -144,10,3.0,1137323629 -144,17,4.0,1137323659 -144,19,3.0,1136812137 -144,25,3.0,1136813341 -144,32,4.0,1137323444 -144,34,3.0,1137323545 -144,39,3.5,1137323676 -144,47,4.5,1137323517 -144,48,4.5,1137324255 -145,1,5.0,832105242 -145,50,5.0,832105428 -146,32,5.0,1312508179 -147,5,4.5,1203267700 -149,2,1.0,902084874 -149,32,4.0,902085272 -150,3,3.0,854203124 -150,5,3.0,854203124 -150,6,4.0,854203123 -150,7,3.0,854203124 -150,25,4.0,854203072 -150,32,5.0,854203071 -150,36,4.0,854203123 -150,52,4.0,854203163 -150,58,3.0,854203163 -150,62,3.0,854203072 -150,79,3.0,854203229 -150,95,3.0,854203072 -151,1,5.0,855947195 -151,3,3.0,855947242 -151,9,4.0,855947372 -151,12,3.0,855947434 -151,62,4.0,855947196 -151,75,3.0,855948083 -151,92,1.0,855948256 -151,95,4.0,855947196 -152,47,5.0,1450867741 -152,50,4.5,1450572430 -153,1,2.0,1525548642 -153,2,2.0,1525550601 -155,1,3.0,961861723 -156,1,4.0,1106854640 -156,11,3.0,946799769 -156,17,4.0,939884874 -156,19,3.0,1106855017 -156,21,5.0,946799679 -156,25,4.0,1106854805 -156,34,4.0,939853183 -156,39,1.0,939842934 -156,45,4.0,946799570 -156,47,3.5,1106854709 -156,50,4.0,940001349 -156,52,3.5,1106855391 -156,58,4.0,1106855269 -156,62,2.0,1106854830 -156,68,2.0,946799375 -159,1,4.5,1508640172 -159,31,3.5,1508641164 -159,62,3.0,1508641179 -160,1,4.0,971115026 -160,2,4.0,971619578 -160,6,2.0,971115114 -160,10,1.0,971196422 -160,22,1.0,971437089 -160,32,5.0,971113953 -160,34,5.0,971619022 -160,44,3.0,971115727 -160,47,5.0,971115962 -160,50,4.0,971113158 -160,60,2.0,971619579 -160,76,4.0,971196402 -160,79,1.0,971196754 -160,85,5.0,991075851 -160,93,1.0,976798624 -160,95,1.0,971112529 -161,1,4.0,1176751765 -161,48,4.0,1176498494 -162,4,3.0,836684306 -162,16,5.0,836511416 -162,17,5.0,836510953 -162,31,4.0,836511596 -162,36,5.0,836511416 -162,50,4.0,836511263 -162,62,5.0,836510953 -166,1,5.0,1189980529 -166,6,3.5,1190828796 -166,10,4.5,1190827465 -166,17,4.0,1190828781 -166,32,4.0,1189038252 -166,47,4.0,1188774624 -166,50,5.0,1188774404 -167,1,3.5,1154721923 -167,39,2.0,1154723333 -167,69,2.5,1154719322 -169,1,4.5,1059427918 -169,2,4.0,1078284713 -169,3,5.0,1078284750 -169,5,5.0,1078284788 -169,7,4.5,1078284741 -169,11,4.0,1059427956 -169,34,4.0,1059427862 -169,39,3.5,1059428140 -169,48,3.5,1059427123 -169,60,4.0,1078285375 -169,62,5.0,1078284604 -169,73,4.0,1070250173 -170,5,3.0,840473290 -170,10,3.0,840472869 -170,34,5.0,840472895 -170,48,4.0,840473093 -170,95,3.0,840473046 -171,1,5.0,866904159 -171,25,5.0,866904159 -171,29,5.0,866904396 -171,32,5.0,866904158 -171,36,4.0,866904192 -171,47,5.0,866905267 -171,81,5.0,866904379 -173,10,3.0,843397944 -173,17,4.0,843398078 -174,10,3.0,848486985 -174,11,5.0,848487132 -174,32,3.0,848487101 -174,34,4.0,848487060 -174,39,4.0,848487101 -174,50,3.0,848487075 -174,62,1.0,848487212 -176,10,5.0,840108984 -176,39,3.0,840108930 -176,47,5.0,840109138 -177,1,5.0,1435533535 -177,2,3.5,1435534109 -177,7,1.0,1435534432 -177,11,3.0,1435890660 -177,16,3.0,1435890664 -177,19,2.0,1435534140 -177,28,2.5,1435536420 -177,39,4.0,1435890554 -177,47,4.5,1435533571 -177,48,3.0,1435534447 -177,50,4.0,1435523529 -177,60,3.0,1435535258 -178,1,4.0,1164354911 -178,10,4.0,1164355337 -178,25,4.5,1164355401 -178,47,4.5,1164355292 -178,50,4.5,1163673981 -179,1,4.0,852114051 -179,3,4.0,852114317 -179,7,3.0,852115405 -179,9,3.0,852114156 -179,18,4.0,852115405 -179,19,3.0,840907679 -179,32,4.0,852114116 -179,34,5.0,840907679 -179,47,4.0,840907616 -179,52,4.0,852115696 -179,65,3.0,852115576 -179,95,3.0,852114051 -181,5,3.0,845470124 -181,6,3.0,845469804 -181,7,3.0,845470611 -181,10,2.0,845469438 -181,11,5.0,845469653 -181,16,3.0,845469893 -181,21,2.0,845469555 -181,22,3.0,845469972 -181,24,3.0,845470571 -181,25,1.0,845469842 -181,31,3.0,845469893 -181,34,5.0,845469500 -181,39,3.0,845469625 -181,45,3.0,845470124 -181,47,3.0,845469500 -181,61,4.0,845472115 -181,62,4.0,845469756 -181,79,4.0,845470717 -181,86,4.0,845472271 -181,100,3.0,845470909 -182,1,4.0,1063289621 -182,6,4.5,1054782012 -182,10,3.5,1054782216 -182,14,4.0,1063289654 -182,16,5.0,1054783631 -182,17,4.0,1066428620 -182,18,5.0,1055153282 -182,21,2.5,1054782030 -182,23,3.5,1055153587 -182,25,4.5,1054783186 -182,26,4.0,1063648819 -182,29,4.0,1054779789 -182,32,4.0,1054779785 -182,36,4.5,1063289589 -182,41,4.5,1054781875 -182,44,1.5,1055150820 -182,45,3.0,1075764933 -182,47,4.0,1054781315 -182,50,4.5,1054781309 -182,69,3.0,1054784267 -182,70,4.5,1077804802 -182,76,3.5,1055152107 -182,81,3.5,1055154219 -182,89,3.0,1055151186 -182,94,4.0,1075765007 -182,97,4.5,1075764905 -182,100,3.0,1055153511 -185,1,4.0,1044311830 -185,34,2.0,1044311830 -186,1,4.0,1031080831 -186,2,4.0,1031087675 -186,10,4.0,1031088020 -187,16,4.0,1161849621 -187,25,3.0,1161872627 -187,29,4.0,1161849623 -187,47,4.0,1161850486 -187,50,4.5,1161849680 -187,70,4.0,1161849108 -187,97,3.0,1180301412 -188,7,4.0,962559461 -191,1,4.0,829759809 -191,6,4.0,829759809 -191,16,4.0,829759809 -191,17,5.0,829759809 -191,21,4.0,829759809 -191,25,5.0,829759809 -191,32,5.0,829759809 -191,34,4.0,829760897 -191,36,5.0,829759809 -191,39,1.0,829760897 -191,47,4.0,829760898 -191,50,5.0,829760898 -191,52,5.0,829759809 -191,58,4.0,829759809 -191,72,4.0,829760897 -191,85,5.0,829759809 -191,94,4.0,829759809 -191,95,3.0,829759809 -191,99,5.0,829759809 -192,10,5.0,835128874 -192,47,3.0,835129031 -193,1,2.0,1435856890 -194,48,2.0,1110316714 -195,6,4.0,974705807 -195,10,4.0,974705349 -195,16,4.0,974705903 -195,25,4.0,1008558673 -195,32,4.0,974709650 -195,36,4.0,1008558588 -195,50,5.0,974705671 -195,70,4.0,979858326 -196,62,4.5,1460954240 -197,50,3.0,947462391 -198,17,3.0,1034136028 -198,24,4.0,1034138101 -198,25,4.0,1034136139 -198,32,5.0,1034135531 -198,34,4.0,1034137414 -198,36,5.0,1034136028 -198,47,5.0,1034136177 -198,50,5.0,1034136008 -198,58,5.0,1034137364 -198,66,1.0,1034135137 -199,6,4.0,1057590506 -199,7,3.0,940544181 -199,11,4.0,940543988 -199,20,2.0,1022161793 -199,21,4.0,940372914 -199,25,5.0,940372513 -199,36,4.0,940372656 -199,47,3.0,940372883 -199,50,4.0,940372419 -199,62,1.0,941295529 -199,81,3.0,940544216 -199,85,1.0,940380890 -200,1,3.5,1229886312 -200,5,4.0,1229876436 -200,10,4.5,1229887453 -200,19,3.5,1229877095 -200,34,2.5,1229877775 -200,39,5.0,1229886362 -200,47,4.0,1229885752 -200,62,3.0,1229886642 -201,1,5.0,939801780 -201,11,4.0,939227085 -201,16,4.0,939227554 -201,24,4.0,941211478 -201,25,5.0,939227591 -201,32,4.0,939833041 -201,34,5.0,939801740 -201,46,1.0,939227085 -202,1,4.0,974923506 -202,2,4.0,974923336 -202,6,5.0,974918622 -202,10,4.0,974919042 -202,11,4.0,975013705 -202,29,4.0,974923156 -202,32,4.0,974925068 -202,44,2.0,974923392 -202,49,3.0,974925453 -202,50,4.0,974924628 -202,58,4.0,975013724 -202,62,3.0,975013683 -203,31,3.5,1390094511 -204,47,5.0,1327182956 -206,1,5.0,850763267 -206,7,4.0,850763310 -206,14,4.0,850763367 -206,17,5.0,850763267 -206,25,5.0,850763267 -206,32,3.0,850763267 -206,36,5.0,850763310 -206,58,5.0,850763367 -206,62,4.0,850763267 -206,95,3.0,850763267 -207,100,3.0,1258548270 -208,10,2.0,940639452 -211,50,4.0,1350912267 -213,1,3.5,1316196157 -214,1,3.0,853937855 -214,17,3.0,853937855 -214,25,2.0,853937855 -214,62,3.0,853937855 -214,66,3.0,853938014 -214,83,3.0,853938185 -215,50,5.0,1260908668 -216,1,3.0,975211713 -216,21,3.0,975212544 -216,34,5.0,975212451 -216,39,3.0,975212451 -216,48,2.0,975211866 -217,1,4.0,955942540 -217,2,2.0,955942327 -217,3,1.0,955944713 -217,6,2.0,955935939 -217,9,3.0,955941157 -217,10,4.0,955940584 -217,12,3.0,955945336 -217,19,1.0,955945611 -217,20,2.0,955941424 -217,22,3.0,955943052 -217,34,4.0,955943345 -217,38,2.0,955945548 -217,44,2.0,955941505 -217,45,2.0,955943999 -217,47,2.0,955942953 -217,50,3.0,955942801 -217,60,2.0,955942221 -217,65,1.0,955945452 -217,87,3.0,955945423 -217,89,4.0,955940533 -217,95,3.0,955941157 -219,1,3.5,1194681084 -219,2,2.5,1194740185 -219,6,3.5,1198783144 -219,10,4.5,1194932162 -219,19,2.5,1194932235 -219,21,3.5,1199581468 -219,32,3.5,1194686177 -219,44,1.5,1195349252 -219,47,3.5,1194686017 -219,50,5.0,1194685882 -219,65,1.0,1198782611 -219,95,1.5,1198782277 -220,1,5.0,1230055565 -220,6,3.5,1230061735 -220,10,4.0,1230055828 -220,32,4.5,1230055680 -220,34,4.5,1230055796 -220,47,3.0,1230055738 -220,50,5.0,1230054959 -221,2,3.5,1111177796 -221,29,4.5,1119984574 -221,32,5.0,1111177792 -221,50,4.5,1118246575 -221,58,4.5,1111176234 -222,2,2.5,1391353926 -222,19,3.5,1391351897 -222,32,3.0,1391353917 -222,47,4.0,1391350444 -222,50,3.0,1391346849 -223,1,3.5,1226209758 -223,34,1.0,1226209953 -223,47,3.0,1226209770 -224,39,5.0,971812919 -225,69,5.0,949111262 -226,1,3.5,1095662606 -226,2,3.0,1095662788 -226,3,3.5,1095662861 -226,10,4.0,1095662675 -226,16,4.5,1095662900 -226,19,3.5,1095662814 -226,22,2.0,1095662268 -226,32,4.0,1095662618 -226,39,3.0,1095662724 -226,44,3.5,1095663137 -226,47,5.0,1160003508 -226,48,3.0,1095663041 -226,60,2.5,1114838860 -226,63,3.5,1096420461 -226,65,3.0,1095712984 -226,69,4.0,1172040506 -226,88,3.0,1097545177 -227,17,3.5,1447210004 -227,32,4.5,1447210296 -227,47,5.0,1447209787 -228,50,4.5,1363222968 -228,65,2.0,1363222731 -229,1,5.0,838144316 -229,5,3.0,838143681 -229,10,4.0,836941790 -229,19,3.0,836942063 -229,21,4.0,836942097 -229,34,5.0,836942064 -229,36,4.0,838147251 -229,45,3.0,838143681 -230,2,2.5,1196305107 -230,34,2.0,1196304670 -230,39,3.0,1196304844 -230,47,3.0,1196304391 -232,1,3.5,1076955621 -232,2,4.0,1085351710 -232,10,3.0,1218167397 -232,39,3.0,1182909940 -232,47,4.5,1241823324 -232,48,2.5,1218169473 -233,1,3.0,1524781249 -233,47,3.5,1524781264 -233,50,4.0,1472587568 -234,1,5.0,1004409347 -234,24,5.0,1004407893 -234,34,3.0,1004409582 -234,42,4.0,1004408338 -234,48,5.0,1004409503 -234,60,4.0,1004409020 -234,65,3.0,1001975877 -235,10,2.0,841422389 -235,11,4.0,841422530 -235,21,4.0,841422461 -235,25,5.0,841422634 -235,32,4.0,841422499 -235,34,4.0,841422446 -235,36,4.0,841422650 -235,39,3.0,841422477 -235,48,3.0,841422615 -235,62,5.0,841422615 -237,32,5.0,1411233598 -237,50,2.5,1410631753 -239,1,4.0,1221158265 -239,6,5.0,1221158564 -239,11,5.0,1221159019 -239,16,4.5,1221159148 -239,20,3.5,1221147484 -239,21,4.0,1221158640 -239,23,3.5,1221147477 -239,32,5.0,1221159452 -239,47,5.0,1221158250 -239,50,4.5,1221158083 -239,95,3.5,1221158503 -240,1,5.0,849122434 -240,2,5.0,849122404 -240,3,4.0,849122858 -240,10,3.0,849122194 -240,16,3.0,849122640 -240,19,5.0,849122301 -240,31,4.0,849122640 -240,34,5.0,849122277 -240,39,5.0,849122324 -240,44,4.0,849122528 -240,46,3.0,849123161 -240,48,3.0,849122594 -240,60,4.0,849122924 -240,95,5.0,849122434 -241,47,4.0,1447540959 -241,50,4.0,1447536935 -242,21,3.0,834073178 -242,32,5.0,834073281 -242,39,3.0,834073303 -242,47,5.0,834073178 -243,10,5.0,837155138 -243,36,4.0,837155409 -243,44,4.0,837155345 -243,48,4.0,837155356 -243,62,5.0,837155394 -244,6,5.0,975092819 -244,10,5.0,975091789 -246,17,5.0,1354126066 -246,28,4.5,1354126962 -246,29,5.0,1353864666 -246,50,4.5,1353869486 -247,1,5.0,1467644119 -247,32,3.0,1467661273 -247,47,4.0,1467644262 -247,50,5.0,1467643861 -248,10,3.5,1534591944 -249,1,4.0,1347317775 -249,2,4.0,1353800871 -249,19,3.5,1354107358 -249,20,3.5,1355366891 -249,32,5.0,1346752537 -249,47,5.0,1346757700 -249,48,3.0,1346752050 -249,50,4.0,1364546154 -249,70,4.0,1355677606 -249,89,3.5,1370785235 -250,45,4.0,1121217522 -251,47,5.0,1470677268 -252,1,4.5,1498284904 -253,29,4.5,1286643322 -254,1,4.5,1180446553 -254,32,4.5,1180446451 -254,47,4.5,1180447550 -254,50,4.5,1180564704 -254,79,2.0,1180443477 -255,34,1.0,1005717433 -256,18,5.0,1447000271 -257,7,1.0,1141625546 -257,16,3.5,1141625441 -259,2,2.0,1146845339 -259,32,5.0,1146845633 -260,29,4.0,1109409486 -260,47,4.5,1109408487 -260,50,5.0,1109410623 -261,16,4.0,1404881583 -261,47,4.0,1404881499 -261,50,4.0,1404881245 -262,4,1.0,840306203 -262,7,3.0,840306238 -262,10,2.0,840305646 -262,17,3.0,840305810 -262,21,3.0,840305713 -262,25,3.0,840305912 -262,26,3.0,840309913 -262,28,5.0,840310932 -262,36,5.0,840305940 -262,48,1.0,840305912 -262,50,3.0,840305713 -262,57,3.0,840306644 -262,85,3.0,840306620 -263,1,4.0,940384199 -263,11,4.0,941590364 -263,17,5.0,940384427 -263,24,3.0,941590705 -263,34,4.0,940384374 -263,36,4.0,940384427 -263,39,4.0,941590574 -264,1,4.0,1136978964 -264,32,1.0,1136979041 -264,48,3.0,1136978326 -265,25,1.0,965318357 -265,32,4.0,965316504 -265,36,4.0,965316145 -265,79,2.0,965318627 -266,1,2.0,945669542 -266,6,4.0,944980835 -266,16,5.0,944981193 -266,17,1.0,944980787 -266,21,4.0,945669287 -266,24,1.0,948586506 -266,32,4.0,945751855 -266,39,2.0,945669397 -266,45,2.0,945669246 -266,50,4.0,944980462 -266,64,1.0,945669246 -266,69,5.0,945669505 -266,95,3.0,946768666 -267,29,4.0,997136080 -267,58,5.0,959807857 -268,29,5.0,940182766 -268,41,4.0,940181204 -268,46,1.0,940180773 -268,47,3.0,940180858 -268,50,5.0,940180799 -269,1,5.0,850865423 -269,3,4.0,850865480 -269,5,3.0,850865480 -269,6,5.0,850865480 -269,9,2.0,850865553 -269,25,5.0,850865423 -269,32,4.0,850865423 -269,63,3.0,850865688 -269,74,5.0,850865586 -269,76,2.0,850865622 -269,79,4.0,850865553 -269,81,3.0,850865720 -269,95,3.0,850865423 -270,1,5.0,853918728 -270,3,3.0,853918793 -270,5,3.0,853918793 -270,6,3.0,853918793 -270,7,3.0,853918793 -270,9,3.0,853918943 -270,14,5.0,853918850 -270,17,4.0,853918728 -270,25,4.0,853918728 -270,32,4.0,853918727 -270,36,5.0,853918793 -270,52,4.0,853918850 -270,58,4.0,853918849 -270,62,5.0,853918728 -270,79,2.0,853918942 -270,95,2.0,853918728 -271,32,4.0,1234302161 -273,1,5.0,835861234 -273,12,1.0,835860711 -273,21,5.0,835860782 -273,32,5.0,835861257 -273,34,4.0,835860782 -274,1,4.0,1171410158 -274,2,3.5,1171934785 -274,6,4.0,1197022122 -274,8,3.0,1172030892 -274,10,4.0,1171428459 -274,12,3.5,1171829597 -274,16,4.5,1171493420 -274,19,4.0,1171934796 -274,20,3.5,1171830022 -274,22,3.5,1171759024 -274,23,3.5,1171829251 -274,24,3.5,1171785219 -274,29,4.0,1238050945 -274,32,4.0,1171409321 -274,34,4.0,1171756208 -274,44,4.0,1171759773 -274,47,4.0,1171172762 -274,48,2.0,1171943719 -274,50,4.0,1171408986 -274,60,3.5,1171827419 -274,62,3.0,1171492326 -274,65,3.0,1171829616 -274,69,4.0,1171932360 -274,70,4.5,1171493880 -274,86,3.5,1171828191 -274,87,3.0,1197272712 -274,88,3.5,1171828649 -274,93,3.5,1172024378 -274,95,2.5,1174524214 -275,1,5.0,1049076484 -275,17,5.0,1049079511 -275,21,5.0,1049076555 -275,25,3.0,1049079449 -275,32,5.0,1049078728 -275,34,5.0,1049078728 -275,39,4.0,1049076538 -275,45,2.0,1049076616 -275,50,4.0,1049078044 -276,1,4.0,858350384 -276,2,4.0,858351189 -276,5,3.0,858350427 -276,7,4.0,858350645 -276,12,4.0,858350757 -276,19,4.0,858351186 -276,62,5.0,858350385 -276,88,5.0,858350816 -277,1,4.0,861812794 -277,32,5.0,861812794 -277,62,2.0,861812794 -277,65,3.0,861812976 -277,95,3.0,861812794 -278,50,5.0,1193753316 -279,1,3.0,1506394495 -279,50,3.5,1506394189 -280,1,4.5,1348435273 -280,7,4.5,1348532595 -280,19,3.0,1348435266 -280,27,4.5,1348531940 -280,39,4.0,1348532012 -280,50,4.5,1348434052 -282,1,4.5,1378495714 -282,6,4.0,1378497085 -282,10,3.5,1378497072 -282,16,4.0,1378491787 -282,25,4.0,1378490653 -282,31,4.5,1378488783 -282,32,4.0,1378497758 -282,50,4.5,1378489751 -283,1,3.0,901227602 -283,39,1.0,901228254 -283,63,3.0,901228285 -283,69,4.0,901228313 -283,70,3.0,901228337 -284,2,4.0,832699451 -284,10,4.0,832699451 -284,15,4.0,832699795 -284,19,4.0,832786975 -284,20,1.0,832699760 -284,21,3.0,832699760 -284,32,5.0,832699723 -284,39,3.0,832786975 -284,44,3.0,832699890 -284,47,3.0,832695534 -284,95,5.0,832699673 -286,6,4.5,1119563039 -286,7,3.5,1119562096 -286,39,3.5,1119562094 -286,47,5.0,1119562911 -286,70,3.0,1119563855 -287,16,2.0,1110229950 -287,29,4.5,1110231627 -287,48,1.0,1110229874 -287,50,3.5,1110230077 -287,62,2.0,1110231040 -288,1,4.5,1054568869 -288,2,2.0,978467973 -288,3,4.0,975691635 -288,5,2.0,978622571 -288,10,3.0,978465794 -288,12,2.0,978622871 -288,13,2.0,978469581 -288,17,3.5,1054569627 -288,21,4.0,975693063 -288,32,5.0,975692000 -288,34,4.0,975693063 -288,43,3.0,976122034 -288,47,3.5,1054568985 -288,48,2.0,978469623 -288,73,3.0,979163671 -288,87,3.0,1174395882 -288,95,1.0,978466299 -289,3,2.5,1143424657 -289,16,4.5,1143424648 -290,1,4.0,975031464 -290,11,4.0,974943069 -290,24,3.0,975032355 -290,25,4.0,974942930 -290,34,4.0,974942304 -290,36,4.0,975031318 -290,50,4.0,975031748 -291,1,4.0,1453051567 -292,1,4.0,1219625000 -292,10,3.5,1219625069 -292,19,1.5,1442099069 -292,32,2.0,1019516667 -292,34,3.5,1219625047 -292,62,3.0,1293563062 -293,1,3.0,1044870886 -294,2,3.0,966634189 -294,3,1.0,966596854 -294,6,3.0,966597476 -294,10,3.0,966634030 -294,12,1.0,966597190 -294,19,2.0,966597233 -294,21,3.0,966595514 -294,39,2.0,966595276 -294,44,3.0,966634266 -294,47,2.0,966597476 -294,50,3.0,966597387 -294,60,1.0,966634119 -294,65,3.0,966597039 -294,69,4.0,966596095 -294,70,4.0,966595946 -295,18,3.5,1320064940 -295,29,5.0,1320064417 -295,50,5.0,1320064969 -295,70,5.0,1320064943 -296,50,5.0,1532993858 -297,6,5.0,900871748 -297,22,2.0,900871989 -297,47,4.0,900871748 -297,50,5.0,900871748 -297,70,2.0,900871989 -297,79,1.0,900872461 -297,95,1.0,900872461 -297,100,1.0,900872461 -298,1,2.0,1447518257 -298,2,0.5,1450452897 -298,16,4.0,1447597782 -298,32,4.0,1450369288 -298,47,4.0,1447518037 -298,50,3.5,1447516828 -299,2,3.0,974620151 -301,10,4.0,1211378532 -301,17,4.0,1211378910 -301,21,4.0,1211378459 -301,36,4.5,1211378851 -301,47,4.5,1211377521 -301,50,4.5,1211377304 -302,3,3.0,854472084 -302,14,4.0,854472122 -302,17,5.0,854472022 -302,21,4.0,854473184 -302,25,5.0,854472022 -302,32,3.0,854472021 -302,36,4.0,854472084 -302,47,5.0,854473264 -302,58,5.0,854472122 -302,95,3.0,854472021 -303,16,2.5,1053302761 -303,32,3.5,1053303318 -304,1,5.0,881428344 -304,2,4.0,891173962 -304,7,4.0,881428370 -304,10,4.0,891173528 -304,11,5.0,891173135 -304,13,4.0,913327439 -304,17,3.0,891174111 -304,21,4.0,896268435 -304,32,4.0,881427991 -304,36,1.0,881428195 -304,46,5.0,911720227 -304,58,4.0,899026578 -304,60,1.0,891174024 -304,62,5.0,881427812 -304,73,4.0,920884193 -304,81,4.0,947050233 -304,86,5.0,880915450 -305,2,3.5,1460136227 -305,6,3.5,1460222104 -305,16,4.5,1460136042 -305,25,3.0,1518197993 -305,32,5.0,1460222250 -305,34,3.0,1460136295 -305,47,4.0,1460135569 -305,50,5.0,1460134079 -305,70,5.0,1494259450 -307,1,4.0,1186160893 -307,2,2.5,1186161482 -307,3,3.5,1186161652 -307,10,2.5,1186161010 -307,16,4.5,1186087665 -307,18,3.0,1186410788 -307,19,4.0,1186084466 -307,21,2.5,1189457112 -307,22,2.0,1186162078 -307,24,2.0,1186162409 -307,27,2.5,1186172544 -307,31,1.5,1186162274 -307,32,3.5,1186160917 -307,39,2.0,1186161053 -307,47,4.0,1186160966 -307,50,4.5,1186160949 -307,62,3.0,1186161410 -307,65,2.5,1186086548 -307,70,3.5,1186161936 -307,72,2.0,1186173251 -307,88,3.5,1186086639 -307,94,3.5,1186086089 -308,2,3.0,1421374418 -308,3,0.5,1421374465 -308,6,1.0,1421374400 -308,19,1.0,1421374425 -308,21,1.0,1421374404 -308,25,1.0,1421374411 -308,36,1.0,1421374415 -308,39,3.0,1421374380 -308,48,4.0,1421374475 -308,50,2.5,1421373873 -309,50,4.5,1126451972 -310,16,1.0,1078647827 -310,58,4.5,1078647847 -311,16,2.5,1057854247 -311,40,3.5,1057854804 -312,6,4.0,1043177752 -312,16,4.0,1043177752 -312,32,4.0,1043176620 -313,6,3.0,1030556299 -313,10,4.0,1030556439 -313,29,4.0,1030474666 -313,32,4.0,1030475177 -313,39,4.0,1030555731 -313,47,5.0,1030476272 -313,50,5.0,1030476169 -313,70,4.0,1030475884 -313,76,1.0,1030475081 -314,1,3.0,834398280 -314,6,3.0,842432711 -314,7,4.0,834241492 -314,10,3.0,834241870 -314,11,5.0,834241810 -314,15,2.0,834428676 -314,17,4.0,834398442 -314,21,3.0,834398026 -314,22,3.0,834398622 -314,23,3.0,834241586 -314,26,4.0,834398961 -314,31,3.0,834241586 -314,32,3.0,839857723 -314,39,3.0,834398364 -314,41,3.0,834398878 -314,44,3.0,834398518 -314,47,1.0,834398302 -314,50,4.0,847175449 -314,52,3.0,845285951 -314,60,3.0,834790837 -314,62,4.0,836234560 -314,95,2.0,834241545 -314,100,3.0,847175533 -316,45,4.5,1111493212 -316,48,1.5,1111493120 -317,6,5.0,1430532917 -317,16,5.0,1430598211 -317,32,4.0,1430361493 -317,47,5.0,1430361508 -317,50,5.0,1430361374 -317,97,3.0,1430598706 -318,2,3.5,1270753195 -318,6,4.0,1347180277 -318,18,3.0,1426353075 -318,19,3.5,1262805276 -318,29,3.5,1413627585 -318,32,4.5,1263848497 -318,47,4.0,1433139531 -318,48,3.5,1275844835 -318,72,4.5,1417278807 -318,97,4.5,1419694783 -321,2,5.0,843212595 -321,3,3.0,843212762 -321,5,3.0,843212719 -321,19,3.0,843212522 -321,21,3.0,843212550 -321,24,4.0,843212762 -322,1,3.5,1217676206 -322,2,3.0,1217676495 -322,10,4.0,1217676313 -322,11,4.0,1217676534 -322,47,4.0,1217676246 -322,50,4.0,1217675827 -323,1,3.5,1422640363 -323,2,4.0,1422640110 -323,17,3.5,1422640288 -323,19,2.5,1422640116 -323,22,3.0,1422640551 -323,29,3.5,1422640570 -323,32,2.5,1422640375 -323,34,2.5,1422640415 -323,36,3.5,1422640290 -323,48,3.5,1422640448 -323,50,4.5,1422640061 -323,60,3.0,1422640654 -323,62,2.5,1422640114 -325,6,4.0,1039398204 -325,19,2.0,1039395714 -325,22,3.0,1039397193 -325,29,4.0,1039398353 -325,30,4.0,1039399501 -325,32,4.0,1039396494 -325,58,4.0,1039395583 -325,80,4.0,1039397611 -326,20,3.5,1322250792 -326,47,4.0,1419880411 -326,50,2.0,1322867550 -326,97,3.5,1443983581 -327,50,4.5,1234788095 -328,1,5.0,1494210665 -328,13,3.0,1494212082 -328,34,5.0,1494210549 -328,39,2.0,1494210780 -328,47,4.5,1494211964 -328,50,4.0,1494206971 -329,50,2.0,1523468332 -330,1,4.0,1285904910 -330,2,1.5,1285905277 -330,3,3.0,1285905545 -330,16,3.5,1285905440 -330,25,4.5,1285905238 -330,29,3.0,1285903982 -330,32,5.0,1285904958 -330,34,3.0,1285905096 -330,47,3.0,1285904519 -330,50,5.0,1285904637 -330,58,5.0,1285904454 -330,62,3.0,1285905259 -331,10,3.0,1537157573 -331,34,2.0,1537157568 -331,50,5.0,1537157516 -332,1,4.0,1352672340 -332,16,3.5,1352673555 -332,32,2.5,1352671273 -332,47,4.0,1352672211 -332,50,4.0,1352671786 -334,1,3.5,1225477466 -334,10,3.5,1234629982 -334,47,2.0,1234629901 -335,50,5.0,1261542414 -336,1,4.0,1122227329 -336,6,4.0,1122227549 -336,47,4.5,1122227343 -336,50,5.0,1120568496 -336,70,4.0,1120568169 -337,1,4.0,860255715 -337,3,4.0,860255784 -337,5,4.0,860255784 -337,6,5.0,860255784 -337,7,3.0,860255784 -337,9,5.0,860255929 -337,12,3.0,860255992 -337,14,3.0,860255866 -337,16,4.0,860256120 -337,17,4.0,860255716 -337,24,5.0,860256119 -337,25,4.0,860255716 -337,27,5.0,860256159 -337,31,5.0,860256120 -337,32,4.0,860255715 -337,36,4.0,860255783 -337,41,3.0,860255992 -337,47,5.0,860256118 -337,50,3.0,860256158 -337,62,3.0,860255716 -337,65,3.0,860255929 -337,66,4.0,860255992 -337,74,5.0,860255992 -337,76,5.0,860256043 -337,79,4.0,860255867 -337,95,5.0,860255716 -337,100,3.0,860255992 -338,50,4.5,1530148314 -339,1,4.0,1460183470 -339,6,4.0,1460345729 -339,16,4.5,1460794139 -339,32,3.0,1460518918 -339,47,4.5,1460186377 -340,10,4.0,848666415 -341,1,5.0,1439750939 -342,24,3.0,1042822170 -343,50,4.5,1202061253 -345,11,2.5,1342828883 -345,32,3.5,1342827269 -346,17,4.0,1155066710 -346,47,3.5,1172693170 -346,50,4.0,1125857414 -347,1,5.0,847645986 -347,2,3.0,847645925 -347,10,4.0,847645690 -347,11,4.0,847645925 -347,19,4.0,847645789 -347,32,3.0,847645812 -347,34,4.0,847645761 -347,39,4.0,847645812 -347,47,3.0,847645738 -348,50,5.0,1378850183 -349,10,4.0,834750699 -349,34,5.0,834750842 -350,1,4.0,864940931 -350,7,3.0,864941017 -350,12,3.0,864941118 -350,17,2.0,864940932 -350,32,4.0,864940930 -350,65,3.0,864941118 -350,95,3.0,864940931 -351,12,5.0,1325978665 -351,47,4.5,1326027964 -352,47,5.0,1493931953 -352,50,5.0,1493931911 -353,1,5.0,831939685 -353,2,4.0,831939875 -353,5,3.0,831939914 -353,6,4.0,831939947 -353,9,3.0,831940313 -353,10,3.0,831939669 -353,11,4.0,831939792 -353,19,4.0,831939775 -353,22,5.0,831939991 -353,25,4.0,831939875 -353,32,5.0,831939731 -353,34,5.0,831939731 -353,39,1.0,831939750 -353,47,5.0,831939731 -353,69,3.0,831940600 -353,70,4.0,831940127 -354,10,3.5,1200870391 -354,32,4.0,1200870157 -354,34,3.5,1200870571 -354,44,3.5,1200865357 -354,47,4.0,1200870867 -354,50,4.5,1200869832 -354,70,4.5,1200953640 -355,60,3.0,974966167 -356,21,3.5,1229140823 -356,50,3.5,1229142733 -356,62,4.5,1229142896 -357,1,5.0,1348610184 -357,2,3.0,1348611277 -357,11,3.5,1348612270 -357,16,3.5,1348610125 -357,17,5.0,1348612178 -357,19,2.0,1348612202 -357,34,4.0,1348611388 -357,36,3.5,1348610447 -357,39,4.5,1348612128 -357,50,5.0,1348610163 -357,62,3.0,1348612191 -359,1,4.0,1198112023 -359,2,3.5,1198112829 -359,34,2.5,1198112136 -359,58,4.5,1198113196 -361,10,3.5,1204045505 -361,70,2.0,1204046026 -362,6,4.0,1530637587 -362,16,4.0,1530640994 -362,31,4.0,1530642798 -362,47,4.5,1530640109 -362,50,3.5,1530637009 -362,70,4.0,1530637651 -363,50,4.0,1502584540 -363,95,3.0,1502584443 -364,1,5.0,869443366 -364,9,3.0,869443802 -364,32,3.0,869443366 -364,95,5.0,869443367 -367,1,5.0,997811550 -367,34,3.0,997811126 -367,39,4.0,997812337 -368,3,3.0,971273951 -368,6,4.0,971275527 -368,10,3.0,971276726 -368,16,4.0,971275668 -368,21,3.0,975828964 -368,22,3.0,971276062 -368,42,2.0,975828507 -368,47,4.0,971275475 -368,50,4.0,971275320 -368,70,3.0,971273705 -368,79,2.0,971276901 -368,81,3.0,975828421 -368,95,2.0,971277081 -369,19,4.0,1237081519 -369,47,3.5,1237083091 -370,39,3.5,1159162930 -370,47,3.5,1159162973 -370,50,4.5,1159161956 -371,48,4.5,1407691602 -372,1,3.0,874416443 -372,6,5.0,874415865 -372,8,2.0,874415441 -372,10,3.0,874416808 -372,11,4.0,874415285 -372,14,4.0,874416949 -372,16,4.0,874415966 -372,21,3.0,874416570 -372,25,4.0,874415164 -372,31,2.0,874416808 -372,32,4.0,874415441 -372,34,4.0,874415934 -372,36,3.0,874414994 -372,39,3.0,874417086 -372,41,4.0,874415991 -372,47,5.0,874417240 -372,48,2.0,874416616 -372,50,5.0,874414678 -372,52,4.0,874416234 -372,62,3.0,874415902 -372,70,3.0,874415199 -372,71,2.0,874416544 -372,81,2.0,874416280 -373,1,3.0,846830172 -373,2,3.0,846830111 -373,6,5.0,846830247 -373,11,2.0,846830094 -373,16,4.0,846830296 -373,19,1.0,846830035 -373,21,1.0,846830035 -373,22,3.0,846830333 -373,25,5.0,846830207 -373,32,4.0,846830055 -373,34,4.0,846830015 -373,44,1.0,846830190 -373,47,5.0,846829997 -373,50,5.0,846830055 -373,60,3.0,846830521 -373,95,2.0,846830128 -375,32,5.0,1225314381 -376,10,4.5,1364994181 -376,19,2.0,1364994453 -376,32,5.0,1364994049 -376,95,3.5,1364994336 -377,19,2.0,1340342704 -377,32,4.5,1340342216 -377,34,4.0,1340347654 -378,1,4.5,1445347576 -378,36,4.0,1445347078 -379,19,2.0,847397556 -379,21,2.0,847397556 -379,34,5.0,847397499 -379,47,3.0,847397459 -380,1,5.0,1493420345 -380,2,5.0,1493420295 -380,6,5.0,1494278663 -380,10,5.0,1493419787 -380,12,4.0,1493668065 -380,16,3.0,1494803499 -380,18,4.0,1494278868 -380,19,5.0,1493667522 -380,21,2.0,1493422718 -380,32,5.0,1493494899 -380,34,4.0,1493420018 -380,44,4.0,1493667678 -380,47,5.0,1493494893 -380,48,3.0,1493423481 -380,50,4.0,1493420347 -380,60,4.0,1494035896 -380,70,5.0,1493423196 -380,76,3.0,1494278971 -380,95,3.0,1493420301 -381,1,3.5,1164383653 -381,2,4.0,1166971855 -381,19,2.5,1165975855 -381,32,3.5,1200824503 -381,34,3.5,1164877286 -381,47,2.0,1166980226 -381,48,3.0,1165976148 -381,50,4.5,1168576731 -381,62,4.5,1263494357 -382,1,4.5,1515162628 -382,50,5.0,1515160611 -384,60,4.0,994038823 -385,1,4.0,834691642 -385,6,3.0,840648313 -385,10,3.0,834691622 -385,14,3.0,834692097 -385,16,3.0,834691914 -385,21,5.0,835118614 -385,25,3.0,834691845 -385,32,4.0,838323677 -385,45,3.0,837949548 -385,50,3.0,834691694 -385,62,3.0,836844635 -385,68,3.0,865024733 -385,85,3.0,844794767 -385,94,4.0,847137503 -385,95,3.0,834691768 -386,6,3.0,842613783 -386,10,3.0,842610246 -386,16,4.0,842613849 -386,20,2.0,842614033 -386,21,4.0,842610304 -386,22,3.0,842613867 -386,25,2.0,842613763 -386,32,3.0,842610360 -386,34,4.0,842610286 -386,39,3.0,842610325 -386,44,2.0,842613763 -386,45,3.0,842613904 -386,47,3.0,842610286 -386,50,3.0,842610344 -387,10,3.5,1094938515 -387,11,2.5,1117415000 -387,29,3.5,1094875272 -387,31,2.5,1094939853 -387,32,3.5,1094876449 -387,39,2.5,1095040527 -387,44,1.0,1182720368 -387,45,2.5,1094877493 -387,47,4.5,1094876919 -387,50,4.5,1094876246 -387,63,2.0,1095042100 -387,70,3.5,1183536308 -387,71,1.5,1150877466 -387,97,4.5,1199607279 -389,1,5.0,857934174 -389,5,4.0,857934242 -389,6,5.0,857934242 -389,9,3.0,857934552 -389,17,4.0,857934174 -389,25,4.0,857934175 -389,32,4.0,857934174 -389,62,5.0,857934174 -389,65,4.0,857934553 -389,95,4.0,857934174 -391,1,3.0,1032388077 -391,6,4.0,1030944221 -391,10,3.0,1030944309 -391,25,5.0,1030827466 -391,29,5.0,1030826859 -391,32,4.0,1030826859 -391,34,3.0,1032389962 -391,47,3.0,1030944178 -391,50,4.0,1030827636 -391,58,4.0,1030827343 -391,70,1.0,1030944454 -394,34,3.0,838994398 -394,50,5.0,838994626 -395,2,3.0,841503884 -395,10,3.0,841503555 -395,34,3.0,841503656 -395,48,2.0,841504003 -395,60,3.0,841505041 -395,95,3.0,841503940 -396,1,5.0,1111688626 -396,44,2.0,1111688564 -396,45,3.0,1111688518 -396,50,4.0,1111688630 -398,32,5.0,1311207513 -399,1,4.0,1167220428 -400,6,5.0,1498870480 -400,47,5.0,1498870391 -400,50,5.0,1498870285 -401,1,3.5,1510450550 -401,48,2.0,1514347122 -402,5,3.0,849598135 -402,9,3.0,849598259 -402,25,3.0,849598104 -402,36,3.0,849598135 -402,47,3.0,849654599 -402,52,3.0,849598217 -402,61,5.0,849598453 -402,95,4.0,849598075 -403,47,5.0,1225243620 -404,11,4.0,838376006 -404,17,4.0,838376049 -404,21,3.0,838375894 -404,34,4.0,838375864 -404,36,3.0,838376195 -404,47,3.0,838375864 -404,62,4.0,838376141 -405,16,4.0,1295916006 -405,25,4.0,1299377952 -405,32,4.5,1295910764 -405,70,4.0,1295917097 -408,10,4.5,1467913128 -408,19,3.5,1469521999 -408,44,3.5,1484264475 -409,39,4.0,968979119 -410,3,4.0,990910830 -410,11,3.0,990910298 -410,21,3.0,990910575 -410,52,4.0,990910213 -410,58,4.0,990807791 -411,1,5.0,835532155 -411,2,4.0,835532398 -411,4,2.0,835533021 -411,7,3.0,835533471 -411,10,3.0,835532065 -411,11,4.0,835532370 -411,21,4.0,835532191 -411,22,3.0,835532644 -411,31,4.0,835532539 -411,34,4.0,835532191 -411,46,2.0,835533337 -411,47,4.0,835532191 -411,50,3.0,835532221 -411,52,2.0,835533198 -411,62,5.0,835532559 -412,1,2.0,939114353 -412,17,5.0,939136944 -412,34,5.0,939114265 -412,47,3.0,939114573 -413,16,5.0,1484440002 -414,1,4.0,961438127 -414,2,3.0,961594981 -414,3,4.0,961439278 -414,5,2.0,961437647 -414,6,3.0,961515642 -414,7,3.0,961439170 -414,8,3.0,961594849 -414,10,3.0,961515863 -414,11,5.0,1052148205 -414,15,2.0,961514611 -414,16,3.0,961517557 -414,17,4.0,961513829 -414,18,3.0,961682128 -414,21,4.0,961438199 -414,22,3.0,961518227 -414,23,2.0,961682276 -414,24,3.0,961436964 -414,25,3.0,961517140 -414,27,2.0,961518812 -414,31,3.0,961518520 -414,32,5.0,961514667 -414,34,5.0,961438127 -414,36,3.0,961516989 -414,39,4.0,961438265 -414,42,2.0,961515844 -414,44,2.0,961516249 -414,45,3.0,961438476 -414,46,2.0,961514407 -414,47,4.0,961681857 -414,48,3.0,961437741 -414,50,5.0,961681714 -414,52,3.0,961438413 -414,54,1.0,1027521065 -414,57,3.0,961517912 -414,62,4.0,961517885 -414,65,2.0,961439623 -414,71,2.0,961515844 -414,72,4.0,961438532 -414,75,1.0,1027521022 -414,78,3.0,961518581 -414,81,2.0,961514568 -414,86,3.0,961437096 -414,88,2.0,961439433 -414,89,3.0,961515928 -414,92,2.0,961518731 -414,94,5.0,961517421 -414,95,2.0,961515958 -415,47,4.0,1382470301 -415,50,4.5,1382469955 -417,47,5.0,1532134167 -418,50,4.5,1461865740 -419,10,3.5,1321659128 -419,47,4.0,1321659049 -419,50,4.0,1321854706 -420,1,4.0,1218207191 -420,32,3.5,1218047864 -420,73,4.0,1218036406 -421,73,4.0,1311494239 -422,1,4.0,986173307 -424,32,3.5,1457901858 -424,47,5.0,1457842442 -424,50,5.0,1457842255 -425,2,3.0,1085477682 -425,6,4.0,1085477536 -425,10,3.0,1085477406 -425,32,4.0,1085477320 -425,34,3.5,1114173653 -425,36,3.0,1085477524 -425,47,4.5,1085477391 -425,50,4.5,1085477355 -425,58,3.5,1085490369 -425,70,4.0,1085490625 -425,95,2.5,1085477510 -426,2,4.5,1451080838 -426,31,5.0,1451080848 -426,47,0.5,1451081886 -427,16,4.0,1053068794 -427,58,2.0,1053071377 -428,9,2.0,1111524871 -428,19,1.0,1111523373 -428,22,2.5,1111487500 -428,44,2.0,1111623964 -428,69,3.5,1111524882 -428,70,3.0,1111524672 -428,95,2.0,1111524678 -429,22,4.0,828124615 -429,48,4.0,828124616 -429,60,4.0,828124616 -430,34,3.0,963889406 -432,1,2.5,1316391457 -432,2,4.0,1316391739 -432,19,2.0,1316391760 -432,25,4.5,1315243647 -432,36,3.5,1316391708 -432,47,4.0,1315244565 -432,50,3.0,1315243055 -433,47,3.0,1506823900 -433,50,5.0,1506823884 -434,1,4.0,1270604402 -434,2,2.5,1271039378 -434,6,4.0,1270603905 -434,10,3.5,1270606683 -434,32,3.5,1270604323 -434,34,3.5,1270604933 -434,39,3.0,1270606827 -434,47,4.0,1270603499 -434,95,2.0,1270606732 -435,48,4.0,1366675847 -435,50,4.0,1366676016 -436,1,4.0,833529571 -436,2,4.0,833529751 -436,8,3.0,833530760 -436,34,5.0,833529621 -436,48,4.0,833529821 -436,60,3.0,833530067 -437,5,2.0,859721015 -437,6,5.0,859721015 -437,9,3.0,859721103 -437,14,3.0,859721080 -437,16,5.0,859721743 -437,17,4.0,859720978 -437,25,4.0,859720978 -437,31,4.0,859721815 -437,32,4.0,859720977 -437,36,4.0,859721015 -437,43,3.0,859721556 -437,47,4.0,859721361 -437,50,4.0,859721362 -437,52,4.0,859721046 -437,62,3.0,859720978 -437,78,3.0,859721693 -437,79,2.0,859721080 -437,81,3.0,859721790 -437,86,3.0,859721170 -437,95,1.0,859720977 -438,1,4.5,1105650469 -438,6,5.0,1105664272 -438,10,4.0,1105664201 -438,11,4.0,1105668426 -438,19,3.0,1105666629 -438,21,3.5,1105666251 -438,32,4.0,1105666143 -438,44,3.0,1105667144 -438,62,4.0,1105649800 -438,70,3.0,1105762813 -438,71,2.0,1105664825 -438,95,3.5,1105666265 -440,29,4.5,1237569069 -441,10,4.5,1449070452 -441,47,4.5,1451166486 -443,1,4.0,1501722482 -444,6,4.0,832677558 -444,16,4.0,832678415 -444,21,4.0,832670075 -444,47,4.0,832670399 -444,50,5.0,832670838 -444,58,5.0,832679295 -444,97,5.0,839310140 -444,100,3.0,839310140 -445,17,3.5,1454621917 -445,32,5.0,1454622049 -445,47,4.5,1454622061 -446,2,3.0,843839379 -446,10,3.0,843839232 -446,11,4.0,843839379 -446,16,3.0,843839544 -446,17,5.0,843839379 -446,21,3.0,843839290 -446,25,3.0,843839504 -446,32,4.0,843839327 -446,34,5.0,843839272 -446,39,3.0,843839306 -446,45,3.0,843839593 -446,47,4.0,843839250 -446,50,4.0,843839306 -446,95,4.0,843839401 -447,2,5.0,836961067 -447,10,3.0,836960825 -447,31,4.0,836961195 -447,44,5.0,836961153 -447,48,3.0,836961178 -448,1,5.0,1019126661 -448,2,3.0,1019125424 -448,3,3.0,1019128536 -448,5,3.0,1019128415 -448,10,4.0,1019124400 -448,12,2.0,1019563753 -448,16,5.0,1019138531 -448,19,2.0,1019132168 -448,20,3.0,1019124922 -448,21,2.0,1019124231 -448,32,2.0,1019132949 -448,38,3.0,1019227804 -448,47,4.0,1019132386 -448,50,4.0,1064741727 -448,65,1.0,1028111080 -448,66,2.0,1019133230 -448,95,2.0,1019124644 -449,32,4.0,1053200119 -449,50,4.5,1053199959 -450,70,2.0,974705218 -451,1,5.0,854089165 -451,5,3.0,854089243 -451,6,4.0,854089242 -451,7,3.0,854089243 -451,17,5.0,854089172 -451,25,5.0,854089175 -451,32,5.0,854089163 -451,94,4.0,854089769 -451,95,4.0,854089167 -452,10,4.0,1013395144 -452,44,4.0,1019580991 -452,47,5.0,1019581177 -452,69,5.0,1019585277 -452,70,4.0,1013397998 -453,1,5.0,1005966797 -453,10,2.0,972621985 -453,16,3.0,972622830 -453,21,3.0,972621787 -453,32,5.0,972621444 -453,34,4.0,972622496 -453,47,5.0,972622637 -453,50,5.0,972621262 -453,70,4.0,972622055 -454,82,3.5,1279476639 -455,11,4.0,836436201 -455,34,4.0,836436023 -455,50,3.0,836436049 -455,62,4.0,836436499 -456,1,5.0,856883308 -456,3,3.0,856883349 -456,5,3.0,856883349 -456,9,4.0,856883450 -456,32,3.0,856883308 -456,64,3.0,856883540 -456,65,2.0,856883450 -456,74,4.0,856883468 -456,79,2.0,856883417 -457,34,2.0,993610564 -458,2,4.0,845653124 -458,5,3.0,845652992 -458,10,4.0,845651737 -458,21,5.0,845653030 -458,27,5.0,845652968 -458,39,4.0,845653086 -458,48,3.0,845652560 -458,62,5.0,845652560 -458,95,5.0,845652537 -460,1,4.5,1359177505 -462,1,1.5,1154037653 -462,10,3.0,1269929071 -462,16,3.5,1123893685 -462,21,4.0,1121923492 -462,25,3.0,1154037817 -462,32,3.5,1121921239 -462,36,3.5,1123891148 -462,47,4.0,1154037716 -462,50,3.5,1154037688 -462,52,3.0,1123890939 -464,9,2.5,1287400084 -464,16,4.5,1287400356 -464,20,4.0,1275548142 -464,23,3.0,1275548130 -464,25,4.0,1287400227 -464,31,4.0,1287400817 -464,32,3.5,1275548714 -464,47,5.0,1275549635 -465,95,3.0,959896430 -467,41,4.0,919671922 -467,58,5.0,919672099 -468,1,4.0,831400444 -468,32,4.0,831400500 -468,39,5.0,831400519 -468,47,5.0,831400545 -469,1,4.0,965336888 -469,6,3.0,965336673 -469,10,2.0,965334356 -469,11,3.0,965425831 -469,29,4.0,965335401 -469,32,5.0,965335350 -469,36,3.0,965846167 -469,39,2.0,965425327 -469,44,1.0,965335037 -469,45,4.0,965425742 -469,47,4.0,965336711 -469,50,4.0,965336630 -469,70,2.0,965334011 -469,89,4.0,965334954 -470,1,4.0,849224825 -470,2,3.0,849224778 -470,3,3.0,849370396 -470,5,3.0,849370453 -470,6,3.0,849843318 -470,7,3.0,849370453 -470,10,3.0,849075144 -470,14,4.0,849843318 -470,19,3.0,849075545 -470,21,3.0,849075545 -470,26,3.0,849843245 -470,32,3.0,849075682 -470,34,4.0,849075432 -470,36,4.0,849370395 -470,39,3.0,849075682 -470,41,3.0,849843245 -470,43,3.0,849370292 -470,47,3.0,849075299 -470,50,3.0,849075545 -470,62,3.0,849224778 -470,95,3.0,849843245 -471,1,5.0,1496671820 -472,50,5.0,1345843346 -473,60,2.0,1169351160 -474,1,4.0,978575760 -474,2,3.0,1046886814 -474,5,1.5,1053021982 -474,6,3.0,1047054565 -474,7,3.0,978576381 -474,11,2.5,1053021437 -474,14,3.0,1120827247 -474,16,4.0,1088426731 -474,17,5.0,974668666 -474,21,4.0,1119232784 -474,22,3.0,1046896006 -474,24,3.0,1060105861 -474,25,3.5,1127143175 -474,26,3.5,1136942664 -474,27,2.0,1069686414 -474,28,4.5,1165540007 -474,29,3.5,1053021379 -474,31,3.0,1060105936 -474,32,4.0,1081177409 -474,34,4.5,1081177349 -474,36,5.0,979180034 -474,38,1.0,1089387538 -474,39,3.5,1129579520 -474,41,3.5,1089386983 -474,43,3.5,1081177921 -474,45,4.0,1014924369 -474,46,3.0,1081178156 -474,47,4.0,979180247 -474,50,4.0,979179872 -474,52,4.0,1004131663 -474,57,3.0,1060105374 -474,58,3.5,1126533613 -474,62,2.5,1053021366 -474,74,3.0,974669217 -474,82,2.5,1121262740 -474,92,3.5,1087832141 -474,96,3.5,1053020997 -474,100,2.0,1048710159 -475,2,4.5,1498031744 -475,19,4.0,1498031776 -476,1,4.0,835021447 -476,2,4.0,835021693 -476,10,3.0,835021420 -476,11,3.0,835021635 -476,13,3.0,835022487 -476,32,4.0,835021513 -476,34,4.0,835021494 -476,45,3.0,835022455 -476,48,4.0,835022192 -476,73,4.0,835022035 -477,1,4.0,1200939636 -477,2,4.0,1200939962 -477,3,3.0,1200941177 -477,19,3.0,1200939977 -477,24,4.0,1201159341 -477,32,4.5,1200939654 -477,34,4.0,1200939719 -477,47,4.0,1200939679 -477,66,0.5,1201158662 -477,76,4.0,1201158833 -479,24,3.0,1039362593 -479,31,3.0,1039362413 -479,45,3.0,1039367502 -479,82,5.0,1039362593 -480,1,3.0,1179178004 -480,2,3.0,1179178191 -480,3,2.5,1179178556 -480,6,4.0,1179162163 -480,10,4.0,1179177922 -480,16,4.0,1179161099 -480,19,1.5,1179178233 -480,21,2.5,1179161927 -480,32,4.0,1179177844 -480,34,4.0,1179178044 -480,39,2.5,1179177930 -480,47,4.5,1179177656 -480,50,3.5,1179160287 -480,60,2.0,1179159595 -480,62,3.5,1179178063 -480,69,2.0,1179161861 -480,95,1.0,1179177869 -482,2,4.5,1105395956 -482,16,2.0,1105396054 -482,50,4.0,1105396885 -482,62,5.0,1105396550 -483,1,4.0,1178293130 -483,2,4.0,1178293652 -483,5,2.5,1327277284 -483,10,2.0,1178293444 -483,16,4.0,1178293782 -483,18,4.0,1215897224 -483,19,3.5,1181494938 -483,23,3.0,1215897538 -483,24,2.5,1327277916 -483,26,4.0,1215898801 -483,29,5.0,1181495581 -483,31,2.0,1181495275 -483,32,5.0,1178213437 -483,47,3.5,1178215187 -483,48,3.0,1181496689 -483,50,4.5,1204405320 -483,81,3.5,1215898195 -484,1,4.5,1342295949 -484,2,2.5,1342296219 -484,39,4.5,1342296074 -484,47,4.0,1342228947 -484,48,4.5,1342296910 -484,63,4.0,1342228081 -484,69,4.5,1342300148 -484,72,4.0,1342369324 -485,10,4.0,837943350 -486,6,5.0,839537263 -486,10,4.0,839537237 -486,21,4.0,839537017 -486,32,5.0,839537271 -486,39,3.0,839537186 -486,44,4.0,839537298 -486,47,3.0,839537220 -486,86,5.0,839537298 -486,95,3.0,839537249 -488,1,4.5,1112382025 -488,17,5.0,1112382176 -488,26,2.5,1112382126 -488,34,3.0,1112382292 -488,36,4.5,1112382199 -488,73,2.5,1112382455 -489,2,2.5,1333101570 -489,5,2.0,1385823772 -489,7,4.0,1334587547 -489,10,2.5,1333101410 -489,11,1.5,1334170648 -489,17,4.5,1332773255 -489,18,2.0,1333234401 -489,19,2.0,1333101593 -489,21,2.5,1333101507 -489,22,3.5,1333022826 -489,32,3.5,1333023575 -489,34,2.0,1333101391 -489,39,3.5,1332772964 -489,46,3.5,1333658594 -489,47,3.0,1333101316 -489,48,4.0,1334587634 -489,70,2.0,1385823826 -489,95,1.5,1333101502 -490,1,3.5,1328229305 -490,5,0.5,1324370305 -492,1,4.0,863975949 -492,3,4.0,863976005 -492,5,3.0,863976004 -492,6,3.0,863976004 -492,7,3.0,863976055 -492,9,5.0,863976201 -492,12,3.0,863976249 -492,14,3.0,863976101 -492,32,3.0,863975946 -492,36,4.0,863976004 -492,52,3.0,863976101 -492,61,4.0,863976447 -492,62,4.0,863975954 -492,63,3.0,863976526 -492,64,3.0,863976525 -492,65,5.0,863976248 -492,66,3.0,863976249 -492,74,4.0,863976249 -492,75,3.0,863976878 -492,76,5.0,863976409 -492,79,4.0,863976101 -492,88,5.0,863976409 -492,95,3.0,863975954 -492,100,3.0,863976249 -493,6,4.0,1001562846 -495,19,4.5,1458636447 -495,50,4.0,1458634563 -497,2,2.5,1429127190 -497,19,4.0,1429127195 -497,50,3.0,1429127291 -498,7,4.0,839197378 -498,10,3.0,839197307 -498,34,3.0,839197341 -498,47,3.0,839197341 -498,62,5.0,839197444 -500,1,4.0,1005527755 -500,11,1.0,1005528017 -500,39,1.0,1005527926 -501,2,3.0,844973419 -501,3,5.0,844973817 -501,5,3.0,844973535 -501,6,3.0,844973487 -501,7,4.0,844973817 -501,8,3.0,844974090 -501,10,4.0,844973340 -501,11,3.0,844973340 -501,15,1.0,844973980 -501,16,3.0,844973535 -501,17,3.0,844973419 -501,18,5.0,844974006 -501,20,1.0,844974037 -501,21,3.0,844973340 -501,25,2.0,844973487 -501,27,3.0,844973931 -501,32,3.0,844973340 -501,36,3.0,844973487 -501,45,3.0,844973535 -501,50,3.0,844973340 -501,61,3.0,844973980 -501,62,3.0,844973487 -501,73,2.0,844974214 -501,86,3.0,844974037 -501,93,3.0,844974090 -501,95,4.0,844973419 -502,45,3.5,1111757345 -504,1,4.0,1063644695 -504,22,3.5,1063644469 -505,47,5.0,1298780253 -506,27,3.0,1424487740 -506,39,5.0,1424487595 -507,34,5.0,838964683 -507,39,3.0,838964728 -507,47,3.0,838964683 -509,1,4.0,1435992343 -509,5,1.5,1435992893 -509,13,3.0,1436101243 -509,17,3.0,1435992515 -509,19,4.0,1436027225 -509,28,3.5,1436031727 -509,32,4.0,1435998135 -509,34,3.5,1435998986 -509,39,4.0,1435992749 -509,48,3.5,1436000115 -510,7,1.0,1141158812 -510,16,1.0,1141158760 -510,50,4.0,1141160526 -512,2,3.0,841449636 -512,32,5.0,841449445 -512,39,4.0,841449426 -512,47,5.0,841449359 -512,50,5.0,841449402 -513,7,3.0,1159980407 -513,32,4.0,1159980466 -514,1,4.0,1533872400 -514,11,4.0,1533949297 -514,16,3.5,1533872553 -514,22,3.0,1533947171 -514,34,4.0,1533872519 -514,44,2.5,1533945558 -514,47,4.0,1533872325 -514,62,4.0,1533949200 -515,50,4.5,1513678307 -517,1,4.0,1487954343 -517,2,3.0,1487954340 -517,10,0.5,1487957717 -517,17,0.5,1487953834 -517,34,5.0,1487954303 -517,47,2.0,1487958109 -518,24,2.5,1056907643 -518,31,1.0,1056907629 -520,10,4.0,1326609139 -520,19,1.0,1326609359 -520,32,3.5,1326608236 -521,5,3.0,852713143 -521,6,4.0,852713143 -521,7,3.0,852713143 -521,14,4.0,852713185 -521,18,4.0,852713356 -521,25,5.0,852713083 -521,29,3.0,852713417 -521,32,3.0,852713082 -521,36,4.0,852713143 -521,43,4.0,852713446 -521,52,4.0,852713185 -521,58,4.0,852713185 -521,79,4.0,852713251 -521,81,3.0,852713417 -521,86,4.0,852713380 -521,95,3.0,852713082 -522,1,3.0,1253344674 -522,10,4.5,1253344761 -522,50,4.5,1253430096 -522,62,3.0,1253345769 -523,2,4.5,1503126180 -524,1,4.0,851608466 -524,6,4.0,852404399 -524,10,4.0,851608818 -524,12,1.0,852404800 -524,19,3.0,851609256 -524,20,2.0,851609083 -524,21,3.0,852404913 -524,23,2.0,851608986 -524,25,3.0,851608466 -524,32,3.0,851608466 -524,47,5.0,851608960 -524,50,2.0,851608781 -524,76,2.0,852404800 -524,86,3.0,852404550 -524,95,4.0,851608466 -525,1,4.0,1476475973 -525,2,3.5,1476480324 -525,34,3.0,1476480775 -525,39,4.5,1476477672 -525,47,3.5,1476476493 -525,48,3.0,1476480566 -525,50,4.5,1476476363 -525,62,3.5,1476480183 -527,2,4.0,1033173338 -527,30,1.0,1033173581 -527,34,5.0,1033173966 -527,48,3.0,1033173290 -528,1,2.5,1391736605 -529,1,3.0,855583216 -529,7,2.0,855583292 -529,32,5.0,855583215 -529,62,3.0,855583216 -529,65,1.0,855583470 -529,95,4.0,855583216 -530,11,4.0,843227087 -531,10,4.0,1032961647 -532,6,5.0,1025523798 -533,1,5.0,1424753740 -534,1,4.0,1459787997 -534,2,4.5,1459787996 -534,10,4.0,1459787996 -534,19,4.0,1459787996 -534,31,3.5,1459788707 -534,34,4.0,1459793302 -534,44,4.0,1459788735 -534,47,4.0,1459787998 -534,48,4.0,1459792580 -536,45,5.0,832840081 -539,65,4.5,1332474525 -539,88,3.5,1332474517 -540,47,4.5,1179109020 -541,1,3.0,835643027 -541,7,4.0,835644446 -541,10,4.0,835642985 -541,11,4.0,835643146 -541,15,3.0,835643931 -541,19,2.0,835643052 -541,22,4.0,835643333 -541,47,1.0,835643038 -541,50,5.0,835643052 -541,95,3.0,835643186 -542,47,5.0,1163386913 -544,1,3.0,850688537 -544,3,3.0,850688562 -544,14,3.0,850688581 -544,32,3.0,850688537 -544,40,5.0,850688776 -544,62,5.0,850688537 -544,68,5.0,850688776 -544,95,4.0,850688537 -545,44,2.5,1240358381 -546,70,5.0,973588518 -548,69,5.0,1488243429 -550,1,4.0,1488728441 -551,47,4.5,1505548073 -552,3,1.0,1111472953 -552,19,3.5,1111473056 -552,25,3.0,1112151250 -552,95,1.5,1111473067 -553,6,5.0,1219558120 -553,16,5.0,1219558107 -553,32,5.0,1219564918 -553,42,4.0,1219559052 -553,50,4.0,1219557525 -555,1,4.0,978746159 -555,3,5.0,978747454 -555,19,3.0,980123949 -555,21,4.0,978746440 -555,24,5.0,978841879 -555,29,4.0,978841345 -555,32,4.0,978841464 -555,39,4.0,978746326 -555,50,5.0,978819462 -555,65,3.0,980125946 -555,72,3.0,978746552 -555,75,3.0,980125866 -555,88,4.0,980126008 -557,10,4.5,1452797765 -558,94,3.0,1035415930 -559,1,5.0,865095758 -559,2,4.0,845476032 -559,6,5.0,865095857 -559,10,3.0,845475880 -559,15,3.0,845476569 -559,19,2.0,845475946 -559,32,3.0,845475965 -559,34,5.0,845475917 -559,36,4.0,845476089 -559,39,3.0,845475987 -559,48,3.0,845476159 -559,58,4.0,865095758 -559,66,3.0,865096234 -559,76,3.0,865096444 -559,95,4.0,865095801 -560,1,3.0,1469653413 -560,32,3.5,1469647882 -560,34,2.5,1469647264 -560,47,4.0,1469648029 -560,48,2.5,1469654312 -560,50,4.0,1469647239 -560,70,4.0,1469653546 -560,97,4.0,1469653182 -561,1,4.0,1491094681 -561,2,4.0,1491094318 -561,5,3.0,1491094488 -561,6,4.0,1491092289 -561,31,2.5,1491094481 -561,32,3.5,1491091981 -561,34,2.5,1491095062 -561,39,3.0,1491091334 -561,44,2.0,1491091620 -561,47,4.5,1491091954 -561,50,4.5,1491090860 -562,1,4.5,1368893997 -562,50,4.0,1368894758 -563,2,2.5,1447185161 -563,34,2.0,1441846213 -563,48,3.5,1440800284 -564,39,3.5,1478453734 -565,19,3.0,846533399 -565,21,3.0,846533399 -565,32,5.0,846533428 -565,34,4.0,846533367 -565,47,4.0,846533338 -565,50,5.0,846533399 -566,2,4.0,849005893 -566,7,4.0,849006845 -566,10,3.0,849005345 -566,11,5.0,849005826 -566,17,5.0,849006000 -566,21,4.0,849005643 -566,25,2.0,849006116 -566,32,4.0,849005720 -566,39,4.0,849005720 -566,50,5.0,849005642 -566,57,3.0,849006827 -567,1,3.5,1525286001 -567,34,2.5,1525288053 -567,50,1.0,1525282012 -568,50,5.0,1243576177 -569,10,4.0,849190709 -569,50,3.0,849190709 -570,1,4.0,1181476989 -570,2,3.5,1181477805 -570,10,3.5,1181477300 -570,11,3.5,1181477768 -570,25,4.0,1181477537 -570,32,4.0,1181477032 -570,34,3.5,1181477210 -570,39,3.0,1181477492 -570,47,3.5,1181477162 -570,95,2.5,1181477499 -571,12,1.0,966901337 -571,70,2.0,966900368 -572,1,4.0,945892484 -572,17,5.0,945888053 -572,21,5.0,945890765 -572,62,4.0,945890338 -573,1,5.0,1186722182 -573,2,4.5,1187044862 -573,6,4.5,1248842019 -573,10,4.5,1186722464 -573,47,5.0,1186589586 -573,50,5.0,1248841981 -574,47,5.0,834634527 -576,29,3.5,1358151482 -577,6,4.0,945967415 -577,39,3.0,945964946 -577,88,3.0,945968259 -579,1,4.0,958881146 -579,11,3.0,958879371 -579,17,5.0,977364909 -579,34,3.0,958880089 -579,48,4.0,958881238 -580,1,3.0,1167792349 -580,6,4.0,1167789917 -580,10,3.5,1167792602 -580,16,4.5,1167790725 -580,22,4.0,1167861702 -580,25,4.0,1167789966 -580,32,5.0,1167790855 -580,34,2.5,1167673463 -580,47,5.0,1167791127 -580,50,5.0,1167789884 -580,62,2.0,1167790047 -580,70,5.0,1167673477 -583,39,5.0,1481474136 -584,1,5.0,834987643 -584,10,5.0,834987172 -584,19,3.0,834987751 -584,22,4.0,834988132 -584,34,3.0,834987730 -584,39,1.0,834987810 -584,47,5.0,834987730 -584,48,5.0,834988006 -584,60,5.0,834988340 -585,16,5.0,1307417343 -585,88,4.0,1307416427 -586,2,4.0,1529901723 -587,1,5.0,953137847 -587,11,4.0,953138510 -587,21,4.0,953138653 -587,32,5.0,953141417 -587,50,5.0,953141366 -587,58,5.0,953141592 -588,3,3.0,839317471 -588,6,5.0,839316454 -588,10,3.0,839316215 -588,16,4.0,839316454 -588,20,2.0,839317230 -588,21,3.0,839316350 -588,22,3.0,839317568 -588,25,3.0,839316993 -588,36,2.0,839316709 -588,42,3.0,839316637 -588,47,3.0,839316278 -588,50,5.0,839316659 -589,25,5.0,856038816 -589,36,5.0,856038894 -590,1,4.0,1258420408 -590,2,2.5,1258420835 -590,3,3.0,1258416995 -590,5,2.0,1258421220 -590,6,3.5,1258420706 -590,10,3.5,1258420600 -590,11,3.0,1258419975 -590,17,3.5,1258418302 -590,19,2.0,1258420848 -590,32,3.0,1258420444 -590,36,3.5,1258420766 -590,47,3.0,1258420506 -590,50,4.5,1264910688 -591,24,2.0,970525102 -592,2,4.0,837350242 -592,6,3.0,837350390 -592,10,3.0,837349966 -592,15,4.0,837350747 -592,19,4.0,837350082 -592,21,3.0,837350081 -592,24,4.0,837350801 -592,39,4.0,837350111 -592,44,3.0,837350308 -592,93,3.0,837350801 -592,95,3.0,837350282 -593,34,4.0,1181008004 -593,41,4.0,1181007216 -593,50,4.5,1181007737 -594,2,4.0,1109037094 -594,3,4.0,1108798921 -594,11,5.0,1109036973 -594,17,4.0,1108972872 -594,28,4.5,1108973143 -594,39,4.5,1115885611 -594,46,4.5,1108973615 -594,48,4.5,1109038257 -594,70,3.5,1108951120 -595,50,5.0,938807286 -596,1,4.0,1535709666 -596,32,3.5,1535709749 -596,34,4.0,1535827362 -596,39,4.0,1535827547 -596,50,3.5,1535709301 -597,1,4.0,941557863 -597,6,3.0,940420695 -597,7,1.0,941558874 -597,10,3.0,941729264 -597,11,3.0,941558713 -597,17,3.0,940362409 -597,21,5.0,941559030 -597,34,4.0,940362281 -597,39,4.0,941558116 -597,42,3.0,941729264 -597,45,5.0,941559030 -597,47,4.0,940361541 -597,50,5.0,940362491 -597,52,4.0,941559030 -597,69,4.0,941558258 -597,70,2.0,941559139 -599,1,3.0,1498524204 -599,2,2.5,1498514085 -599,3,1.5,1498505071 -599,6,4.5,1498539623 -599,7,2.5,1498514161 -599,9,1.5,1498504960 -599,10,3.5,1498500281 -599,11,2.5,1498516445 -599,12,1.5,1519181787 -599,15,1.5,1519239842 -599,16,3.0,1498523389 -599,17,3.5,1498501103 -599,18,3.0,1498523048 -599,19,3.0,1498524930 -599,20,1.5,1498504813 -599,21,4.0,1498499235 -599,23,1.0,1498503332 -599,24,2.5,1498517444 -599,26,2.5,1498518655 -599,29,3.5,1498500987 -599,31,2.0,1498511120 -599,32,3.0,1498519822 -599,39,3.0,1498525783 -599,41,2.5,1498518847 -599,42,3.0,1498525483 -599,43,3.0,1519347048 -599,44,2.5,1498517161 -599,45,2.5,1498516640 -599,47,4.0,1498499364 -599,50,3.5,1498500777 -599,52,3.0,1498525392 -599,57,2.5,1519240604 -599,60,2.0,1519118310 -599,61,2.5,1519327817 -599,65,2.0,1498511235 -599,69,2.5,1498515243 -599,70,3.5,1498501183 -599,73,3.0,1519421396 -599,75,1.0,1519353713 -599,76,2.5,1498518457 -599,79,2.0,1519336237 -599,81,2.5,1498517265 -599,87,0.5,1519184941 -599,88,0.5,1498533540 -599,89,2.5,1498517086 -599,93,1.5,1498504070 -599,95,2.0,1498510588 -599,97,3.0,1519120150 -599,100,2.0,1498511085 -600,1,2.5,1237764347 -600,2,4.0,1237764627 -600,4,1.5,1237760055 -600,5,2.5,1237759452 -600,7,3.5,1237851387 -600,17,3.5,1237712509 -600,19,3.0,1237709125 -600,24,2.0,1237707977 -600,29,4.5,1237713604 -600,32,4.5,1237858629 -600,34,2.0,1237711536 -600,39,3.0,1237858693 -600,46,3.0,1237851925 -600,47,4.0,1237852430 -600,52,3.5,1237715563 -600,62,2.5,1237713038 -600,72,1.0,1237760885 -600,73,3.5,1237760041 -601,1,4.0,1521467801 -601,47,4.0,1521467863 -601,50,5.0,1441639169 -602,2,4.0,840875851 -602,6,3.0,840876055 -602,10,3.0,840875622 -602,11,3.0,840875825 -602,14,5.0,840875999 -602,16,3.0,840876085 -602,19,2.0,840875700 -602,21,4.0,840875720 -602,22,3.0,840876417 -602,25,4.0,840875998 -602,29,2.0,840876620 -602,32,3.0,840875779 -602,34,1.0,840875700 -602,36,3.0,840876228 -602,39,5.0,840875757 -602,45,5.0,840876055 -602,47,5.0,840875668 -602,50,5.0,840875720 -602,52,3.0,840876566 -602,95,3.0,840875901 -602,100,3.0,840876228 -603,1,4.0,963178147 -603,6,4.0,963177624 -603,16,4.0,963179585 -603,17,3.0,954482210 -603,21,5.0,963177624 -603,25,4.0,954482181 -603,28,5.0,953925191 -603,29,2.0,963177361 -603,30,4.0,963179273 -603,32,3.0,963179615 -603,34,4.0,963179273 -603,36,4.0,953925157 -603,39,5.0,954482276 -603,45,4.0,963179538 -603,52,1.0,963178236 -603,53,5.0,963180003 -603,57,2.0,963180025 -603,58,3.0,954482414 -603,62,1.0,963180075 -603,70,4.0,953925705 -603,77,4.0,954482106 -603,82,1.0,963180075 -603,85,5.0,953925191 -603,97,4.0,963179791 -603,99,5.0,954482070 -604,1,3.0,832079851 -604,2,5.0,832080293 -604,5,3.0,832080355 -604,6,3.0,832080355 -604,14,4.0,832081027 -604,17,4.0,832080092 -604,19,1.0,832080050 -604,22,3.0,832080546 -604,23,4.0,832081042 -604,25,3.0,832080316 -604,32,4.0,832079958 -604,34,4.0,832079958 -604,39,3.0,832079983 -604,60,4.0,832080939 -604,62,4.0,832080293 -604,76,4.0,832080615 -604,92,3.0,832080615 -604,95,4.0,832080071 -605,1,4.0,1277097561 -605,2,3.5,1277176522 -605,28,4.0,1277094943 -605,73,3.0,1277094964 -606,1,2.5,1349082950 -606,7,2.5,1171754710 -606,11,2.5,1174349629 -606,15,3.5,1171839063 -606,17,4.0,1171838026 -606,18,4.0,1171327151 -606,19,2.0,1171814553 -606,28,3.5,1173049970 -606,29,4.5,1179419005 -606,32,4.0,1173653921 -606,36,3.5,1171820699 -606,46,4.0,1171757334 -606,47,3.0,1171927423 -606,50,4.5,1171234887 -606,58,3.5,1181771144 -606,68,4.0,1171817003 -606,70,4.0,1171733439 -606,73,4.0,1171410274 -606,80,4.0,1171409272 -606,82,4.0,1175637619 -606,92,3.5,1171365342 -607,1,4.0,964744033 -607,11,3.0,964744602 -607,25,3.0,963078417 -607,34,3.0,963079238 -607,36,4.0,964744413 -607,86,4.0,963079311 -608,1,2.5,1117408267 -608,2,2.0,1117490786 -608,3,2.0,1117504413 -608,10,4.0,1117408486 -608,16,4.5,1189471181 -608,19,2.0,1117504385 -608,21,3.5,1147210949 -608,24,2.0,1117504646 -608,31,3.0,1117504582 -608,32,3.5,1117336682 -608,34,3.5,1117491662 -608,39,3.0,1117415401 -608,44,0.5,1117504562 -608,47,4.5,1117162426 -608,48,0.5,1117161754 -608,50,4.5,1117491010 -608,63,0.5,1117506926 -608,65,2.0,1117415653 -608,70,3.0,1117415406 -608,88,2.5,1117505159 -608,93,2.5,1117506858 -608,95,2.0,1117490752 -609,1,3.0,847221025 -609,10,4.0,847220937 -610,1,5.0,1479542900 -610,6,5.0,1493850345 -610,16,4.5,1479542171 -610,32,4.5,1479543331 -610,47,5.0,1479545853 -610,50,4.0,1493844757 -610,70,4.0,1495959282 -610,95,3.5,1479542004 diff --git a/tests/unused/unit/ml_handlers/data/vertex_anomaly_detection.csv b/tests/unused/unit/ml_handlers/data/vertex_anomaly_detection.csv deleted file mode 100755 index b87f4cb8e2d..00000000000 --- a/tests/unused/unit/ml_handlers/data/vertex_anomaly_detection.csv +++ /dev/null @@ -1,6 +0,0 @@ -carat,cut,color,clarity,depth,table,x,y,z -0.23,Ideal,E,VS2,61.5,55,3.95,3.98,2.43 -0.21,Premium,E,VS2,59.8,61,3.89,3.84,2.31 -0.23,Good,E,VS2,56.9,65,4.05,4.07,2.31 -0.29,Premium,I,VS2,62.4,58,4.2,4.23,2.63 -0.31,Good,J,VS2,63.3,58,4.34,4.35,2.75 \ No newline at end of file diff --git a/tests/unused/unit/ml_handlers/data/vertex_classification.csv b/tests/unused/unit/ml_handlers/data/vertex_classification.csv deleted file mode 100644 index 994bd3a5804..00000000000 --- a/tests/unused/unit/ml_handlers/data/vertex_classification.csv +++ /dev/null @@ -1,6 +0,0 @@ -Amount,Class,Time,V1,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V2,V20,V21,V22,V23,V24,V25,V26,V27,V28,V3,V4,V5,V6,V7,V8,V9 -149.62,'0',0,-1.3598071337,0.090794172,-0.5515995333,-0.6178008558,-0.9913898472,-0.3111693537,1.4681769721,-0.47040052530000004,0.20797124190000002,0.0257905802,0.40399296030000004,-0.0727811733,0.2514120982,-0.0183067779,0.2778375756,-0.1104739102,0.06692807490000001,0.12853935830000002,-0.1891148439,0.1335583767,-0.021053053500000002,2.5363467379999998,1.3781552243,-0.3383207699,0.4623877778,0.2395985541,0.0986979013,0.3637869696 -2.69,'0',0,1.1918571113,-0.16697441400000002,1.6127266611,1.0652353114,0.48909501590000004,-0.1437722964,0.6355580933,0.46391704100000003,-0.11480466310000001,-0.1833612701,-0.1457830413,0.2661507121,-0.0690831352,-0.225775248,-0.6386719528,0.1012880213,-0.3398464755,0.1671704044,0.1258945324,-0.008983099100000001,0.0147241692,0.16648011340000002,0.4481540785,0.0600176493,-0.0823608088,-0.0788029833,0.0851016549,-0.2554251281 -378.66,'0',1,-1.3583540616,0.2076428652,0.6245014594,0.0660836853,0.7172927314,-0.1659459228,2.345864949,-2.8900831944,1.1099693787,-0.1213593132,-2.2618570953,-1.3401630747,0.5249797252,0.2479981535,0.7716794019000001,0.9094122623,-0.6892809565,-0.3276418337,-0.13909657150000002,-0.055352794000000004,-0.0597518406,1.7732093426,0.379779593,-0.5031981333000001,1.8004993808,0.7914609565,0.2476757866,-1.5146543226 -123.5,'0',1,-0.9662717116,-0.0549519225,-0.2264872638,0.1782282259,0.50775687,-0.2879237455,-0.6314181177,-1.0596472454,-0.6840927863,1.9657750035000001,-1.2326219701,-0.1852260081,-0.2080377812,-0.108300452,0.0052735968000000005,-0.1903205187,-1.1755753319,0.6473760346,-0.2219288445,0.0627228487,0.0614576285,1.7929933396000002,-0.863291275,-0.010308879600000001,1.2472031675,0.2376089398,0.3774358747,-1.3870240627000001 -69.99,'0',2,-1.1582330935,0.753074432,-0.8228428779,0.5381955501,1.3458515932,-1.1196698347,0.17512113,-0.4514491828,-0.2370332394,-0.038194787,0.8034869250000001,0.8777367548,0.4085423604,-0.0094306971,0.7982784946,-0.1374580796,0.1412669838,-0.20600958760000002,0.5022922242,0.2194222295,0.2151531475,1.5487178465,0.40303393400000004,-0.4071933773,0.0959214625,0.5929407454,-0.2705326772,0.8177393082000001 diff --git a/tests/unused/unit/ml_handlers/data/vertex_regression.csv b/tests/unused/unit/ml_handlers/data/vertex_regression.csv deleted file mode 100644 index dba96576054..00000000000 --- a/tests/unused/unit/ml_handlers/data/vertex_regression.csv +++ /dev/null @@ -1,5 +0,0 @@ -date,quarter,department,day,team,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,actual_productivity -1/1/2015,Quarter1,sweing,Thursday,8,0.8,26.16,1108,7080,98,0,0,0,59,0.940725424 -1/1/2015,Quarter1,finishing ,Thursday,1,0.75,3.94,,960,0,0,0,0,8,0.8865 -1/1/2015,Quarter1,sweing,Thursday,11,0.8,11.41,968,3660,50,0,0,0,30.5,0.800570492 -1/1/2015,Quarter1,sweing,Thursday,12,0.8,11.41,968,3660,50,0,0,0,30.5,0.800570492 diff --git a/tests/unused/unit/ml_handlers/test_dspy.py b/tests/unused/unit/ml_handlers/test_dspy.py deleted file mode 100644 index 10d5ad4ebe3..00000000000 --- a/tests/unused/unit/ml_handlers/test_dspy.py +++ /dev/null @@ -1,100 +0,0 @@ -import os - - -import ollama -import pytest -from ..executor_test_base import BaseExecutorTest - - -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") - - -def ollama_model_exists(model_name: str) -> bool: - - try: - ollama.show(model_name) - return True - except Exception: - return False - - -class TestDSPy(BaseExecutorTest): - - """Test Class for DSPy Integration Testing""" - @pytest.fixture(autouse=True, scope="function") - def setup_method(self): - """Setup test environment, creating a project""" - super().setup_method() - self.run_sql("create database proj") - - @pytest.mark.skipif(OPENAI_API_KEY is None, reason='Missing OpenAI API key (OPENAI_API_KEY env variable)') - def test_default_provider(self): - - self.run_sql( - f""" - CREATE ML_ENGINE dspy_engine - FROM dspy - USING - openai_api_key = '{OPENAI_API_KEY}'; - """ - ) - - self.run_sql( - """ - create model proj.test_conversational_model - predict answer - using - engine='dspy_engine', - provider = 'openai', - model_name = 'gpt-4', - mode = 'conversational', - user_column = 'question', - assistant_column = 'answer', - prompt_template='Answer the user in a useful way'; - """ - ) - self.wait_predictor("proj", "test_conversational_model") - result_df = self.run_sql( - """ - SELECT question, answer - FROM proj.test_conversational_model - WHERE question='What is the capital of Sweden?;' - """ - ) - assert "stockholm" in result_df['answer'].iloc[0].lower() - - @pytest.mark.skipif(OPENAI_API_KEY is None, reason='Missing OpenAI API key (OPENAI_API_KEY env variable)') - def test_default_provider2(self): - - self.run_sql( - f""" - CREATE ML_ENGINE dspy_engine - FROM dspy - USING - openai_api_key = '{OPENAI_API_KEY}'; - """ - ) - - self.run_sql( - """ - create model proj.test_conversational_model - predict answer - using - engine='dspy_engine', - provider = 'openai', - model_name = 'gpt-3.5-turbo', - mode = 'conversational', - user_column = 'question', - assistant_column = 'answer', - prompt_template='Answer the user in a useful way'; - """ - ) - self.wait_predictor("proj", "test_conversational_model") - result_df = self.run_sql( - """ - SELECT question, answer - FROM proj.test_conversational_model - WHERE question='What is 3 + 4?;' - """ - ) - assert "7" in result_df['answer'].iloc[0].lower() diff --git a/tests/unused/unit/ml_handlers/test_google_gemini.py b/tests/unused/unit/ml_handlers/test_google_gemini.py deleted file mode 100644 index 8e6726f2343..00000000000 --- a/tests/unused/unit/ml_handlers/test_google_gemini.py +++ /dev/null @@ -1,105 +0,0 @@ -import os -import pytest -import pandas as pd -from unittest.mock import patch - -from .base_ml_test import BaseMLAPITest - -GEMINI_API_KEY = os.environ.get('GOOGLE_GENAI_API_KEY') - - -@pytest.mark.skipif(GEMINI_API_KEY is None, reason='Missing API key!') -class TestGeminiHandler(BaseMLAPITest): - """Test Class for Google Gemini (Bard) API handler""" - - def setup_method(self): - """Setup test environment, creating a project""" - super().setup_method() - self.run_sql("create database proj") - - def test_invalid_model_parameter(self): - """Test for invalid Gemini model parameter""" - self.run_sql( - f""" - CREATE MODEL proj.test_google_invalid_model - PREDICT answer - USING - engine='google_gemini', - column='question', - model='non-existing-gemini-model', - api_key='{GEMINI_API_KEY}'; - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_google_invalid_model") - - @pytest.mark.skip(reason="This test is failing as no error is being thrown") - def test_unknown_model_argument(self): - """Test for unknown argument when creating Gemini model""" - self.run_sql( - f""" - CREATE MODEL proj.test_google_unknown_arg - PREDICT answer - USING - engine='google', - column='question', - api_key='{GEMINI_API_KEY}', - evidently_wrong_argument='wrong value'; - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_google_unknown_arg") - - def test_single_qa(self): - """Test for single question/answer pair""" - self.run_sql( - f""" - CREATE MODEL proj.test_google_single_qa - PREDICT answer - USING - engine='google_gemini', - column='question', - api_key='{GEMINI_API_KEY}'; - """ - ) - self.wait_predictor("proj", "test_google_single_qa") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_google_single_qa - WHERE question = 'What is the capital of Sweden?'; - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_bulk_qa(self, mock_handler): - """Test for bulk question/answer pairs""" - df = pd.DataFrame.from_dict({"question": [ - "What is the capital of Sweden?", - "What is the second planet of the solar system?" - ]}) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - f""" - CREATE MODEL proj.test_google_bulk_qa - PREDICT answer - USING - engine='google_gemini', - column='question', - api_key='{GEMINI_API_KEY}'; - """ - ) - self.wait_predictor("proj", "test_google_bulk_qa") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM pg.df as t - JOIN proj.test_google_bulk_qa as p; - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - assert "venus" in result_df["answer"].iloc[1].lower() diff --git a/tests/unused/unit/ml_handlers/test_langchain.py b/tests/unused/unit/ml_handlers/test_langchain.py deleted file mode 100644 index e602a1b1bc0..00000000000 --- a/tests/unused/unit/ml_handlers/test_langchain.py +++ /dev/null @@ -1,162 +0,0 @@ -import os - -import ollama -import pytest - -from ..executor_test_base import BaseExecutorTest - -ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY") -ANYSCALE_API_KEY = os.environ.get("ANYSCALE_API_KEY") -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") -GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") - - -def ollama_model_exists(model_name: str) -> bool: - try: - ollama.show(model_name) - return True - except Exception: - return False - - -class TestLangchain(BaseExecutorTest): - """Test Class for Langchain Integration Testing""" - @pytest.fixture(autouse=True, scope="function") - def setup_method(self): - """Setup test environment, creating a project""" - super().setup_method() - self.run_sql("create database proj") - - @pytest.mark.skipif(OPENAI_API_KEY is None, reason='Missing OpenAI API key (OPENAI_API_KEY env variable)') - def test_default_provider(self): - self.run_sql( - f""" - create model proj.test_conversational_model - predict answer - using - engine='langchain', - prompt_template='Answer the user in a useful way: {{{{question}}}}', - openai_api_key='{OPENAI_API_KEY}'; - """ - ) - self.wait_predictor("proj", "test_conversational_model") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_conversational_model - WHERE question='What is the capital of Sweden?' - """ - ) - assert "stockholm" in result_df['answer'].iloc[0].lower() - - @pytest.mark.skipif(ANTHROPIC_API_KEY is None, reason='Missing Anthropic API key (ANTHROPIC_API_KEY env variable)') - def test_anthropic_provider(self): - self.run_sql( - f""" - create model proj.test_anthropic_langchain_model - predict answer - using - engine='langchain', - model_name='claude-2.1', - prompt_template='Answer the user in a useful way: {{{{question}}}}', - anthropic_api_key='{ANTHROPIC_API_KEY}'; - """ - ) - self.wait_predictor("proj", "test_anthropic_langchain_model") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_anthropic_langchain_model - WHERE question='What is the capital of Sweden?' - """ - ) - assert "stockholm" in result_df['answer'].iloc[0].lower() - - @pytest.mark.skipif(not ollama_model_exists('mistral'), reason='Make sure the mistral model is available locally by running `ollama pull mistral`') - def test_ollama_provider(self): - self.run_sql( - """ - create model proj.test_ollama_model - predict answer - using - engine='langchain', - model_name='mistral', - prompt_template='Answer the user in a useful way: {{question}}' - """ - ) - self.wait_predictor("proj", "test_ollama_model") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_ollama_model - WHERE question='What is the capital of British Columbia, Canada?' - """ - ) - assert "victoria" in result_df['answer'].iloc[0].lower() - - - @pytest.mark.skipif(GOOGLE_API_KEY is None, reason='Missing Google API key (GOOGLE_API_KEY env variable)') - def test_google_provider(self): - self.run_sql( - f""" - create model proj.test_google_langchain_model - predict answer - using - engine='langchain', - provider='google', - model_name='gemini-1.5-pro', - prompt_template='Answer the user in a useful way: {{{{question}}}}', - google_api_key='{GOOGLE_API_KEY}'; - """ - ) - self.wait_predictor("proj", "test_google_langchain_model") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_google_langchain_model - WHERE question='What is the capital of Sweden?' - """ - ) - assert "stockholm" in result_df['answer'].iloc[0].lower() - - def test_describe(self): - self.run_sql( - """ - create model proj.test_describe_model - predict answer - using - engine='langchain', - prompt_template='Answer the user in a useful way: {{question}}'; - """ - ) - self.wait_predictor("proj", "test_describe_model") - result_df = self.run_sql('DESCRIBE proj.test_describe_model') - assert not result_df.empty - - @pytest.mark.skipif(OPENAI_API_KEY is None, reason='Missing OpenAI API key (OPENAI_API_KEY env variable)') - def test_prompt_template_args(self): - self.run_sql( - f""" - create model proj.test_prompt_template_model - predict answer - using - engine='langchain', - prompt_template='Your name is {{{{name}}}}. Answer the user in a useful way: {{{{question}}}}', - openai_api_key='{OPENAI_API_KEY}'; - """ - ) - self.wait_predictor("proj", "test_prompt_template_model") - - agent_name = 'professor farnsworth' - result_df = self.run_sql( - f""" - SELECT answer - FROM proj.test_prompt_template_model - WHERE question='What is your name?' AND name='{agent_name}' - """ - ) - assert agent_name in result_df['answer'].iloc[0].lower() diff --git a/tests/unused/unit/ml_handlers/test_langchain_embedding.py b/tests/unused/unit/ml_handlers/test_langchain_embedding.py deleted file mode 100644 index 65029bb2c49..00000000000 --- a/tests/unused/unit/ml_handlers/test_langchain_embedding.py +++ /dev/null @@ -1,362 +0,0 @@ -import os -import time -from unittest.mock import patch - -import pandas as pd -import pytest -from mindsdb_sql_parser import parse_sql - -from ..executor_test_base import BaseExecutorTest - - -class TestLangchainEmbedding(BaseExecutorTest): - def wait_predictor(self, project, name): - # wait - done = False - for _ in range(200): - ret = self.run_sql(f"select * from {project}.models where name='{name}'") - if not ret.empty: - if ret["STATUS"][0] == "complete": - done = True - break - elif ret["STATUS"][0] == "error": - break - time.sleep(0.5) - if not done: - raise RuntimeError("predictor wasn't created") - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_dummy_embedding(self, mock_handler): - self.run_sql("create database proj") - # create the model - self.run_sql( - """ - CREATE MODEL proj.test_dummy_embedding - PREDICT embeddings_output_column - USING - engine='langchain_embedding', - class = 'FakeEmbeddings', - size = 512, - input_columns = ['content'] - """ - ) - - self.wait_predictor("proj", "test_dummy_embedding") - - # predictions - # one line - ret = self.run_sql( - """ - SELECT * FROM proj.test_dummy_embedding - WHERE content='hello' - """ - ) - assert "content" in ret.columns - assert "embeddings_output_column" in ret.columns - # the embeddings should be a list of 512 floats - assert len(ret["embeddings_output_column"][0]) == 512 - - # multiple lines - # insert data - df = pd.DataFrame( - [ - ["hello"], - ["world"], - ["foo"], - ["bar"], - ], - columns=["content"], - ) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - # query - ret = self.run_sql( - """ - SELECT * FROM proj.test_dummy_embedding - JOIN pg.df - """ - ) - - assert "content" in ret.columns - assert "embeddings_output_column" in ret.columns - assert ret.shape[0] == 4 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_embed_multiple_columns(self, mock_handler): - self.run_sql("create database proj") - # create the model - # with multiple input columns - self.run_sql( - """ - CREATE MODEL proj.test_dummy_embedding_multiple_columns - PREDICT embeddings - USING - engine='langchain_embedding', - class = 'fake', -- a more user friendly name - size = 512, - input_columns = ['content1', 'content2'] - """ - ) - - self.wait_predictor("proj", "test_dummy_embedding_multiple_columns") - - # predictions - # one line - ret = self.run_sql( - """ - SELECT * FROM proj.test_dummy_embedding_multiple_columns - WHERE content1='hello' - AND content2='world' - """ - ) - - assert "content1" in ret.columns - assert "content2" in ret.columns - assert "embeddings" in ret.columns - - df = pd.DataFrame( - { - "id": [1, 2, 3, 4], - "content1": ["hello", "world", "foo", "bar"], - "content2": ["world", "hello", "bar", "foo"], - } - ) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - # query - ret = self.run_sql( - """ - SELECT * FROM proj.test_dummy_embedding_multiple_columns - JOIN pg.df - """ - ) - - assert "content1" in ret.columns - assert "content2" in ret.columns - assert "embeddings" in ret.columns - assert ret.shape[0] == 4 - - # if the input missing columns, it should throw an error - with pytest.raises(Exception): - self.run_sql( - """ - SELECT * FROM proj.test_dummy_embedding_multiple_columns - WHERE content1='hello' - """ - ) - - # if the input missing columns, it should throw an error - with pytest.raises(Exception): - df2 = pd.DataFrame( - { - "content1": ["hello", "world", "foo", "bar"], - } - ) - self.set_handler(mock_handler, name="pg", tables={"df": df2}) - self.run_sql( - """ - SELECT * FROM proj.test_dummy_embedding_multiple_columns - JOIN pg.df2 - """ - ) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_no_input_columns(self, mock_handler): - self.run_sql("create database proj") - - df = pd.DataFrame( - { - "id": [1, 2, 3, 4], - "content1": ["hello", "world", "foo", "bar"], - "content2": ["world", "hello", "bar", "foo"], - } - ) - self.save_file("df", df) - - # create the model with no input columns specified should use - # all columns when embedding the documents - self.run_sql( - """ - CREATE MODEL proj.test_dummy_no_input_columns - PREDICT embeddings - USING - engine='langchain_embedding', - class = 'fake', -- a more user friendly name - size = 512 - """ - ) - - self.wait_predictor("proj", "test_dummy_no_input_columns") - - # predictions - # one line - ret = self.run_sql( - """ - SELECT * FROM proj.test_dummy_no_input_columns - WHERE content1='hello' - AND content2='world' - AND id = 1 - """ - ) - - assert "content1" in ret.columns - assert "content2" in ret.columns - assert "id" in ret.columns or "`id`" in ret.columns - assert "embeddings" in ret.columns - - # multiple lines - ret = self.run_sql( - """ - SELECT * FROM proj.test_dummy_no_input_columns - JOIN files.df - """ - ) - - assert "content1" in ret.columns - assert "content2" in ret.columns - assert ret.shape[0] == 4 - - # create the model with no input columns specified, - # but with a given from dataframe should use all the columns - # from the dataframe when embedding the documents - ret = self.run_sql( - """ - CREATE MODEL proj.test_dummy_no_input_columns_from_df - FROM files ( - SELECT *, NULL as embeddings FROM df -- this requires an empty column called embeddings - ) - PREDICT embeddings - USING - engine='langchain_embedding', - class = 'fake', -- a more user friendly name - size = 512 - """ - ) - - self.wait_predictor("proj", "test_dummy_no_input_columns_from_df") - - # input columns == ['id', 'content1', 'content2'] - # predictions - # one line - ret = self.run_sql( - """ - SELECT * FROM proj.test_dummy_no_input_columns_from_df - WHERE content1='hello' - AND content2='world' - AND id = 1 -- looks like 'id' will be quoted - """ - ) - - # missing columns id - with pytest.raises(Exception): - self.run_sql( - """ - SELECT * FROM proj.test_dummy_no_input_columns_from_df - WHERE content1='hello' - AND content2='world' - """ - ) - - # skip if there is no openai key defined in the env - @pytest.mark.skipif( - "OPENAI_API_KEY" not in os.environ, - reason="OPENAI_API_KEY env variable is not defined", - ) - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_openai_embedding(self, mock_handler): - self.run_sql("create database proj") - # create the model - self.run_sql( - """ - CREATE MODEL proj.test_openai_embedding - PREDICT embeddings - USING - engine='langchain_embedding', - class = 'openai' - """ - ) - - self.wait_predictor("proj", "test_openai_embedding") - - # single line prediction - ret = self.run_sql( - """ - SELECT * FROM proj.test_openai_embedding - WHERE content='hello' - """ - ) - - assert "content" in ret.columns - assert "embeddings" in ret.columns - - # multiple lines - # insert data - df = pd.DataFrame( - [ - ["hello"], - ["world"], - ["foo"], - ["bar"], - ], - columns=["content"], - ) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - # query - ret = self.run_sql( - """ - SELECT * FROM proj.test_openai_embedding - JOIN pg.df - """ - ) - - assert "content" in ret.columns - assert "embeddings" in ret.columns - assert ret.shape[0] == 4 - - def test_huggingface_embedding(self): - ... - - def test_missing_class_name(self): - self.run_sql("create database proj") - with pytest.raises(Exception): - self.run_sql( - """ - CREATE MODEL proj.test_missing_class_name - USING - engine='langchain_embedding', - size = 512 - """ - ) - - def test_wrong_class_name(self): - self.run_sql("create database proj") - with pytest.raises(Exception): - self.run_sql( - """ - CREATE MODEL proj.test_wrong_class_name - USING - engine='langchain_embedding', - class = 'SomethingDoesNotExist', - size = 512 - """ - ) - - def test_wrong_arguments(self): - self.run_sql("create database proj") - with pytest.raises(Exception): - self.run_sql( - """ - CREATE MODEL proj.test_wrong_arguments - USING - engine='langchain_embedding', - class = 'FakeEmbeddings', - wrong_argument_name = 512 - """ - ) diff --git a/tests/unused/unit/ml_handlers/test_rag.py b/tests/unused/unit/ml_handlers/test_rag.py deleted file mode 100644 index f5d7777276a..00000000000 --- a/tests/unused/unit/ml_handlers/test_rag.py +++ /dev/null @@ -1,324 +0,0 @@ -import os -import time - -import pandas as pd -import pytest -from mindsdb_sql_parser import parse_sql - -from tests.unit.executor_test_base import BaseExecutorTest - -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") -os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY - -WRITER_API_KEY = os.environ.get("WRITER_API_KEY") -os.environ["WRITER_API_KEY"] = WRITER_API_KEY - -WRITER_ORG_ID = os.environ.get("WRITER_ORG_ID") -os.environ["WRITER_ORG_ID"] = WRITER_ORG_ID - - -class TestRAG(BaseExecutorTest): - def wait_predictor(self, project, name): - # wait - done = False - for attempt in range(200): - ret = self.run_sql(f"select * from {project}.models where name='{name}'") - if not ret.empty: - if ret["STATUS"][0] == "complete": - done = True - break - elif ret["STATUS"][0] == "error": - break - time.sleep(0.5) - if not done: - raise RuntimeError("predictor wasn't created") - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def test_missing_required_keys(self): - # create project - self.run_sql("create database proj") - - self.run_sql( - """ - CREATE MODEL proj.test_rag_handler_missing_required_args - PREDICT answer - USING - engine="rag" - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_rag_handler_missing_required_args") - - def test_invalid_model_id_parameter(self): - # create project - - self.run_sql("create database proj") - self.run_sql( - f""" - create model proj.test_rag_openai_nonexistant_model - predict answer - using - engine='rag', - llm_type='openai', - model_id='this-model-does-not-exist', - openai_api_key='{OPENAI_API_KEY}'; - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_rag_openai_nonexistant_model") - - self.run_sql( - f""" - create model proj.test_rag_writer_nonexistant_model - predict answer - using - engine='rag', - llm_type='writer', - model_id='this-model-does-not-exist', - writer_api_key='{WRITER_API_KEY}', - writer_org_id='{WRITER_ORG_ID}'; - """ - ) - - with pytest.raises(Exception): - self.wait_predictor("proj", "test_rag_writer_nonexistant_model") - - def test_unsupported_llm_type(self): - self.run_sql("create database proj") - self.run_sql( - """ - create model proj.test_unsupported_llm - predict answer - using - engine='rag', - llm_type='unsupported_llm' - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_unsupported_llm") - - def test_unsupported_vector_store(self): - self.run_sql("create database proj") - self.run_sql( - f""" - create model proj.test_unsupported_vector_store - predict answer - using - engine='rag', - llm_type='openai', - openai_api_key='{OPENAI_API_KEY}', - vector_store_name='unsupported_vector_store' - """ - ) - - with pytest.raises(Exception): - self.wait_predictor("proj", "test_unsupported_vector_store") - - def test_unknown_arguments(self): - self.run_sql("create database proj") - self.run_sql( - f""" - create model proj.test_openai_unknown_arguments - predict answer - using - engine='rag', - llm_type='openai', - openai_api_key='{OPENAI_API_KEY}', - evidently_wrong_argument='wrong value' --- this is a wrong argument name - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_openai_unknown_arguments") - - def test_qa(self): - # create project - self.run_sql("create database proj") - df = pd.DataFrame.from_dict( - { - "context": [ - "For adults and children age 5 and older, OTC decongestants, " - "antihistamines and pain relievers might offer some symptom relief. " - "However, they won't prevent a cold or shorten its duration, and most have some side effects.", - "Paracetamol, also known as acetaminophen and APAP, " - "is a medication used to treat pain and fever as well as colds and flu. " - "It is typically used for mild to moderate pain relief. " - "Evidence is mixed for its use to relieve fever in children. " - "It is often sold in combination with other medications, such as in many cold medications.", - "lemsip is a brand of over-the-counter pharmaceuticals used to treat cold and flu symptoms. " - "The brand is currently owned by Reckitt Benckiser. " - "The original Lemsip product contained paracetamol as its active ingredient. " - "However, other products marketed under the Lemsip " - "brand contain other active ingredients such as ibuprofen," - "pseudoephedrine, phenylephrine, and guaifenesin." - ], - "url": [ - "https://docs.mindsdb.com/sql/tutorials/recommenders/", - "https://docs.mindsdb.com/sql/tutorials/llm-chatbot-ui/", - "https://docs.mindsdb.com/sql/tutorials/house-sales-forecasting/", - ], - } - ) - self.save_file("df", df) - - # test openai qa with chromadb - - self.run_sql( - f""" - create model proj.test_rag_openai_qa - from files (select * from df) - predict answer - using - engine='rag', - llm_type='openai', - openai_api_key='{OPENAI_API_KEY}', - vector_store_folder_name='rag_openai_qa_test', - input_column='question' - """ - ) - self.wait_predictor("proj", "test_rag_openai_qa") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM proj.test_rag_openai_qa as p - WHERE question='What is the best treatment for a cold?' - """ - ) - assert result_df["answer"].iloc[0] - - # test batching with openai qa chroma - - embeddings_batch_size = 1 - - self.run_sql( - f""" - create model proj.test_rag_openai_qa_batch - from files (select * from df) - predict answer - using - engine='rag', - llm_type='openai', - openai_api_key='{OPENAI_API_KEY}', - vector_store_folder_name='rag_openai_qa_test_batch', - embeddings_batch_size={embeddings_batch_size}, - input_column='question' - """ - ) - - self.wait_predictor("proj", "test_rag_openai_qa_batch") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM proj.test_rag_openai_qa_batch as p - WHERE question='What is the best treatment for a cold?' - """ - ) - assert result_df["answer"].iloc[0] - - # test writer qa with FAISS - - self.run_sql( - f""" - create model proj.test_rag_writer_qa - from files (select * from df) - predict answer - using - engine='rag', - llm_type='writer', - vector_store_name='faiss', - writer_api_key='{WRITER_API_KEY}', - writer_org_id='{WRITER_ORG_ID}', - vector_store_folder_name='rag_writer_qa_test', - input_column='question' - """ - ) - self.wait_predictor("proj", "test_rag_writer_qa") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM proj.test_rag_writer_qa as p - WHERE question='What is the best treatment for a cold?' - """ - ) - assert result_df["answer"].iloc[0] - - # test single url parsing - self.run_sql( - f""" - create model proj.test_rag_writer_qa_single_url - predict answer - using - engine='rag', - llm_type='writer', - url='https://docs.mindsdb.com/sql/tutorials/recommenders/', - vector_store_name='faiss', - writer_api_key='{WRITER_API_KEY}', - writer_org_id='{WRITER_ORG_ID}', - vector_store_folder_name='rag_writer_qa_test_single_url', - input_column='question' - """ - ) - self.wait_predictor("proj", "test_rag_writer_qa_single_url") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM proj.test_rag_writer_qa as p - WHERE question='What recommender models does mindsdb support?' - """ - ) - assert result_df["answer"].iloc[0] - - # test multi url parsing - self.run_sql( - f""" - create model proj.test_rag_writer_qa_multi_url - from files (select * from df) - predict answer - using - engine='rag', - llm_type='writer', - vector_store_name='faiss', - url_column_name='url', - writer_api_key='{WRITER_API_KEY}', - writer_org_id='{WRITER_ORG_ID}', - vector_store_folder_name='rag_writer_qa_test_multi_url', - input_column='question' - """ - ) - - self.wait_predictor("proj", "test_rag_writer_qa_multi_url") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM proj.test_rag_writer_qa_multi_url as p - WHERE question='which chat app currently works with mindsdb chatbot?' - """ - ) - - assert result_df["answer"].iloc[0] - - def test_invalid_prompt_template(self): - # create project - self.run_sql("create database proj") - self.run_sql( - f""" - create model proj.test_invalid_prompt_template_format - predict completion - using - engine='rag', - llm_type="openai", - prompt_template="not valid format", - openai_api_key='{OPENAI_API_KEY}'; - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_invalid_prompt_template_format") diff --git a/tests/unused/unit/ml_handlers/test_time_series_utils.py b/tests/unused/unit/ml_handlers/test_time_series_utils.py deleted file mode 100644 index 0204339c9d8..00000000000 --- a/tests/unused/unit/ml_handlers/test_time_series_utils.py +++ /dev/null @@ -1,144 +0,0 @@ -import pandas as pd -from statsforecast.utils import AirPassengersDF -from mindsdb.integrations.utilities.time_series_utils import ( - transform_to_nixtla_df, - get_results_from_nixtla_df, - infer_frequency, - get_best_model_from_results_df, - spec_hierarchy_from_list, - get_hierarchy_from_df, - reconcile_forecasts, -) - - -def create_mock_df(freq="Q-DEC"): - df2 = pd.DataFrame(pd.date_range(start="1/1/2010", periods=31, freq=freq), columns=["time_col"]) - df3 = df2.copy() - - df2["target_col"] = range(1, 32) - df2["group_col"] = "a" - df2["group_col_2"] = "a2" - df2["group_col_3"] = "a3" - - df3["target_col"] = range(11, 42) - df3["group_col"] = "b" - df3["group_col_2"] = "b2" - df3["group_col_3"] = "b3" - - return pd.concat([df2, df3]).reset_index(drop=True) - - -def test_infer_frequency(): - df = create_mock_df() - assert infer_frequency(df, "time_col") == "Q-DEC" - - df = create_mock_df(freq="M") - assert infer_frequency(df, "time_col") == "M" - - # Should still work if we pass string dates - df["time_col"] = df["time_col"].astype(str) - assert infer_frequency(df, "time_col") == "M" - - # Should still work if we pass unordered dates - unordered_df = pd.concat([df.iloc[:3, :], df.iloc[3:, :]]) - assert infer_frequency(unordered_df, "time_col") == "M" - - -def test_statsforecast_df_transformations(): - df = create_mock_df() - model_name = "ARIMA" - settings_dict = { - "order_by": "time_col", - "group_by": ["group_col"], - "target": "target_col", - "model_name": model_name, - } - - # Test transform for single groupby - nixtla_df = transform_to_nixtla_df(df, settings_dict) - assert [nixtla_df["unique_id"].iloc[i] == df["group_col"].iloc[i] for i in range(len(nixtla_df))] - assert [nixtla_df["y"].iloc[i] == df["target_col"].iloc[i] for i in range(len(nixtla_df))] - assert [nixtla_df["ds"].iloc[i] == df["time_col"].iloc[i] for i in range(len(nixtla_df))] - # Test reversing the transformation - nixtla_results_df = nixtla_df.rename({"y": model_name}, axis=1).set_index("unique_id") - mindsdb_results_df = get_results_from_nixtla_df(nixtla_results_df, settings_dict) - pd.testing.assert_frame_equal(mindsdb_results_df, df[["time_col", "target_col", "group_col"]]) - - # Test for multiple groups - settings_dict["group_by"] = ["group_col", "group_col_2", "group_col_3"] - nixtla_df = transform_to_nixtla_df(df, settings_dict) - assert nixtla_df["unique_id"][0] == "a/a2/a3" - # Test reversing the transformation - nixtla_results_df = nixtla_df.rename({"y": model_name}, axis=1).set_index("unique_id") - mindsdb_results_df = get_results_from_nixtla_df(nixtla_results_df, settings_dict) - pd.testing.assert_frame_equal(mindsdb_results_df, df) - - # Test with exogenous vars - settings_dict["group_by"] = ["group_col"] - settings_dict["exogenous_vars"] = ["group_col_2", "group_col_3"] - nixtla_df = transform_to_nixtla_df(df, settings_dict, exog_vars=["group_col_2", "group_col_3"]) - assert nixtla_df.columns.tolist() == ["unique_id", "ds", "y", "group_col_2", "group_col_3"] - - -def test_get_best_model_from_results_df(): - nixtla_df = AirPassengersDF.copy() - nixtla_df["AutoARIMA"] = nixtla_df["y"] + 1 - nixtla_df["AutoCES"] = nixtla_df["y"] - nixtla_df["AutoBadModel"] = nixtla_df["y"] - 2 - - assert get_best_model_from_results_df(nixtla_df) == "AutoCES" - - -def test_spec_hierarchy_from_list(): - hierachy_cols = ["col1", "col2"] - hierarchy_spec = spec_hierarchy_from_list(hierachy_cols) - - assert len(hierarchy_spec) == 3 - assert hierarchy_spec[0] == ["Total"] - assert hierarchy_spec[1] == ["Total", "col1"] - assert hierarchy_spec[2] == ["Total", "col1", "col2"] - - -def test_get_hierarchy_from_df(): - df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4], "target": [5, 6]}) - df["time_col"] = pd.date_range(start="1/1/2010", freq="M", periods=2) - model_args = { - "order_by": "time_col", - "group_by": ["col1", "col2"], - "target": "target", - "hierarchy": ["col1", "col2"], - } - - training_df, hier_df, hier_dict = get_hierarchy_from_df(df, model_args) - assert training_df.columns.tolist() == ["ds", "y"] - assert training_df.index.name == "unique_id" - # checks shape of hierarchy matrix, which is a [0, 1] matrix - assert hier_df.columns.tolist() == ["total/1/3", "total/2/4"] - assert hier_df.index.tolist() == ["total", "total/1", "total/2", "total/1/3", "total/2/4"] - - assert hier_dict["Total"].tolist() == ["total"] - assert hier_dict["Total/col1"].tolist() == ["total/1", "total/2"] - assert hier_dict["Total/col1/col2"] == ["total/1/3", "total/2/4"] - - -def test_reconcile_forecasts(): - df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4], "target": [5, 6]}) - df["time_col"] = pd.date_range(start="1/1/2010", freq="M", periods=1)[0] - model_args = { - "order_by": "time_col", - "group_by": ["col1", "col2"], - "target": "target", - "hierarchy": ["col1", "col2"], - } - - training_df, hier_df, hier_dict = get_hierarchy_from_df(df, model_args) - forecast_df = pd.DataFrame( - {"ARIMA": [15, 8, 7, 8, 7]}, index=["total", "total/1", "total/2", "total/1/3", "total/2/4"] - ) - forecast_df["ds"] = pd.date_range(start="1/3/2010", freq="M", periods=1)[0] - forecast_df.index.name = "unique_id" - results_df = reconcile_forecasts(training_df, forecast_df, hier_df, hier_dict) - - # Check we keep the hierarchically reconciled results, not the original forecast - assert "ARIMA/BottomUp" in results_df.columns - assert "ARIMA" not in results_df.columns diff --git a/tests/unused/unit/ml_handlers/test_vertex.py b/tests/unused/unit/ml_handlers/test_vertex.py deleted file mode 100644 index fd360f53443..00000000000 --- a/tests/unused/unit/ml_handlers/test_vertex.py +++ /dev/null @@ -1,247 +0,0 @@ -import time -import json - -import pandas as pd - -import pytest -from unittest.mock import Mock, patch -from mindsdb_sql_parser import parse_sql - -from tests.unit.executor_test_base import BaseExecutorTest -from mindsdb.integrations.handlers.vertex_handler.vertex_client import VertexClient - -path = "mindsdb.integrations.handlers.vertex_handler.vertex_client" - - -@pytest.fixture -def vertex_client(): - with patch(f"{path}.service_account.Credentials.from_service_account_file"), patch(f"{path}.aiplatform.init"): - client = VertexClient("fake_path", "fake_project_id") - return client - - -# Mocks -def mock_datasets(): - dataset_1 = Mock(display_name="Dataset1", name="ID1") - dataset_2 = Mock(display_name="Dataset2", name="ID2") - - # Set concrete return values for attributes - dataset_1.display_name = "Dataset1" - dataset_1.name = "ID1" - - dataset_2.display_name = "Dataset2" - dataset_2.name = "ID2" - - return [dataset_1, dataset_2] - - -def mock_endpoints(): - endpoint_1 = Mock(display_name="Endpoint1", name="EndpointID1") - endpoint_2 = Mock(display_name="Endpoint2", name="EndpointID2") - - # Set concrete return values for attributes - endpoint_1.display_name = "Endpoint1" - endpoint_1.name = "EndpointID1" - - endpoint_2.display_name = "Endpoint2" - endpoint_2.name = "EndpointID2" - - return [endpoint_1, endpoint_2] - - -def mock_models(): - model_1 = Mock(display_name="Model1", name="ModelID1") - model_2 = Mock(display_name="Model2", name="ModelID2") - - # Set concrete return values for attributes - model_1.display_name = "Model1" - model_1.name = "ModelID1" - - model_2.display_name = "Model2" - model_2.name = "ModelID2" - - return [model_1, model_2] - - -# Test of Vertex client class -def test_get_model_by_display_name(vertex_client): - with patch(f"{path}.aiplatform.Model.list", return_value=mock_models()): - model = vertex_client.get_model_by_display_name("Model1") - assert model.display_name == "Model1" - assert model.name == "ModelID1" - - -def test_get_endpoint_by_display_name(vertex_client): - with patch(f"{path}.aiplatform.Endpoint.list", return_value=mock_endpoints()): - endpoint = vertex_client.get_endpoint_by_display_name("Endpoint1") - assert endpoint.display_name == "Endpoint1" - assert endpoint.name == "EndpointID1" - - -def test_get_model_by_id(vertex_client): - with patch(f"{path}.aiplatform.Model", return_value=mock_models()[0]): - model = vertex_client.get_model_by_id("ModelID1") - assert model.display_name == "Model1" - assert model.name == "ModelID1" - - -def test_deploy_model(vertex_client): - mock_model = mock_models()[0] - with patch.object(mock_model, "deploy", return_value=mock_endpoints()[0]): - endpoint = vertex_client.deploy_model(mock_model) - assert endpoint.display_name == "Endpoint1" - assert endpoint.name == "EndpointID1" - - -def test_predict_from_csv(vertex_client, mocker): - mock_endpoint = mocker.MagicMock() - mock_endpoint.predict.return_value = "CSV Predictions" - - mocker.patch(f"{path}.pd.read_csv", return_value=pd.DataFrame({"col1": ["data1", "data2"]})) - mocker.patch(f"{path}.VertexClient.get_endpoint_by_display_name", return_value=mock_endpoint) - - predictions = vertex_client.predict_from_csv("Endpoint1", "path_to_csv") - assert predictions == "CSV Predictions" - - -def test_predict_from_json(vertex_client, mocker): - mock_endpoint = mocker.MagicMock() - mock_endpoint.predict.return_value = "JSON Predictions" - - mock_open = mocker.mock_open(read_data='{"col1": ["data1", "data2"]}') - mocker.patch("builtins.open", mock_open) - - mocker.patch(f"{path}.json.load", return_value={"col1": ["data1", "data2"]}) - mocker.patch(f"{path}.VertexClient.get_endpoint_by_display_name", return_value=mock_endpoint) - - """Make a prediction from a JSON file""" - with open("path_to_json", "r") as f: - data = json.load(f) - - predictions = vertex_client.predict_from_dict("Endpoint1", data) - assert predictions == "JSON Predictions" - - -# Test of Vertex handler - - -class TestVertex(BaseExecutorTest): - def wait_predictor(self, project, name): - # wait - done = False - for attempt in range(200): - ret = self.run_sql(f"select * from {project}.models where name='{name}'") - if not ret.empty: - if ret["STATUS"][0] == "complete": - done = True - break - elif ret["STATUS"][0] == "error": - break - time.sleep(0.5) - if not done: - raise RuntimeError("predictor wasn't created") - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_anomaly_detection_model(self, mock_handler): - # create project - self.run_sql("create database proj") - df = pd.read_csv("tests/unit/ml_handlers/data/vertex_anomaly_detection.csv") - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - # create predictor - self.run_sql( - """ - create model proj.modelx - from pg (select * from df) - predict cut - using - engine='vertex', - model_name='diamonds_anomaly_detection', - custom_model='True', - vertex_args_path='tests/unit/ml_handlers/data/vertex_args.json', - service_key_path='tests/unit/ml_handlers/data/vertex_service_key.json' - """ - ) - self.wait_predictor("proj", "modelx") - - # run predict - ret = self.run_sql( - """ - SELECT p.* - FROM pg.df as t - JOIN proj.modelx as p - """ - ) - assert len(ret) == len(df) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_regression_model(self, mock_handler): - # create database - self.run_sql("create database proj") - df = pd.read_csv("tests/unit/ml_handlers/data/vertex_regression.csv") - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - # create predictor - self.run_sql( - """ - create model proj.modelx - from pg (select * from df) - predict actual_productivity - using - engine='vertex', - model_name='productivity_regression', - vertex_args_path='tests/unit/ml_handlers/data/vertex_args.json', - service_key_path='tests/unit/ml_handlers/data/vertex_service_key.json' - """ - ) - self.wait_predictor("proj", "modelx") - - # run predict - ret = self.run_sql( - """ - SELECT p.* - FROM pg.df as t - JOIN proj.modelx as p - """ - ) - assert len(ret) == len(df) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_classification_model(self, mock_handler): - # dataset, string values - df = pd.read_csv("tests/unit/ml_handlers/data/vertex_classification.csv") - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - # create project - self.run_sql("create database proj") - - # create predictor - self.run_sql( - """ - create model proj.modelx - from pg (select * from df) - predict Class - using - engine='vertex', - model_name='fraud_detection', - vertex_args_path='tests/unit/ml_handlers/data/vertex_args.json', - service_key_path='tests/unit/ml_handlers/data/vertex_service_key.json' - """ - ) - self.wait_predictor("proj", "modelx") - - # run predict - ret = self.run_sql( - """ - SELECT p.* - FROM pg.df as t - JOIN proj.modelx as p - """ - ) - assert len(ret) == len(df) diff --git a/tests/unused/unit/ml_handlers/test_writer.py b/tests/unused/unit/ml_handlers/test_writer.py deleted file mode 100644 index 8a7e4686e9e..00000000000 --- a/tests/unused/unit/ml_handlers/test_writer.py +++ /dev/null @@ -1,143 +0,0 @@ -import os -import time -from unittest.mock import patch - -import pandas as pd -import pytest -from mindsdb_sql_parser import parse_sql - -from tests.unit.executor_test_base import BaseExecutorTest - -WRITER_API_KEY = os.environ.get("WRITER_API_KEY") -os.environ["WRITER_API_KEY"] = WRITER_API_KEY - -WRITER_ORG_ID = os.environ.get("WRITER_ORG_ID") -os.environ["WRITER_ORG_ID"] = WRITER_ORG_ID - - -class TestWriter(BaseExecutorTest): - def wait_predictor(self, project, name): - # wait - done = False - for attempt in range(200): - ret = self.run_sql(f"select * from {project}.models where name='{name}'") - if not ret.empty: - if ret["STATUS"][0] == "complete": - done = True - break - elif ret["STATUS"][0] == "error": - break - time.sleep(0.5) - if not done: - raise RuntimeError("predictor wasn't created") - - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def test_missing_required_keys(self): - # create project - self.run_sql("create database proj") - - self.run_sql( - """ - CREATE MODEL proj.test_writer_handler_missing_required_args - PREDICT answer - USING - engine="writer" - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_writer_handler_missing_required_args") - - def test_unsupported_vector_store(self): - self.run_sql("create database proj") - self.run_sql( - f""" - create model proj.test_unsupported_vector_store - predict answer - using - engine='writer', - writer_api_key='{WRITER_API_KEY}', - writer_org_id='{WRITER_ORG_ID}', - vector_store_name='unsupported_vector_store' - """ - ) - - with pytest.raises(Exception): - self.wait_predictor("proj", "test_unsupported_vector_store") - - def test_unknown_arguments(self): - self.run_sql("create database proj") - self.run_sql( - f""" - create model proj.test_writer_unknown_arguments - predict answer - using - engine='writer', - writer_api_key='{WRITER_API_KEY}', - writer_org_id='{WRITER_ORG_ID}', - evidently_wrong_argument='wrong value' --- this is a wrong argument name - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_writer_unknown_arguments") - - @pytest.mark.xfail( - reason="there seems to be an issue with running inner queries, it appears to be a potential bug in the mock handler" - ) - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_qa(self, postgres_mock_handler): - # create project - self.run_sql("create database proj") - df = pd.DataFrame.from_dict( - { - "context": [ - "For adults and children age 5 and older, OTC decongestants, " - "antihistamines and pain relievers might offer some symptom relief. " - "However, they won't prevent a cold or shorten its duration, and most have some side effects.", - ] - } - ) - self.set_handler(postgres_mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - f""" - create model proj.test_writer_writer_qa - from pg (select * from df) - predict answer - using - engine='writer', - writer_api_key='{WRITER_API_KEY}', - writer_org_id='{WRITER_ORG_ID}'; - """ - ) - self.wait_predictor("proj", "test_writer_writer_qa") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM proj.test_writer_writer_qa as p - WHERE question='What is the best treatment for a cold?' - """ - ) - assert "cold" in result_df["answer"].iloc[0].lower() - - def test_invalid_prompt_template(self): - # create project - self.run_sql("create database proj") - self.run_sql( - f""" - create model proj.test_invalid_prompt_template_format - predict completion - using - engine='writer', - prompt_template="not valid format", - writer_api_key='{WRITER_API_KEY}', - writer_org_id='{WRITER_ORG_ID}'; - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_invalid_prompt_template_format")