Update README and configuration files; add Question Answering to CLI and API, and adjust default models
This commit is contained in:
parent
8fb16726d6
commit
f081ef0db0
|
|
@ -22,6 +22,7 @@ var/
|
|||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.github/
|
||||
|
||||
# Virtual environments
|
||||
venv/
|
||||
|
|
|
|||
466
README.md
466
README.md
|
|
@ -1,16 +1,54 @@
|
|||
# 🧠 AI Lab – Transformers CLI Playground
|
||||
|
||||
> A **pedagogical and technical project** designed for AI practitioners and students to experiment with Hugging Face Transformers through an **interactive Command‑Line Interface (CLI)**.
|
||||
> This playground provides ready‑to‑use NLP pipelines (Sentiment Analysis, Named Entity Recognition, Text Generation, Fill‑Mask, Moderation, etc.) in a modular, extensible, and educational codebase.
|
||||
> A **pedagogical and technical project** designed for AI practitioners and students to explore **Hugging Face Transformers** through an **interactive Command-Line Interface (CLI)** or a **REST API**.
|
||||
> This playground provides ready-to-use NLP pipelines — including **Sentiment Analysis**, **Named Entity Recognition**, **Text Generation**, **Fill-Mask**, **Question Answering (QA)**, **Moderation**, and more — in a modular, extensible, and educational codebase.
|
||||
|
||||
---
|
||||
|
||||
<p align="center">
|
||||
<img src="https://img.shields.io/badge/Python-3.13-blue.svg" alt="Python"/>
|
||||
<img src="https://img.shields.io/badge/Built_with-Poetry-purple.svg" alt="Poetry"/>
|
||||
<img src="https://img.shields.io/badge/🤗-Transformers-orange.svg" alt="Transformers"/>
|
||||
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="License"/>
|
||||
</p>
|
||||
|
||||
---
|
||||
|
||||
## 📑 Table of Contents
|
||||
|
||||
- [📚 Overview](#-overview)
|
||||
- [🗂️ Project Structure](#️-project-structure)
|
||||
- [⚙️ Installation](#️-installation)
|
||||
- [🧾 Option 1 – Poetry (Recommended)](#-option-1--poetry-recommended)
|
||||
- [📦 Option 2 – Pip + Requirements](#-option-2--pip--requirements)
|
||||
- [▶️ Usage](#️-usage)
|
||||
- [🖥️ CLI Mode](#️-cli-mode)
|
||||
- [🌐 API Mode](#-api-mode)
|
||||
- [📡 API Endpoints](#-api-endpoints)
|
||||
- [🖥️ CLI Examples](#️-cli-examples)
|
||||
- [🧠 Architecture Overview](#-architecture-overview)
|
||||
- [⚙️ Configuration](#️-configuration)
|
||||
- [🧩 Extending the Playground](#-extending-the-playground)
|
||||
- [🧰 Troubleshooting](#-troubleshooting)
|
||||
- [🧭 Development Guidelines](#-development-guidelines)
|
||||
- [🧱 Roadmap](#-roadmap)
|
||||
- [📜 License](#-license)
|
||||
|
||||
---
|
||||
|
||||
## 📚 Overview
|
||||
|
||||
The **AI Lab – Transformers CLI Playground** allows you to explore multiple natural language processing tasks directly from the terminal.
|
||||
Each task (e.g., sentiment, NER, text generation) is implemented as a **Command Module**, which interacts with a **Pipeline Module** built on top of the `transformers` library.
|
||||
The **AI Lab – Transformers CLI Playground** enables users to explore **multiple NLP tasks directly from the terminal or via HTTP APIs**.
|
||||
Each task (sentiment, NER, text generation, etc.) is implemented as a **Command Module** that communicates with a **Pipeline Module** powered by Hugging Face’s `transformers` library.
|
||||
|
||||
The lab is intentionally structured to demonstrate **clean software design for ML codebases** — with strict separation between configuration, pipelines, CLI logic, and display formatting.
|
||||
The project demonstrates **clean ML code architecture** with strict separation between:
|
||||
|
||||
- Configuration
|
||||
- Pipelines
|
||||
- CLI logic
|
||||
- Display formatting
|
||||
|
||||
It’s a great educational resource for learning **how to structure ML applications** professionally.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -18,61 +56,54 @@ The lab is intentionally structured to demonstrate **clean software design for M
|
|||
|
||||
```text
|
||||
src/
|
||||
├── __init__.py
|
||||
├── main.py # CLI entry point
|
||||
│
|
||||
├── cli/
|
||||
│ ├── __init__.py
|
||||
│ ├── base.py # CLICommand base class & interactive shell handler
|
||||
│ └── display.py # Console formatting utilities (tables, colors, results)
|
||||
│ ├── base.py # CLICommand base class & interactive shell
|
||||
│ └── display.py # Console formatting utilities (colors, tables, results)
|
||||
│
|
||||
├── commands/ # User-facing commands wrapping pipeline logic
|
||||
│ ├── __init__.py
|
||||
│ ├── sentiment.py # Sentiment analysis command
|
||||
│ ├── fillmask.py # Masked token prediction command
|
||||
│ ├── textgen.py # Text generation command
|
||||
│ ├── ner.py # Named Entity Recognition command
|
||||
│ └── moderation.py # Toxicity / content moderation command
|
||||
│ ├── fillmask.py # Masked token prediction
|
||||
│ ├── textgen.py # Text generation
|
||||
│ ├── ner.py # Named Entity Recognition
|
||||
│ ├── qa.py # Question Answering (extractive)
|
||||
│ └── moderation.py # Content moderation / toxicity detection
|
||||
│
|
||||
├── pipelines/ # Machine learning logic (Hugging Face Transformers)
|
||||
│ ├── __init__.py
|
||||
├── pipelines/ # ML logic based on Hugging Face pipelines
|
||||
│ ├── template.py # Blueprint for creating new pipelines
|
||||
│ ├── sentiment.py
|
||||
│ ├── fillmask.py
|
||||
│ ├── textgen.py
|
||||
│ ├── ner.py
|
||||
│ ├── qa.py
|
||||
│ └── moderation.py
|
||||
│
|
||||
├── api/
|
||||
│ ├── __init__.py
|
||||
│ ├── app.py # FastAPI application with all endpoints
|
||||
│ ├── models.py # Pydantic request/response models
|
||||
│ └── config.py # API-specific configuration
|
||||
│ ├── app.py # FastAPI app and endpoints
|
||||
│ ├── models.py # Pydantic schemas
|
||||
│ └── config.py # API configuration
|
||||
│
|
||||
└── config/
|
||||
├── __init__.py
|
||||
└── settings.py # Global configuration (default models, parameters)
|
||||
└── settings.py # Global configuration (models, params)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Installation
|
||||
|
||||
### 🧾 Option 1 – Using Poetry (Recommended)
|
||||
### 🧾 Option 1 – Poetry (Recommended)
|
||||
|
||||
> Poetry is used as the main dependency manager.
|
||||
> Poetry is the main dependency manager for this project.
|
||||
|
||||
```bash
|
||||
# 1. Create and activate a new virtual environment
|
||||
poetry shell
|
||||
|
||||
# 2. Install dependencies
|
||||
poetry install
|
||||
```
|
||||
|
||||
This will automatically install all dependencies declared in `pyproject.toml`, including **transformers**, **torch**, and **FastAPI** for the API mode.
|
||||
This installs all dependencies defined in `pyproject.toml` (including `transformers`, `torch`, and `fastapi`).
|
||||
|
||||
To run the application inside the Poetry environment:
|
||||
Run the app:
|
||||
|
||||
```bash
|
||||
# CLI mode
|
||||
|
|
@ -84,21 +115,15 @@ poetry run python src/main.py --mode api
|
|||
|
||||
---
|
||||
|
||||
### 📦 Option 2 – Using pip and requirements.txt
|
||||
### 📦 Option 2 – Pip + requirements.txt
|
||||
|
||||
If you prefer using `requirements.txt` manually:
|
||||
If you prefer manual dependency management:
|
||||
|
||||
```bash
|
||||
# 1. Create a virtual environment
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate # Linux/macOS
|
||||
.venv\Scripts\Activate.ps1 # Windows
|
||||
|
||||
# 2. Activate it
|
||||
# Linux/macOS
|
||||
source .venv/bin/activate
|
||||
# Windows PowerShell
|
||||
.venv\Scripts\Activate.ps1
|
||||
|
||||
# 3. Install dependencies
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
|
|
@ -106,19 +131,15 @@ pip install -r requirements.txt
|
|||
|
||||
## ▶️ Usage
|
||||
|
||||
The application supports two modes: **CLI** (interactive) and **API** (REST server).
|
||||
|
||||
### 🖥️ CLI Mode
|
||||
|
||||
Launch the interactive CLI with:
|
||||
Run the interactive CLI:
|
||||
|
||||
```bash
|
||||
python -m src.main --mode cli
|
||||
# or, if using Poetry
|
||||
poetry run python src/main.py --mode cli
|
||||
```
|
||||
|
||||
You'll see an interactive menu listing the available commands:
|
||||
Interactive menu:
|
||||
|
||||
```
|
||||
Welcome to AI Lab - Transformers CLI Playground
|
||||
|
|
@ -127,151 +148,89 @@ Available commands:
|
|||
• fillmask – Predict masked words in a sentence
|
||||
• textgen – Generate text from a prompt
|
||||
• ner – Extract named entities from text
|
||||
• qa – Answer questions from a context
|
||||
• moderation – Detect toxic or unsafe content
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 🌐 API Mode
|
||||
|
||||
Launch the FastAPI server with:
|
||||
Run the FastAPI server:
|
||||
|
||||
```bash
|
||||
python -m src.main --mode api
|
||||
# or with custom settings
|
||||
# Custom config
|
||||
python -m src.main --mode api --host 0.0.0.0 --port 8000 --reload
|
||||
```
|
||||
|
||||
The API will be available at:
|
||||
API Docs:
|
||||
|
||||
- **Swagger Documentation**: http://localhost:8000/docs
|
||||
- **ReDoc Documentation**: http://localhost:8000/redoc
|
||||
- **OpenAPI Schema**: http://localhost:8000/openapi.json
|
||||
- **Swagger** → http://localhost:8000/docs
|
||||
- **ReDoc** → http://localhost:8000/redoc
|
||||
- **OpenAPI** → http://localhost:8000/openapi.json
|
||||
|
||||
---
|
||||
|
||||
## 📡 API Endpoints
|
||||
|
||||
The REST API provides all CLI functionality through HTTP endpoints:
|
||||
|
||||
### Core Endpoints
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
| ------ | --------- | -------------------------------- |
|
||||
| `GET` | `/` | Health check and API information |
|
||||
| ------ | --------- | ------------------------- |
|
||||
| `GET` | `/` | Health check and API info |
|
||||
| `GET` | `/health` | Detailed health status |
|
||||
|
||||
### Individual Processing
|
||||
|
||||
| Method | Endpoint | Description | Input |
|
||||
| ------ | ------------- | ------------------------ | ------------------------------------------------------------------ |
|
||||
| `POST` | `/sentiment` | Analyze text sentiment | `{"text": "string", "model": "optional"}` |
|
||||
| `POST` | `/fillmask` | Fill masked words | `{"text": "Hello [MASK]", "model": "optional"}` |
|
||||
| `POST` | `/textgen` | Generate text | `{"text": "prompt", "model": "optional"}` |
|
||||
| `POST` | `/ner` | Named entity recognition | `{"text": "string", "model": "optional"}` |
|
||||
| `POST` | `/qa` | Question answering | `{"question": "string", "context": "string", "model": "optional"}` |
|
||||
| `POST` | `/moderation` | Content moderation | `{"text": "string", "model": "optional"}` |
|
||||
| Method | Endpoint | Description |
|
||||
| ------ | ------------- | ---------------------- |
|
||||
| `POST` | `/sentiment` | Analyze text sentiment |
|
||||
| `POST` | `/fillmask` | Predict masked words |
|
||||
| `POST` | `/textgen` | Generate text |
|
||||
| `POST` | `/ner` | Extract named entities |
|
||||
| `POST` | `/qa` | Question answering |
|
||||
| `POST` | `/moderation` | Content moderation |
|
||||
|
||||
### Batch Processing
|
||||
|
||||
| Method | Endpoint | Description | Input |
|
||||
| ------ | ------------------- | ------------------------------------ | ---------------------------------------------------- |
|
||||
| `POST` | `/sentiment/batch` | Process multiple texts | `{"texts": ["text1", "text2"], "model": "optional"}` |
|
||||
| `POST` | `/fillmask/batch` | Fill multiple masked texts | `{"texts": ["text1 [MASK]"], "model": "optional"}` |
|
||||
| `POST` | `/textgen/batch` | Generate from multiple prompts | `{"texts": ["prompt1"], "model": "optional"}` |
|
||||
| `POST` | `/ner/batch` | Extract entities from multiple texts | `{"texts": ["text1"], "model": "optional"}` |
|
||||
| `POST` | `/moderation/batch` | Moderate multiple texts | `{"texts": ["text1"], "model": "optional"}` |
|
||||
|
||||
### Example API Usage
|
||||
|
||||
#### 🔹 Sentiment Analysis
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:8000/sentiment" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text": "I absolutely love this project!"}'
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"label": "POSITIVE",
|
||||
"score": 0.998,
|
||||
"model_used": "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
}
|
||||
```
|
||||
|
||||
#### 🔹 Named Entity Recognition
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:8000/ner" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text": "Elon Musk founded SpaceX in California."}'
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"entities": [
|
||||
{ "word": "Elon Musk", "label": "PERSON", "score": 0.999 },
|
||||
{ "word": "SpaceX", "label": "ORG", "score": 0.998 },
|
||||
{ "word": "California", "label": "LOC", "score": 0.995 }
|
||||
],
|
||||
"model_used": "dslim/bert-base-NER"
|
||||
}
|
||||
```
|
||||
|
||||
#### 🔹 Batch Processing
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:8000/sentiment/batch" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"texts": ["Great product!", "Terrible experience", "It was okay"]}'
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"results": [
|
||||
{ "label": "POSITIVE", "score": 0.998 },
|
||||
{ "label": "NEGATIVE", "score": 0.995 },
|
||||
{ "label": "NEUTRAL", "score": 0.876 }
|
||||
],
|
||||
"model_used": "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
}
|
||||
```
|
||||
| Method | Endpoint | Description |
|
||||
| ------ | ------------------- | -------------------------- |
|
||||
| `POST` | `/sentiment/batch` | Process multiple texts |
|
||||
| `POST` | `/fillmask/batch` | Fill multiple masked texts |
|
||||
| `POST` | `/textgen/batch` | Generate from prompts |
|
||||
| `POST` | `/ner/batch` | Extract entities in batch |
|
||||
| `POST` | `/qa/batch` | Answer questions in batch |
|
||||
| `POST` | `/moderation/batch` | Moderate multiple texts |
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ CLI Examples
|
||||
|
||||
#### 🔹 Sentiment Analysis
|
||||
### 🔹 Sentiment Analysis
|
||||
|
||||
```text
|
||||
💬 Enter text: I absolutely love this project!
|
||||
→ Sentiment: POSITIVE (score: 0.998)
|
||||
```
|
||||
|
||||
#### 🔹 Fill‑Mask
|
||||
### 🔹 Fill-Mask
|
||||
|
||||
```text
|
||||
💬 Enter text: The capital of France is [MASK].
|
||||
→ Predictions:
|
||||
1) Paris score: 0.87
|
||||
2) Lyon score: 0.04
|
||||
3) London score: 0.02
|
||||
```
|
||||
|
||||
#### 🔹 Text Generation
|
||||
### 🔹 Text Generation
|
||||
|
||||
```text
|
||||
💬 Prompt: Once upon a time
|
||||
→ Output: Once upon a time there was a young AI learning to code...
|
||||
```
|
||||
|
||||
#### 🔹 NER (Named Entity Recognition)
|
||||
### 🔹 NER
|
||||
|
||||
```text
|
||||
💬 Enter text: Elon Musk founded SpaceX in California.
|
||||
|
|
@ -281,7 +240,15 @@ Response:
|
|||
- California (LOC)
|
||||
```
|
||||
|
||||
#### 🔹 Moderation
|
||||
### 🔹 QA (Question Answering)
|
||||
|
||||
```text
|
||||
💬 Enter question: What is the capital of France?
|
||||
💬 Enter context: France is a country in Europe. Its capital is Paris.
|
||||
→ Answer: Paris
|
||||
```
|
||||
|
||||
### 🔹 Moderation
|
||||
|
||||
```text
|
||||
💬 Enter text: I hate everything!
|
||||
|
|
@ -292,90 +259,34 @@ Response:
|
|||
|
||||
## 🧠 Architecture Overview
|
||||
|
||||
The application supports dual-mode architecture: **CLI** (interactive) and **API** (REST server), both sharing the same pipeline layer:
|
||||
Both CLI and API share the **same pipeline layer**, ensuring code reusability and consistency.
|
||||
|
||||
### CLI Architecture
|
||||
|
||||
```text
|
||||
┌──────────────────────┐
|
||||
│ InteractiveCLI │
|
||||
│ (src/cli/base.py) │
|
||||
└──────────┬───────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Command Layer │ ← e.g. sentiment.py
|
||||
│ (user commands) │
|
||||
└───────┬─────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Pipeline Layer │ ← e.g. pipelines/sentiment.py
|
||||
│ (ML logic) │
|
||||
└───────┬─────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Display Layer │ ← cli/display.py
|
||||
│ (format output) │
|
||||
└─────────────────┘
|
||||
InteractiveCLI → Command Layer → Pipeline Layer → Display Layer
|
||||
```
|
||||
|
||||
### API Architecture
|
||||
|
||||
```text
|
||||
┌──────────────────────┐
|
||||
│ FastAPI App │
|
||||
│ (src/api/app.py) │
|
||||
└──────────┬───────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Pydantic Models │ ← api/models.py
|
||||
│ (validation) │
|
||||
└───────┬─────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Pipeline Layer │ ← e.g. pipelines/sentiment.py
|
||||
│ (ML logic) │ (shared with CLI)
|
||||
└───────┬─────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ JSON Response │ ← automatic serialization
|
||||
│ (HTTP output) │
|
||||
└─────────────────┘
|
||||
FastAPI App → Pydantic Models → Pipeline Layer → JSON Response
|
||||
```
|
||||
|
||||
### Key Concepts
|
||||
|
||||
| Layer | Description |
|
||||
| ------------ | -------------------------------------------------------------------------- |
|
||||
| **CLI** | Manages user input/output, help menus, and navigation between commands. |
|
||||
| **API** | FastAPI application serving HTTP endpoints with automatic documentation. |
|
||||
| **Command** | Encapsulates a single user-facing operation (e.g., run sentiment). |
|
||||
| **Pipeline** | Wraps Hugging Face's `transformers.pipeline()` to perform inference. |
|
||||
| **Models** | Pydantic schemas for request/response validation and serialization. |
|
||||
| **Display** | Handles clean console rendering (colored output, tables, JSON formatting). |
|
||||
|
||||
### Key Concepts
|
||||
|
||||
| Layer | Description |
|
||||
| ------------ | -------------------------------------------------------------------------- |
|
||||
| **CLI** | Manages user input/output, help menus, and navigation between commands. |
|
||||
| **Command** | Encapsulates a single user-facing operation (e.g., run sentiment). |
|
||||
| **Pipeline** | Wraps Hugging Face’s `transformers.pipeline()` to perform inference. |
|
||||
| **Display** | Handles clean console rendering (colored output, tables, JSON formatting). |
|
||||
| **Config** | Centralizes model names, limits, and global constants. |
|
||||
| ------------ | ---------------------------------------------- |
|
||||
| **CLI** | Manages user input/output and navigation. |
|
||||
| **API** | Exposes endpoints with automatic OpenAPI docs. |
|
||||
| **Command** | Encapsulates user-facing operations. |
|
||||
| **Pipeline** | Wraps Hugging Face’s pipelines. |
|
||||
| **Models** | Validates requests/responses. |
|
||||
| **Display** | Formats console output. |
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Configuration
|
||||
|
||||
All configuration is centralized in `src/config/settings.py`.
|
||||
|
||||
Example:
|
||||
All configuration is centralized in `src/config/settings.py`:
|
||||
|
||||
```python
|
||||
class Config:
|
||||
|
|
@ -384,140 +295,75 @@ class Config:
|
|||
"fillmask": "bert-base-uncased",
|
||||
"textgen": "gpt2",
|
||||
"ner": "dslim/bert-base-NER",
|
||||
"moderation":"unitary/toxic-bert"
|
||||
"qa": "distilbert-base-cased-distilled-squad",
|
||||
"moderation": "unitary/toxic-bert",
|
||||
}
|
||||
MAX_LENGTH = 512
|
||||
BATCH_SIZE = 8
|
||||
```
|
||||
|
||||
You can easily modify model names to experiment with different checkpoints.
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Extending the Playground
|
||||
|
||||
To create a new experiment (e.g., keyword extraction):
|
||||
To add a new NLP experiment (e.g., keyword extraction):
|
||||
|
||||
### For CLI Support
|
||||
1. Duplicate `src/pipelines/template.py` → `src/pipelines/keywords.py`
|
||||
2. Create a command: `src/commands/keywords.py`
|
||||
3. Register it in `src/main.py`
|
||||
4. Add Pydantic models and API endpoint
|
||||
5. Update `Config.DEFAULT_MODELS`
|
||||
|
||||
1. **Duplicate** `src/pipelines/template.py` → `src/pipelines/keywords.py`
|
||||
Implement the `run()` or `analyze()` logic using a new Hugging Face pipeline.
|
||||
|
||||
2. **Create a Command** in `src/commands/keywords.py` to interact with users.
|
||||
|
||||
3. **Register the command** inside `src/main.py`:
|
||||
|
||||
```python
|
||||
from src.commands.keywords import KeywordsCommand
|
||||
cli.register_command(KeywordsCommand())
|
||||
```
|
||||
|
||||
### For API Support
|
||||
|
||||
4. **Add Pydantic models** in `src/api/models.py`:
|
||||
|
||||
```python
|
||||
class KeywordsRequest(BaseModel):
|
||||
text: str
|
||||
model: Optional[str] = None
|
||||
|
||||
class KeywordsResponse(BaseModel):
|
||||
success: bool
|
||||
keywords: List[str]
|
||||
model_used: str
|
||||
```
|
||||
|
||||
5. **Add endpoint** in `src/api/app.py`:
|
||||
|
||||
```python
|
||||
@app.post("/keywords", response_model=KeywordsResponse)
|
||||
async def extract_keywords(request: KeywordsRequest):
|
||||
# Implementation using KeywordsAnalyzer pipeline
|
||||
pass
|
||||
```
|
||||
|
||||
6. **Update configuration** in `Config.DEFAULT_MODELS`.
|
||||
|
||||
Both CLI and API will automatically share the same pipeline implementation!
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
You can use `pytest` for lightweight validation:
|
||||
|
||||
```bash
|
||||
pip install pytest
|
||||
pytest -q
|
||||
```
|
||||
|
||||
Recommended structure:
|
||||
|
||||
```
|
||||
tests/
|
||||
├── test_sentiment.py
|
||||
├── test_textgen.py
|
||||
└── ...
|
||||
```
|
||||
Both CLI and API will automatically share this logic.
|
||||
|
||||
---
|
||||
|
||||
## 🧰 Troubleshooting
|
||||
|
||||
### General Issues
|
||||
| Issue | Solution |
|
||||
| ------------------------ | ----------------------- |
|
||||
| `transformers` not found | Activate your venv. |
|
||||
| Torch install fails | Use CPU-only wheel. |
|
||||
| Models download slowly | Cached after first use. |
|
||||
| Encoding issues | Ensure UTF-8 terminal. |
|
||||
|
||||
| Issue | Cause / Solution |
|
||||
| ---------------------------- | -------------------------------------------- |
|
||||
| **`transformers` not found** | Check virtual environment activation. |
|
||||
| **Torch fails to install** | Install CPU-only version from PyTorch index. |
|
||||
| **Models download slowly** | Hugging Face caches them after first run. |
|
||||
| **Unicode / accents broken** | Ensure terminal encoding is UTF‑8. |
|
||||
### API Issues
|
||||
|
||||
### API-Specific Issues
|
||||
|
||||
| Issue | Cause / Solution |
|
||||
| ----------------------------- | ----------------------------------------------------- |
|
||||
| **`FastAPI` not found** | Install with `pip install fastapi uvicorn[standard]`. |
|
||||
| **Port already in use** | Use `--port 8001` or kill process on port 8000. |
|
||||
| **CORS errors in browser** | Check `allow_origins` in `src/api/config.py`. |
|
||||
| **422 Validation Error** | Check request body matches Pydantic models. |
|
||||
| **500 Internal Server Error** | Check model loading and pipeline initialization. |
|
||||
|
||||
### Quick API Health Check
|
||||
|
||||
```bash
|
||||
# Test if API is running
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# Test basic endpoint
|
||||
curl -X POST "http://localhost:8000/sentiment" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text": "test"}'
|
||||
```
|
||||
| Issue | Solution |
|
||||
| -------------------- | --------------------------------------- |
|
||||
| `FastAPI` missing | `pip install fastapi uvicorn[standard]` |
|
||||
| Port in use | Change with `--port 8001` |
|
||||
| CORS error | Edit `allow_origins` in `api/config.py` |
|
||||
| Validation error 422 | Check request body |
|
||||
| 500 error | Verify model loading |
|
||||
|
||||
---
|
||||
|
||||
## 🧭 Development Guidelines
|
||||
|
||||
- Keep **Command** classes lightweight — no ML logic inside them.
|
||||
- Reuse the **Pipeline Template** for new experiments.
|
||||
- Format outputs consistently via the `DisplayFormatter`.
|
||||
- Document all new models or commands in `README.md` and `settings.py`.
|
||||
- Keep command classes lightweight (no ML inside)
|
||||
- Use the pipeline template for new tasks
|
||||
- Format all outputs via `DisplayFormatter`
|
||||
- Document new commands and models
|
||||
|
||||
---
|
||||
|
||||
## 🧱 Roadmap
|
||||
|
||||
- [ ] Add non-interactive CLI flags (`--text`, `--task`)
|
||||
- [ ] Add multilingual model options
|
||||
- [ ] Add automatic test coverage
|
||||
- [ ] Add logging and profiling utilities
|
||||
- [ ] Add export to JSON/CSV results
|
||||
- [ ] Non-interactive CLI flags (`--text`, `--task`)
|
||||
- [ ] Multilingual models
|
||||
- [ ] Test coverage
|
||||
- [ ] Logging & profiling
|
||||
- [ ] Export to JSON/CSV
|
||||
|
||||
---
|
||||
|
||||
## 📜 License
|
||||
|
||||
This project is licensed under the [MIT License](./LICENSE) — feel free to use it, modify it, and share it!
|
||||
Licensed under the [MIT License](./LICENSE).
|
||||
You are free to use, modify, and distribute this project.
|
||||
|
||||
---
|
||||
|
||||
✨ **End of Documentation**
|
||||
_The AI Lab – Transformers CLI Playground: built for learning, experimenting, and sharing NLP excellence._
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import logging
|
|||
from .models import (
|
||||
TextRequest, TextListRequest, QARequest, FillMaskRequest,
|
||||
SentimentResponse, NERResponse, QAResponse, FillMaskResponse,
|
||||
ModerationResponse, TextGenResponse, BatchResponse, ErrorResponse
|
||||
ModerationResponse, TextGenResponse, BatchResponse
|
||||
)
|
||||
|
||||
# Global pipeline instances
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
API configuration settings
|
||||
"""
|
||||
from typing import Dict, Any
|
||||
from src.config.settings import Config
|
||||
|
||||
|
||||
class APIConfig:
|
||||
|
|
@ -25,21 +26,6 @@ class APIConfig:
|
|||
MAX_TEXT_LENGTH = 10000
|
||||
MAX_BATCH_SIZE = 100
|
||||
|
||||
# Model defaults
|
||||
DEFAULT_MODELS: Dict[str, str] = {
|
||||
"sentiment": "cardiffnlp/twitter-roberta-base-sentiment-latest",
|
||||
"ner": "dbmdz/bert-large-cased-finetuned-conll03-english",
|
||||
"qa": "distilbert-base-cased-distilled-squad",
|
||||
"fillmask": "bert-base-uncased",
|
||||
"moderation": "martin-ha/toxic-comment-model",
|
||||
"textgen": "gpt2"
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_default_model(cls, task: str) -> str:
|
||||
"""Get default model for a task"""
|
||||
return cls.DEFAULT_MODELS.get(task, "")
|
||||
|
||||
@classmethod
|
||||
def get_all_settings(cls) -> Dict[str, Any]:
|
||||
"""Get all configuration settings"""
|
||||
|
|
@ -57,5 +43,4 @@ class APIConfig:
|
|||
"max_text_length": cls.MAX_TEXT_LENGTH,
|
||||
"max_batch_size": cls.MAX_BATCH_SIZE
|
||||
},
|
||||
"default_models": cls.DEFAULT_MODELS
|
||||
}
|
||||
|
|
@ -15,11 +15,11 @@ class Config:
|
|||
|
||||
# Default models
|
||||
DEFAULT_MODELS = {
|
||||
"sentiment": "cardiffnlp/twitter-roberta-base-sentiment-latest",
|
||||
"fillmask": "distilbert-base-uncased",
|
||||
"sentiment": "distilbert-base-uncased-finetuned-sst-2-english",
|
||||
"fillmask": "bert-base-uncased",
|
||||
"textgen": "gpt2",
|
||||
"moderation": "unitary/toxic-bert",
|
||||
"ner": "dbmdz/bert-large-cased-finetuned-conll03-english",
|
||||
"ner": "dslim/bert-base-NER",
|
||||
"moderation": "unitary/toxic-bert",
|
||||
"qa": "distilbert-base-cased-distilled-squad",
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue