Update README and configuration files; add Question Answering to CLI and API, and adjust default models
This commit is contained in:
parent
8fb16726d6
commit
f081ef0db0
|
|
@ -22,6 +22,7 @@ var/
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
.installed.cfg
|
.installed.cfg
|
||||||
*.egg
|
*.egg
|
||||||
|
.github/
|
||||||
|
|
||||||
# Virtual environments
|
# Virtual environments
|
||||||
venv/
|
venv/
|
||||||
|
|
|
||||||
472
README.md
472
README.md
|
|
@ -1,16 +1,54 @@
|
||||||
# 🧠 AI Lab – Transformers CLI Playground
|
# 🧠 AI Lab – Transformers CLI Playground
|
||||||
|
|
||||||
> A **pedagogical and technical project** designed for AI practitioners and students to experiment with Hugging Face Transformers through an **interactive Command‑Line Interface (CLI)**.
|
> A **pedagogical and technical project** designed for AI practitioners and students to explore **Hugging Face Transformers** through an **interactive Command-Line Interface (CLI)** or a **REST API**.
|
||||||
> This playground provides ready‑to‑use NLP pipelines (Sentiment Analysis, Named Entity Recognition, Text Generation, Fill‑Mask, Moderation, etc.) in a modular, extensible, and educational codebase.
|
> This playground provides ready-to-use NLP pipelines — including **Sentiment Analysis**, **Named Entity Recognition**, **Text Generation**, **Fill-Mask**, **Question Answering (QA)**, **Moderation**, and more — in a modular, extensible, and educational codebase.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="https://img.shields.io/badge/Python-3.13-blue.svg" alt="Python"/>
|
||||||
|
<img src="https://img.shields.io/badge/Built_with-Poetry-purple.svg" alt="Poetry"/>
|
||||||
|
<img src="https://img.shields.io/badge/🤗-Transformers-orange.svg" alt="Transformers"/>
|
||||||
|
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="License"/>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📑 Table of Contents
|
||||||
|
|
||||||
|
- [📚 Overview](#-overview)
|
||||||
|
- [🗂️ Project Structure](#️-project-structure)
|
||||||
|
- [⚙️ Installation](#️-installation)
|
||||||
|
- [🧾 Option 1 – Poetry (Recommended)](#-option-1--poetry-recommended)
|
||||||
|
- [📦 Option 2 – Pip + requirements.txt](#-option-2--pip--requirementstxt)
|
||||||
|
- [▶️ Usage](#️-usage)
|
||||||
|
- [🖥️ CLI Mode](#️-cli-mode)
|
||||||
|
- [🌐 API Mode](#-api-mode)
|
||||||
|
- [📡 API Endpoints](#-api-endpoints)
|
||||||
|
- [🖥️ CLI Examples](#️-cli-examples)
|
||||||
|
- [🧠 Architecture Overview](#-architecture-overview)
|
||||||
|
- [⚙️ Configuration](#️-configuration)
|
||||||
|
- [🧩 Extending the Playground](#-extending-the-playground)
|
||||||
|
- [🧰 Troubleshooting](#-troubleshooting)
|
||||||
|
- [🧭 Development Guidelines](#-development-guidelines)
|
||||||
|
- [🧱 Roadmap](#-roadmap)
|
||||||
|
- [📜 License](#-license)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 📚 Overview
|
## 📚 Overview
|
||||||
|
|
||||||
The **AI Lab – Transformers CLI Playground** allows you to explore multiple natural language processing tasks directly from the terminal.
|
The **AI Lab – Transformers CLI Playground** enables users to explore **multiple NLP tasks directly from the terminal or via HTTP APIs**.
|
||||||
Each task (e.g., sentiment, NER, text generation) is implemented as a **Command Module**, which interacts with a **Pipeline Module** built on top of the `transformers` library.
|
Each task (sentiment, NER, text generation, etc.) is implemented as a **Command Module** that communicates with a **Pipeline Module** powered by Hugging Face’s `transformers` library.
|
||||||
|
|
||||||
The lab is intentionally structured to demonstrate **clean software design for ML codebases** — with strict separation between configuration, pipelines, CLI logic, and display formatting.
|
The project demonstrates **clean ML code architecture** with strict separation between:
|
||||||
|
|
||||||
|
- Configuration
|
||||||
|
- Pipelines
|
||||||
|
- CLI logic
|
||||||
|
- Display formatting
|
||||||
|
|
||||||
|
It’s a great educational resource for learning **how to structure ML applications** professionally.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -18,61 +56,54 @@ The lab is intentionally structured to demonstrate **clean software design for M
|
||||||
|
|
||||||
```text
|
```text
|
||||||
src/
|
src/
|
||||||
├── __init__.py
|
|
||||||
├── main.py # CLI entry point
|
├── main.py # CLI entry point
|
||||||
│
|
│
|
||||||
├── cli/
|
├── cli/
|
||||||
│ ├── __init__.py
|
│ ├── base.py # CLICommand base class & interactive shell
|
||||||
│ ├── base.py # CLICommand base class & interactive shell handler
|
│ └── display.py # Console formatting utilities (colors, tables, results)
|
||||||
│ └── display.py # Console formatting utilities (tables, colors, results)
|
|
||||||
│
|
│
|
||||||
├── commands/ # User-facing commands wrapping pipeline logic
|
├── commands/ # User-facing commands wrapping pipeline logic
|
||||||
│ ├── __init__.py
|
|
||||||
│ ├── sentiment.py # Sentiment analysis command
|
│ ├── sentiment.py # Sentiment analysis command
|
||||||
│ ├── fillmask.py # Masked token prediction command
|
│ ├── fillmask.py # Masked token prediction
|
||||||
│ ├── textgen.py # Text generation command
|
│ ├── textgen.py # Text generation
|
||||||
│ ├── ner.py # Named Entity Recognition command
|
│ ├── ner.py # Named Entity Recognition
|
||||||
│ └── moderation.py # Toxicity / content moderation command
|
│ ├── qa.py # Question Answering (extractive)
|
||||||
|
│ └── moderation.py # Content moderation / toxicity detection
|
||||||
│
|
│
|
||||||
├── pipelines/ # Machine learning logic (Hugging Face Transformers)
|
├── pipelines/ # ML logic based on Hugging Face pipelines
|
||||||
│ ├── __init__.py
|
|
||||||
│ ├── template.py # Blueprint for creating new pipelines
|
│ ├── template.py # Blueprint for creating new pipelines
|
||||||
│ ├── sentiment.py
|
│ ├── sentiment.py
|
||||||
│ ├── fillmask.py
|
│ ├── fillmask.py
|
||||||
│ ├── textgen.py
|
│ ├── textgen.py
|
||||||
│ ├── ner.py
|
│ ├── ner.py
|
||||||
|
│ ├── qa.py
|
||||||
│ └── moderation.py
|
│ └── moderation.py
|
||||||
│
|
│
|
||||||
├── api/
|
├── api/
|
||||||
│ ├── __init__.py
|
│ ├── app.py # FastAPI app and endpoints
|
||||||
│ ├── app.py # FastAPI application with all endpoints
|
│ ├── models.py # Pydantic schemas
|
||||||
│ ├── models.py # Pydantic request/response models
|
│ └── config.py # API configuration
|
||||||
│ └── config.py # API-specific configuration
|
|
||||||
│
|
│
|
||||||
└── config/
|
└── config/
|
||||||
├── __init__.py
|
└── settings.py # Global configuration (models, params)
|
||||||
└── settings.py # Global configuration (default models, parameters)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## ⚙️ Installation
|
## ⚙️ Installation
|
||||||
|
|
||||||
### 🧾 Option 1 – Using Poetry (Recommended)
|
### 🧾 Option 1 – Poetry (Recommended)
|
||||||
|
|
||||||
> Poetry is used as the main dependency manager.
|
> Poetry is the main dependency manager for this project.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 1. Create and activate a new virtual environment
|
|
||||||
poetry shell
|
poetry shell
|
||||||
|
|
||||||
# 2. Install dependencies
|
|
||||||
poetry install
|
poetry install
|
||||||
```
|
```
|
||||||
|
|
||||||
This will automatically install all dependencies declared in `pyproject.toml`, including **transformers**, **torch**, and **FastAPI** for the API mode.
|
This installs all dependencies defined in `pyproject.toml` (including `transformers`, `torch`, and `fastapi`).
|
||||||
|
|
||||||
To run the application inside the Poetry environment:
|
Run the app:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# CLI mode
|
# CLI mode
|
||||||
|
|
@ -84,21 +115,15 @@ poetry run python src/main.py --mode api
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### 📦 Option 2 – Using pip and requirements.txt
|
### 📦 Option 2 – Pip + requirements.txt
|
||||||
|
|
||||||
If you prefer using `requirements.txt` manually:
|
If you prefer manual dependency management:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 1. Create a virtual environment
|
|
||||||
python -m venv .venv
|
python -m venv .venv
|
||||||
|
source .venv/bin/activate # Linux/macOS
|
||||||
|
.venv\Scripts\Activate.ps1 # Windows
|
||||||
|
|
||||||
# 2. Activate it
|
|
||||||
# Linux/macOS
|
|
||||||
source .venv/bin/activate
|
|
||||||
# Windows PowerShell
|
|
||||||
.venv\Scripts\Activate.ps1
|
|
||||||
|
|
||||||
# 3. Install dependencies
|
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -106,19 +131,15 @@ pip install -r requirements.txt
|
||||||
|
|
||||||
## ▶️ Usage
|
## ▶️ Usage
|
||||||
|
|
||||||
The application supports two modes: **CLI** (interactive) and **API** (REST server).
|
|
||||||
|
|
||||||
### 🖥️ CLI Mode
|
### 🖥️ CLI Mode
|
||||||
|
|
||||||
Launch the interactive CLI with:
|
Run the interactive CLI:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m src.main --mode cli
|
python -m src.main --mode cli
|
||||||
# or, if using Poetry
|
|
||||||
poetry run python src/main.py --mode cli
|
|
||||||
```
|
```
|
||||||
|
|
||||||
You'll see an interactive menu listing the available commands:
|
Interactive menu:
|
||||||
|
|
||||||
```
|
```
|
||||||
Welcome to AI Lab - Transformers CLI Playground
|
Welcome to AI Lab - Transformers CLI Playground
|
||||||
|
|
@ -127,151 +148,89 @@ Available commands:
|
||||||
• fillmask – Predict masked words in a sentence
|
• fillmask – Predict masked words in a sentence
|
||||||
• textgen – Generate text from a prompt
|
• textgen – Generate text from a prompt
|
||||||
• ner – Extract named entities from text
|
• ner – Extract named entities from text
|
||||||
|
• qa – Answer questions from a context
|
||||||
• moderation – Detect toxic or unsafe content
|
• moderation – Detect toxic or unsafe content
|
||||||
```
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
### 🌐 API Mode
|
### 🌐 API Mode
|
||||||
|
|
||||||
Launch the FastAPI server with:
|
Run FastAPI server:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m src.main --mode api
|
python -m src.main --mode api
|
||||||
# or with custom settings
|
# Custom config
|
||||||
python -m src.main --mode api --host 0.0.0.0 --port 8000 --reload
|
python -m src.main --mode api --host 0.0.0.0 --port 8000 --reload
|
||||||
```
|
```
|
||||||
|
|
||||||
The API will be available at:
|
API Docs:
|
||||||
|
|
||||||
- **Swagger Documentation**: http://localhost:8000/docs
|
- **Swagger** → http://localhost:8000/docs
|
||||||
- **ReDoc Documentation**: http://localhost:8000/redoc
|
- **ReDoc** → http://localhost:8000/redoc
|
||||||
- **OpenAPI Schema**: http://localhost:8000/openapi.json
|
- **OpenAPI** → http://localhost:8000/openapi.json
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## 📡 API Endpoints
|
## 📡 API Endpoints
|
||||||
|
|
||||||
The REST API provides all CLI functionality through HTTP endpoints:
|
|
||||||
|
|
||||||
### Core Endpoints
|
### Core Endpoints
|
||||||
|
|
||||||
| Method | Endpoint | Description |
|
| Method | Endpoint | Description |
|
||||||
| ------ | --------- | -------------------------------- |
|
| ------ | --------- | ------------------------- |
|
||||||
| `GET` | `/` | Health check and API information |
|
| `GET` | `/` | Health check and API info |
|
||||||
| `GET` | `/health` | Detailed health status |
|
| `GET` | `/health` | Detailed health status |
|
||||||
|
|
||||||
### Individual Processing
|
### Individual Processing
|
||||||
|
|
||||||
| Method | Endpoint | Description | Input |
|
| Method | Endpoint | Description |
|
||||||
| ------ | ------------- | ------------------------ | ------------------------------------------------------------------ |
|
| ------ | ------------- | ---------------------- |
|
||||||
| `POST` | `/sentiment` | Analyze text sentiment | `{"text": "string", "model": "optional"}` |
|
| `POST` | `/sentiment` | Analyze text sentiment |
|
||||||
| `POST` | `/fillmask` | Fill masked words | `{"text": "Hello [MASK]", "model": "optional"}` |
|
| `POST` | `/fillmask` | Predict masked words |
|
||||||
| `POST` | `/textgen` | Generate text | `{"text": "prompt", "model": "optional"}` |
|
| `POST` | `/textgen` | Generate text |
|
||||||
| `POST` | `/ner` | Named entity recognition | `{"text": "string", "model": "optional"}` |
|
| `POST` | `/ner` | Extract named entities |
|
||||||
| `POST` | `/qa` | Question answering | `{"question": "string", "context": "string", "model": "optional"}` |
|
| `POST` | `/qa` | Question answering |
|
||||||
| `POST` | `/moderation` | Content moderation | `{"text": "string", "model": "optional"}` |
|
| `POST` | `/moderation` | Content moderation |
|
||||||
|
|
||||||
### Batch Processing
|
### Batch Processing
|
||||||
|
|
||||||
| Method | Endpoint | Description | Input |
|
| Method | Endpoint | Description |
|
||||||
| ------ | ------------------- | ------------------------------------ | ---------------------------------------------------- |
|
| ------ | ------------------- | -------------------------- |
|
||||||
| `POST` | `/sentiment/batch` | Process multiple texts | `{"texts": ["text1", "text2"], "model": "optional"}` |
|
| `POST` | `/sentiment/batch` | Process multiple texts |
|
||||||
| `POST` | `/fillmask/batch` | Fill multiple masked texts | `{"texts": ["text1 [MASK]"], "model": "optional"}` |
|
| `POST` | `/fillmask/batch` | Fill multiple masked texts |
|
||||||
| `POST` | `/textgen/batch` | Generate from multiple prompts | `{"texts": ["prompt1"], "model": "optional"}` |
|
| `POST` | `/textgen/batch` | Generate from prompts |
|
||||||
| `POST` | `/ner/batch` | Extract entities from multiple texts | `{"texts": ["text1"], "model": "optional"}` |
|
| `POST` | `/ner/batch` | Extract entities in batch |
|
||||||
| `POST` | `/moderation/batch` | Moderate multiple texts | `{"texts": ["text1"], "model": "optional"}` |
|
| `POST` | `/qa/batch` | Answer questions in batch |
|
||||||
|
| `POST` | `/moderation/batch` | Moderate multiple texts |
|
||||||
### Example API Usage
|
|
||||||
|
|
||||||
#### 🔹 Sentiment Analysis
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST "http://localhost:8000/sentiment" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"text": "I absolutely love this project!"}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Response:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"success": true,
|
|
||||||
"label": "POSITIVE",
|
|
||||||
"score": 0.998,
|
|
||||||
"model_used": "distilbert-base-uncased-finetuned-sst-2-english"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 🔹 Named Entity Recognition
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST "http://localhost:8000/ner" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"text": "Elon Musk founded SpaceX in California."}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Response:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"success": true,
|
|
||||||
"entities": [
|
|
||||||
{ "word": "Elon Musk", "label": "PER", "score": 0.999 },
|
|
||||||
{ "word": "SpaceX", "label": "ORG", "score": 0.998 },
|
|
||||||
{ "word": "California", "label": "LOC", "score": 0.995 }
|
|
||||||
],
|
|
||||||
"model_used": "dslim/bert-base-NER"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 🔹 Batch Processing
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST "http://localhost:8000/sentiment/batch" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"texts": ["Great product!", "Terrible experience", "It was okay"]}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Response:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"success": true,
|
|
||||||
"results": [
|
|
||||||
{ "label": "POSITIVE", "score": 0.998 },
|
|
||||||
{ "label": "NEGATIVE", "score": 0.995 },
|
|
||||||
{ "label": "POSITIVE", "score": 0.876 }
|
|
||||||
],
|
|
||||||
"model_used": "distilbert-base-uncased-finetuned-sst-2-english"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🖥️ CLI Examples
|
## 🖥️ CLI Examples
|
||||||
|
|
||||||
#### 🔹 Sentiment Analysis
|
### 🔹 Sentiment Analysis
|
||||||
|
|
||||||
```text
|
```text
|
||||||
💬 Enter text: I absolutely love this project!
|
💬 Enter text: I absolutely love this project!
|
||||||
→ Sentiment: POSITIVE (score: 0.998)
|
→ Sentiment: POSITIVE (score: 0.998)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 🔹 Fill‑Mask
|
### 🔹 Fill-Mask
|
||||||
|
|
||||||
```text
|
```text
|
||||||
💬 Enter text: The capital of France is [MASK].
|
💬 Enter text: The capital of France is [MASK].
|
||||||
→ Predictions:
|
→ Predictions:
|
||||||
1) Paris score: 0.87
|
1) Paris score: 0.87
|
||||||
2) Lyon score: 0.04
|
2) Lyon score: 0.04
|
||||||
3) London score: 0.02
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 🔹 Text Generation
|
### 🔹 Text Generation
|
||||||
|
|
||||||
```text
|
```text
|
||||||
💬 Prompt: Once upon a time
|
💬 Prompt: Once upon a time
|
||||||
→ Output: Once upon a time there was a young AI learning to code...
|
→ Output: Once upon a time there was a young AI learning to code...
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 🔹 NER (Named Entity Recognition)
|
### 🔹 NER
|
||||||
|
|
||||||
```text
|
```text
|
||||||
💬 Enter text: Elon Musk founded SpaceX in California.
|
💬 Enter text: Elon Musk founded SpaceX in California.
|
||||||
|
|
@ -281,7 +240,15 @@ Response:
|
||||||
- California (LOC)
|
- California (LOC)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 🔹 Moderation
|
### 🔹 QA (Question Answering)
|
||||||
|
|
||||||
|
```text
|
||||||
|
💬 Enter question: What is the capital of France?
|
||||||
|
💬 Enter context: France is a country in Europe. Its capital is Paris.
|
||||||
|
→ Answer: Paris
|
||||||
|
```
|
||||||
|
|
||||||
|
### 🔹 Moderation
|
||||||
|
|
||||||
```text
|
```text
|
||||||
💬 Enter text: I hate everything!
|
💬 Enter text: I hate everything!
|
||||||
|
|
@ -292,90 +259,34 @@ Response:
|
||||||
|
|
||||||
## 🧠 Architecture Overview
|
## 🧠 Architecture Overview
|
||||||
|
|
||||||
The application supports dual-mode architecture: **CLI** (interactive) and **API** (REST server), both sharing the same pipeline layer:
|
Both CLI and API share the **same pipeline layer**, ensuring code reusability and consistency.
|
||||||
|
|
||||||
### CLI Architecture
|
### CLI Architecture
|
||||||
|
|
||||||
```text
|
```text
|
||||||
┌──────────────────────┐
|
InteractiveCLI → Command Layer → Pipeline Layer → Display Layer
|
||||||
│ InteractiveCLI │
|
|
||||||
│ (src/cli/base.py) │
|
|
||||||
└──────────┬───────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────┐
|
|
||||||
│ Command Layer │ ← e.g. sentiment.py
|
|
||||||
│ (user commands) │
|
|
||||||
└───────┬─────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────┐
|
|
||||||
│ Pipeline Layer │ ← e.g. pipelines/sentiment.py
|
|
||||||
│ (ML logic) │
|
|
||||||
└───────┬─────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────┐
|
|
||||||
│ Display Layer │ ← cli/display.py
|
|
||||||
│ (format output) │
|
|
||||||
└─────────────────┘
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### API Architecture
|
### API Architecture
|
||||||
|
|
||||||
```text
|
```text
|
||||||
┌──────────────────────┐
|
FastAPI App → Pydantic Models → Pipeline Layer → JSON Response
|
||||||
│ FastAPI App │
|
|
||||||
│ (src/api/app.py) │
|
|
||||||
└──────────┬───────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────┐
|
|
||||||
│ Pydantic Models │ ← api/models.py
|
|
||||||
│ (validation) │
|
|
||||||
└───────┬─────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────┐
|
|
||||||
│ Pipeline Layer │ ← e.g. pipelines/sentiment.py
|
|
||||||
│ (ML logic) │ (shared with CLI)
|
|
||||||
└───────┬─────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────┐
|
|
||||||
│ JSON Response │ ← automatic serialization
|
|
||||||
│ (HTTP output) │
|
|
||||||
└─────────────────┘
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Key Concepts
|
| Layer | Description |
|
||||||
|
| ------------ | ---------------------------------------------- |
|
||||||
| Layer | Description |
|
| **CLI** | Manages user input/output and navigation. |
|
||||||
| ------------ | -------------------------------------------------------------------------- |
|
| **API** | Exposes endpoints with automatic OpenAPI docs. |
|
||||||
| **CLI** | Manages user input/output, help menus, and navigation between commands. |
|
| **Command** | Encapsulates user-facing operations. |
|
||||||
| **API** | FastAPI application serving HTTP endpoints with automatic documentation. |
|
| **Pipeline** | Wraps Hugging Face’s pipelines. |
|
||||||
| **Command** | Encapsulates a single user-facing operation (e.g., run sentiment). |
|
| **Models** | Validates requests/responses. |
|
||||||
| **Pipeline** | Wraps Hugging Face's `transformers.pipeline()` to perform inference. |
|
| **Display** | Formats console output. |
|
||||||
| **Models** | Pydantic schemas for request/response validation and serialization. |
|
|
||||||
| **Display** | Handles clean console rendering (colored output, tables, JSON formatting). |
|
|
||||||
|
|
||||||
### Key Concepts
|
|
||||||
|
|
||||||
| Layer | Description |
|
|
||||||
| ------------ | -------------------------------------------------------------------------- |
|
|
||||||
| **CLI** | Manages user input/output, help menus, and navigation between commands. |
|
|
||||||
| **Command** | Encapsulates a single user-facing operation (e.g., run sentiment). |
|
|
||||||
| **Pipeline** | Wraps Hugging Face’s `transformers.pipeline()` to perform inference. |
|
|
||||||
| **Display** | Handles clean console rendering (colored output, tables, JSON formatting). |
|
|
||||||
| **Config** | Centralizes model names, limits, and global constants. |
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## ⚙️ Configuration
|
## ⚙️ Configuration
|
||||||
|
|
||||||
All configuration is centralized in `src/config/settings.py`.
|
All configuration is centralized in `src/config/settings.py`:
|
||||||
|
|
||||||
Example:
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class Config:
|
class Config:
|
||||||
|
|
@ -384,140 +295,75 @@ class Config:
|
||||||
"fillmask": "bert-base-uncased",
|
"fillmask": "bert-base-uncased",
|
||||||
"textgen": "gpt2",
|
"textgen": "gpt2",
|
||||||
"ner": "dslim/bert-base-NER",
|
"ner": "dslim/bert-base-NER",
|
||||||
"moderation":"unitary/toxic-bert"
|
"qa": "distilbert-base-cased-distilled-squad",
|
||||||
|
"moderation":"unitary/toxic-bert",
|
||||||
}
|
}
|
||||||
MAX_LENGTH = 512
|
MAX_LENGTH = 512
|
||||||
BATCH_SIZE = 8
|
BATCH_SIZE = 8
|
||||||
```
|
```
|
||||||
|
|
||||||
You can easily modify model names to experiment with different checkpoints.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🧩 Extending the Playground
|
## 🧩 Extending the Playground
|
||||||
|
|
||||||
To create a new experiment (e.g., keyword extraction):
|
To add a new NLP experiment (e.g., keyword extraction):
|
||||||
|
|
||||||
### For CLI Support
|
1. Duplicate `src/pipelines/template.py` → `src/pipelines/keywords.py`
|
||||||
|
2. Create a command: `src/commands/keywords.py`
|
||||||
|
3. Register it in `src/main.py`
|
||||||
|
4. Add Pydantic models in `src/api/models.py` and an endpoint in `src/api/app.py`
|
||||||
|
5. Update `Config.DEFAULT_MODELS`
|
||||||
|
|
||||||
1. **Duplicate** `src/pipelines/template.py` → `src/pipelines/keywords.py`
|
Both CLI and API will automatically share this logic.
|
||||||
Implement the `run()` or `analyze()` logic using a new Hugging Face pipeline.
|
|
||||||
|
|
||||||
2. **Create a Command** in `src/commands/keywords.py` to interact with users.
|
|
||||||
|
|
||||||
3. **Register the command** inside `src/main.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from src.commands.keywords import KeywordsCommand
|
|
||||||
cli.register_command(KeywordsCommand())
|
|
||||||
```
|
|
||||||
|
|
||||||
### For API Support
|
|
||||||
|
|
||||||
4. **Add Pydantic models** in `src/api/models.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
class KeywordsRequest(BaseModel):
|
|
||||||
text: str
|
|
||||||
model: Optional[str] = None
|
|
||||||
|
|
||||||
class KeywordsResponse(BaseModel):
|
|
||||||
success: bool
|
|
||||||
keywords: List[str]
|
|
||||||
model_used: str
|
|
||||||
```
|
|
||||||
|
|
||||||
5. **Add endpoint** in `src/api/app.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
@app.post("/keywords", response_model=KeywordsResponse)
|
|
||||||
async def extract_keywords(request: KeywordsRequest):
|
|
||||||
# Implementation using KeywordsAnalyzer pipeline
|
|
||||||
pass
|
|
||||||
```
|
|
||||||
|
|
||||||
6. **Update configuration** in `Config.DEFAULT_MODELS`.
|
|
||||||
|
|
||||||
Both CLI and API will automatically share the same pipeline implementation!
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 🧪 Testing
|
|
||||||
|
|
||||||
You can use `pytest` for lightweight validation:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install pytest
|
|
||||||
pytest -q
|
|
||||||
```
|
|
||||||
|
|
||||||
Recommended structure:
|
|
||||||
|
|
||||||
```
|
|
||||||
tests/
|
|
||||||
├── test_sentiment.py
|
|
||||||
├── test_textgen.py
|
|
||||||
└── ...
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🧰 Troubleshooting
|
## 🧰 Troubleshooting
|
||||||
|
|
||||||
### General Issues
|
| Issue | Solution |
|
||||||
|
| ------------------------ | ----------------------- |
|
||||||
|
| `transformers` not found | Activate your venv. |
|
||||||
|
| Torch install fails | Use CPU-only wheel. |
|
||||||
|
| Models download slowly | Cached after first use. |
|
||||||
|
| Encoding issues | Ensure UTF-8 terminal. |
|
||||||
|
|
||||||
| Issue | Cause / Solution |
|
### API Issues
|
||||||
| ---------------------------- | -------------------------------------------- |
|
|
||||||
| **`transformers` not found** | Check virtual environment activation. |
|
|
||||||
| **Torch fails to install** | Install CPU-only version from PyTorch index. |
|
|
||||||
| **Models download slowly** | Hugging Face caches them after first run. |
|
|
||||||
| **Unicode / accents broken** | Ensure terminal encoding is UTF‑8. |
|
|
||||||
|
|
||||||
### API-Specific Issues
|
| Issue | Solution |
|
||||||
|
| -------------------- | --------------------------------------- |
|
||||||
| Issue | Cause / Solution |
|
| `FastAPI` missing | `pip install fastapi "uvicorn[standard]"` |
|
||||||
| ----------------------------- | ----------------------------------------------------- |
|
| Port in use | Change with `--port 8001` |
|
||||||
| **`FastAPI` not found** | Install with `pip install fastapi uvicorn[standard]`. |
|
| CORS error | Edit `allow_origins` in `src/api/config.py` |
|
||||||
| **Port already in use** | Use `--port 8001` or kill process on port 8000. |
|
| Validation error 422 | Check request body |
|
||||||
| **CORS errors in browser** | Check `allow_origins` in `src/api/config.py`. |
|
| 500 error | Verify model loading |
|
||||||
| **422 Validation Error** | Check request body matches Pydantic models. |
|
|
||||||
| **500 Internal Server Error** | Check model loading and pipeline initialization. |
|
|
||||||
|
|
||||||
### Quick API Health Check
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Test if API is running
|
|
||||||
curl http://localhost:8000/health
|
|
||||||
|
|
||||||
# Test basic endpoint
|
|
||||||
curl -X POST "http://localhost:8000/sentiment" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"text": "test"}'
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🧭 Development Guidelines
|
## 🧭 Development Guidelines
|
||||||
|
|
||||||
- Keep **Command** classes lightweight — no ML logic inside them.
|
- Keep command classes lightweight (no ML inside)
|
||||||
- Reuse the **Pipeline Template** for new experiments.
|
- Use the pipeline template for new tasks
|
||||||
- Format outputs consistently via the `DisplayFormatter`.
|
- Format all outputs via `DisplayFormatter`
|
||||||
- Document all new models or commands in `README.md` and `settings.py`.
|
- Document new commands and models
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🧱 Roadmap
|
## 🧱 Roadmap
|
||||||
|
|
||||||
- [ ] Add non-interactive CLI flags (`--text`, `--task`)
|
- [ ] Non-interactive CLI flags (`--text`, `--task`)
|
||||||
- [ ] Add multilingual model options
|
- [ ] Multilingual models
|
||||||
- [ ] Add automatic test coverage
|
- [ ] Test coverage
|
||||||
- [ ] Add logging and profiling utilities
|
- [ ] Logging & profiling
|
||||||
- [ ] Add export to JSON/CSV results
|
- [ ] Export to JSON/CSV
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 📜 License
|
## 📜 License
|
||||||
|
|
||||||
This project is licensed under the [MIT License](./LICENSE) — feel free to use it, modify it, and share it!
|
Licensed under the [MIT License](./LICENSE).
|
||||||
|
You are free to use, modify, and distribute this project.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
✨ **End of Documentation**
|
||||||
|
_The AI Lab – Transformers CLI Playground: built for learning, experimenting, and sharing NLP excellence._
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ import logging
|
||||||
from .models import (
|
from .models import (
|
||||||
TextRequest, TextListRequest, QARequest, FillMaskRequest,
|
TextRequest, TextListRequest, QARequest, FillMaskRequest,
|
||||||
SentimentResponse, NERResponse, QAResponse, FillMaskResponse,
|
SentimentResponse, NERResponse, QAResponse, FillMaskResponse,
|
||||||
ModerationResponse, TextGenResponse, BatchResponse, ErrorResponse
|
ModerationResponse, TextGenResponse, BatchResponse
|
||||||
)
|
)
|
||||||
|
|
||||||
# Global pipeline instances
|
# Global pipeline instances
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
API configuration settings
|
API configuration settings
|
||||||
"""
|
"""
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
from src.config.settings import Config
|
||||||
|
|
||||||
|
|
||||||
class APIConfig:
|
class APIConfig:
|
||||||
|
|
@ -25,21 +26,6 @@ class APIConfig:
|
||||||
MAX_TEXT_LENGTH = 10000
|
MAX_TEXT_LENGTH = 10000
|
||||||
MAX_BATCH_SIZE = 100
|
MAX_BATCH_SIZE = 100
|
||||||
|
|
||||||
# Model defaults
|
|
||||||
DEFAULT_MODELS: Dict[str, str] = {
|
|
||||||
"sentiment": "cardiffnlp/twitter-roberta-base-sentiment-latest",
|
|
||||||
"ner": "dbmdz/bert-large-cased-finetuned-conll03-english",
|
|
||||||
"qa": "distilbert-base-cased-distilled-squad",
|
|
||||||
"fillmask": "bert-base-uncased",
|
|
||||||
"moderation": "martin-ha/toxic-comment-model",
|
|
||||||
"textgen": "gpt2"
|
|
||||||
}
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_default_model(cls, task: str) -> str:
|
|
||||||
"""Get default model for a task"""
|
|
||||||
return cls.DEFAULT_MODELS.get(task, "")
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_all_settings(cls) -> Dict[str, Any]:
|
def get_all_settings(cls) -> Dict[str, Any]:
|
||||||
"""Get all configuration settings"""
|
"""Get all configuration settings"""
|
||||||
|
|
@ -57,5 +43,4 @@ class APIConfig:
|
||||||
"max_text_length": cls.MAX_TEXT_LENGTH,
|
"max_text_length": cls.MAX_TEXT_LENGTH,
|
||||||
"max_batch_size": cls.MAX_BATCH_SIZE
|
"max_batch_size": cls.MAX_BATCH_SIZE
|
||||||
},
|
},
|
||||||
"default_models": cls.DEFAULT_MODELS
|
|
||||||
}
|
}
|
||||||
|
|
@ -15,11 +15,11 @@ class Config:
|
||||||
|
|
||||||
# Default models
|
# Default models
|
||||||
DEFAULT_MODELS = {
|
DEFAULT_MODELS = {
|
||||||
"sentiment": "cardiffnlp/twitter-roberta-base-sentiment-latest",
|
"sentiment": "distilbert-base-uncased-finetuned-sst-2-english",
|
||||||
"fillmask": "distilbert-base-uncased",
|
"fillmask": "bert-base-uncased",
|
||||||
"textgen": "gpt2",
|
"textgen": "gpt2",
|
||||||
"moderation": "unitary/toxic-bert",
|
"ner": "dslim/bert-base-NER",
|
||||||
"ner": "dbmdz/bert-large-cased-finetuned-conll03-english",
|
"moderation":"unitary/toxic-bert",
|
||||||
"qa": "distilbert-base-cased-distilled-squad",
|
"qa": "distilbert-base-cased-distilled-squad",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue