develop #2

Merged
Cyril merged 6 commits from develop into main 2025-10-12 20:11:20 +00:00
8 changed files with 585 additions and 12 deletions
Showing only changes of commit 8115bd1eb7

View File

@ -1,14 +1,14 @@
# 🧠 AI Lab Transformers CLI Playground
> A **pedagogical and technical project** designed for AI practitioners and students to experiment with Hugging Face Transformers through an **interactive Command-Line Interface (CLI)**.
> This playground provides ready-to-use NLP pipelines (Sentiment Analysis, Named Entity Recognition, Text Generation, Fill-Mask, Moderation, etc.) in a modular, extensible, and educational codebase.
> This playground provides ready-to-use NLP pipelines (Sentiment Analysis, Named Entity Recognition, Text Generation, Fill-Mask, Question Answering, Moderation, etc.) in a modular, extensible, and educational codebase.
---
## 📚 Overview
The **AI Lab Transformers CLI Playground** allows you to explore multiple natural language processing tasks directly from the terminal.
Each task (e.g., sentiment, NER, text generation) is implemented as a **Command Module**, which interacts with a **Pipeline Module** built on top of the `transformers` library.
Each task (e.g., sentiment, NER, text generation, question answering) is implemented as a **Command Module**, which interacts with a **Pipeline Module** built on top of the `transformers` library.
The lab is intentionally structured to demonstrate **clean software design for ML codebases** — with strict separation between configuration, pipelines, CLI logic, and display formatting.
@ -32,7 +32,8 @@ src/
│ ├── fillmask.py # Masked token prediction command
│ ├── textgen.py # Text generation command
│ ├── ner.py # Named Entity Recognition command
│ └── moderation.py # Toxicity / content moderation command
│ ├── moderation.py # Toxicity / content moderation command
│ └── qa.py # Question Answering command
├── pipelines/ # Machine learning logic (Hugging Face Transformers)
│ ├── __init__.py
@ -41,7 +42,8 @@ src/
│ ├── fillmask.py
│ ├── textgen.py
│ ├── ner.py
│ └── moderation.py
│ ├── moderation.py
│ └── qa.py # Question Answering pipeline
└── config/
├── __init__.py
@ -104,7 +106,7 @@ python -m src.main
poetry run python src/main.py
```
Youll see an interactive menu listing the available commands:
You'll see an interactive menu listing the available commands:
```
Welcome to AI Lab - Transformers CLI Playground
@ -114,6 +116,7 @@ Available commands:
• textgen Generate text from a prompt
• ner Extract named entities from text
• moderation Detect toxic or unsafe content
• qa Ask questions about a given text context
```
### Example Sessions
@ -152,6 +155,14 @@ Available commands:
- California (LOC)
```
#### 🔹 Question Answering
```text
💬 Context: Albert Einstein was born in 1879 in Germany. He developed the theory of relativity.
❓ Question: When was Einstein born?
→ Answer: 1879 (confidence: 0.95)
```
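Under the hood, the `qa` command delegates to a standard `transformers` question-answering pipeline, using the checkpoint configured for the `qa` task. As a rough standalone sketch, the same lookup can be reproduced outside the CLI:
```python
from transformers import pipeline

# Same checkpoint the playground configures for the "qa" task
qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

result = qa(
    question="When was Einstein born?",
    context="Albert Einstein was born in 1879 in Germany. He developed the theory of relativity.",
)
print(result["answer"], result["score"])  # extracted span plus a confidence score
```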
#### 🔹 Moderation
```text
@ -173,13 +184,13 @@ The internal structure follows a clean **Command ↔ Pipeline ↔ Display** patt
┌─────────────────┐
│ Command Layer │ ← e.g. sentiment.py
│ Command Layer │ ← e.g. sentiment.py, qa.py
│ (user commands) │
└───────┬─────────┘
┌─────────────────┐
│ Pipeline Layer │ ← e.g. pipelines/sentiment.py
│ Pipeline Layer │ ← e.g. pipelines/sentiment.py, pipelines/qa.py
│ (ML logic) │
└───────┬─────────┘
@ -195,8 +206,8 @@ The internal structure follows a clean **Command ↔ Pipeline ↔ Display** patt
| Layer | Description |
| ------------ | -------------------------------------------------------------------------- |
| **CLI** | Manages user input/output, help menus, and navigation between commands. |
| **Command** | Encapsulates a single user-facing operation (e.g., run sentiment). |
| **Pipeline** | Wraps Hugging Faces `transformers.pipeline()` to perform inference. |
| **Command** | Encapsulates a single user-facing operation (e.g., run sentiment, QA). |
| **Pipeline** | Wraps Hugging Face's `transformers.pipeline()` to perform inference. |
| **Display** | Handles clean console rendering (colored output, tables, JSON formatting). |
| **Config** | Centralizes model names, limits, and global constants. |
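As a hedged illustration of how these layers compose, a hypothetical single-shot command (illustrative only, reusing the QA classes introduced in this PR) could look like:
```python
from src.cli.base import CLICommand
from src.cli.display import DisplayFormatter
from src.pipelines.qa import QuestionAnsweringSystem

class QuickQACommand(CLICommand):                  # Command layer: one user-facing operation
    @property
    def name(self) -> str:
        return "qa-once"

    @property
    def description(self) -> str:
        return "Answer one question about a fixed context"

    def run(self):
        qa = QuestionAnsweringSystem()             # Pipeline layer: wraps transformers.pipeline()
        result = qa.answer(
            "What theory did Einstein develop?",
            "Albert Einstein developed the theory of relativity.",
        )
        print(DisplayFormatter.format_qa_result(result))  # Display layer: console rendering
```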
@ -215,7 +226,8 @@ class Config:
"fillmask": "bert-base-uncased",
"textgen": "gpt2",
"ner": "dslim/bert-base-NER",
"moderation":"unitary/toxic-bert"
"moderation":"unitary/toxic-bert",
"qa": "distilbert-base-cased-distilled-squad"
}
MAX_LENGTH = 512
BATCH_SIZE = 8
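# How pipelines consume this config (as src/pipelines/qa.py does in this PR):
#   model_name = Config.get_model("qa")  # -> "distilbert-base-cased-distilled-squad"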
@ -260,6 +272,7 @@ Recommended structure:
tests/
├── test_sentiment.py
├── test_textgen.py
├── test_qa.py
└── ...
```
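A minimal sketch of what `tests/test_qa.py` might cover (hypothetical tests; the Hugging Face `pipeline` factory is monkeypatched so no model is downloaded):
```python
import pytest
from src.pipelines.qa import QuestionAnsweringSystem

@pytest.fixture
def qa(monkeypatch):
    # Stub out the transformers pipeline so the system loads instantly
    monkeypatch.setattr("src.pipelines.qa.pipeline", lambda *args, **kwargs: None)
    return QuestionAnsweringSystem()

def test_empty_question_is_rejected(qa):
    assert "error" in qa.answer("", "Some context")

def test_threshold_must_be_between_0_and_1(qa):
    with pytest.raises(ValueError):
        qa.set_confidence_threshold(1.5)
```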

View File

@ -190,3 +190,78 @@ class DisplayFormatter:
output.append(f"{entity} ({count}x)")
return "\n".join(output)
@staticmethod
def format_qa_result(result: Dict[str, Any]) -> str:
"""Format Question Answering result for display"""
if "error" in result:
return f"{result['error']}"
output = []
output.append(f"❓ Question: {result['question']}")
# Confidence indicator
confidence = result['confidence']
confidence_emoji = "" if result['is_confident'] else "⚠️"
confidence_bar = "" * int(confidence * 10)
output.append(f"{confidence_emoji} Answer: {result['answer']}")
output.append(f"📊 Confidence: {result['confidence_level']} ({confidence:.1%}) {confidence_bar}")
if not result['is_confident']:
output.append("⚠️ Low confidence - answer might not be reliable")
output.append(f"\n📍 Position: characters {result['start_position']}-{result['end_position']}")
output.append(f"📄 Context with answer highlighted:")
output.append(f" {result['highlighted_context']}")
return "\n".join(output)
@staticmethod
def format_qa_context_analysis(analysis: Dict[str, Any]) -> str:
"""Format QA context analysis for display"""
if "error" in analysis:
return f"{analysis['error']}"
output = []
output.append("✅ Context set successfully!")
output.append(f"📊 Context Statistics:")
stats = analysis['context_stats']
output.append(f" • Words: {stats['word_count']}")
output.append(f" • Sentences: ~{stats['sentence_count']}")
output.append(f" • Characters: {stats['character_count']}")
if analysis['suggested_questions']:
output.append(f"\n💡 Suggested question types:")
for suggestion in analysis['suggested_questions']:
output.append(f"{suggestion}")
if analysis['tips']:
output.append(f"\n📝 Tips for good questions:")
for tip in analysis['tips']:
output.append(f"{tip}")
return "\n".join(output)
@staticmethod
def format_qa_multiple_result(result: Dict[str, Any]) -> str:
"""Format multiple QA results for display"""
if "error" in result:
return f"{result['error']}"
output = []
output.append(f"📊 Multiple Questions Analysis")
output.append("=" * 50)
output.append(f"Total Questions: {result['total_questions']}")
output.append(f"Successfully Processed: {result['processed_questions']}")
output.append(f"Confident Answers: {result['confident_answers']}")
output.append(f"Average Confidence: {result['average_confidence']:.1%}")
output.append(f"\n📋 Results:")
for qa_result in result['results']:
confidence_emoji = "" if qa_result['is_confident'] else "⚠️"
output.append(f"\n{qa_result['question_number']}. {qa_result['question']}")
output.append(f" {confidence_emoji} {qa_result['answer']} ({qa_result['confidence']:.1%})")
return "\n".join(output)

View File

@ -6,5 +6,6 @@ from .fillmask import FillMaskCommand
from .textgen import TextGenCommand
from .moderation import ModerationCommand
from .ner import NERCommand
from .qa import QACommand
__all__ = ['SentimentCommand', 'FillMaskCommand', 'TextGenCommand', 'ModerationCommand', 'NERCommand']
__all__ = ['SentimentCommand', 'FillMaskCommand', 'TextGenCommand', 'ModerationCommand', 'NERCommand', 'QACommand']

214 src/commands/qa.py Normal file
View File

@ -0,0 +1,214 @@
from src.cli.base import CLICommand
from src.cli.display import DisplayFormatter
from src.pipelines.qa import QuestionAnsweringSystem
class QACommand(CLICommand):
"""Interactive Question Answering command"""
def __init__(self):
self.qa_system = None
self.current_context = None
self.session_questions = []
@property
def name(self) -> str:
return "qa"
@property
def description(self) -> str:
return "Question Answering - Ask questions about a given text"
def _initialize_qa_system(self):
"""Lazy initialization of the QA system"""
if self.qa_system is None:
print("🔄 Loading Question Answering model...")
self.qa_system = QuestionAnsweringSystem()
DisplayFormatter.show_success("QA model loaded!")
def _show_instructions(self):
"""Show usage instructions and examples"""
print("\n❓ Question Answering System")
print("Ask questions about a text context and get precise answers.")
print("\n📝 How it works:")
print(" 1. First, provide a context (text containing information)")
print(" 2. Then ask questions about that context")
print(" 3. The system extracts answers directly from the text")
print("\n💡 Example context:")
print(" 'Albert Einstein was born in 1879 in Germany. He developed the theory of relativity.'")
print("💡 Example questions:")
print(" - When was Einstein born?")
print(" - Where was Einstein born?")
print(" - What theory did Einstein develop?")
print("\n🎛️ Commands:")
print(" 'back' - Return to main menu")
print(" 'help' - Show these instructions")
print(" 'context' - Set new context")
print(" 'multi' - Ask multiple questions at once")
print(" 'session' - Review session history")
print(" 'settings' - Adjust confidence threshold")
print("-" * 70)
def _set_context(self):
"""Allow user to set or change the context"""
print("\n📄 Set Context")
print("Enter the text that will serve as context for your questions.")
print("You can enter multiple lines. Type 'done' when finished.")
print("-" * 50)
lines = []
while True:
line = input("📝 ").strip()
if line.lower() == 'done':
break
if line:
lines.append(line)
if not lines:
DisplayFormatter.show_warning("No context provided")
return False
self.current_context = " ".join(lines)
# Analyze context
analysis = self.qa_system.interactive_qa(self.current_context)
if "error" in analysis:
DisplayFormatter.show_error(analysis["error"])
return False
formatted_analysis = DisplayFormatter.format_qa_context_analysis(analysis)
print(formatted_analysis)
return True
def _ask_single_question(self):
"""Ask a single question about the current context"""
if not self.current_context:
DisplayFormatter.show_warning("Please set a context first using 'context' command")
return
question = input("\n❓ Your question: ").strip()
if not question:
DisplayFormatter.show_warning("Please enter a question")
return
DisplayFormatter.show_loading("Finding answer...")
result = self.qa_system.answer(question, self.current_context)
if "error" not in result:
self.session_questions.append(result)
formatted_result = DisplayFormatter.format_qa_result(result)
print(formatted_result)
def _multi_question_mode(self):
"""Allow asking multiple questions at once"""
if not self.current_context:
DisplayFormatter.show_warning("Please set a context first using 'context' command")
return
print("\n❓ Multiple Questions Mode")
print("Enter your questions one by one. Type 'done' when finished.")
print("-" * 50)
questions = []
while True:
question = input(f"Question #{len(questions)+1}: ").strip()
if question.lower() == 'done':
break
if question:
questions.append(question)
if not questions:
DisplayFormatter.show_warning("No questions provided")
return
DisplayFormatter.show_loading(f"Processing {len(questions)} questions...")
result = self.qa_system.answer_multiple(questions, self.current_context)
if "error" not in result:
self.session_questions.extend(result["results"])
formatted_result = DisplayFormatter.format_qa_multiple_result(result)
print(formatted_result)
def _show_session_history(self):
"""Show the history of questions asked in this session"""
if not self.session_questions:
DisplayFormatter.show_warning("No questions asked in this session yet")
return
print(f"\n📚 Session History ({len(self.session_questions)} questions)")
print("=" * 60)
for i, qa in enumerate(self.session_questions, 1):
confidence_emoji = "" if qa["is_confident"] else "⚠️"
print(f"\n{i}. {qa['question']}")
print(f" {confidence_emoji} {qa['answer']} (confidence: {qa['confidence']:.1%})")
def _adjust_settings(self):
"""Allow user to adjust QA settings"""
current_threshold = self.qa_system.confidence_threshold
print(f"\n⚙️ Current Settings:")
print(f"Confidence threshold: {current_threshold:.2f}")
print("\nLower threshold = more answers accepted (less strict)")
print("Higher threshold = fewer answers accepted (more strict)")
try:
new_threshold = input(f"Enter new threshold (0.0-1.0, current: {current_threshold}): ").strip()
if new_threshold:
threshold = float(new_threshold)
self.qa_system.set_confidence_threshold(threshold)
DisplayFormatter.show_success(f"Threshold set to {threshold:.2f}")
except ValueError:
DisplayFormatter.show_error("Invalid threshold value")
def run(self):
"""Run interactive Question Answering"""
self._initialize_qa_system()
self._show_instructions()
while True:
if self.current_context:
context_preview = (self.current_context[:50] + "...") if len(self.current_context) > 50 else self.current_context
prompt = f"\n💬 [{context_preview}] Ask a question: "
else:
prompt = "\n💬 Enter command or set context first: "
user_input = input(prompt).strip()
if user_input.lower() == 'back':
break
elif user_input.lower() == 'help':
self._show_instructions()
continue
elif user_input.lower() == 'context':
self._set_context()
continue
elif user_input.lower() == 'multi':
self._multi_question_mode()
continue
elif user_input.lower() == 'session':
self._show_session_history()
continue
elif user_input.lower() == 'settings':
self._adjust_settings()
continue
if not user_input:
DisplayFormatter.show_warning("Please enter a question or command")
continue
# If we have a context and user input is not a command, treat it as a question
if self.current_context:
DisplayFormatter.show_loading("Finding answer...")
result = self.qa_system.answer(user_input, self.current_context)
if "error" not in result:
self.session_questions.append(result)
formatted_result = DisplayFormatter.format_qa_result(result)
print(formatted_result)
else:
DisplayFormatter.show_warning("Please set a context first using 'context' command")

View File

@ -19,6 +19,7 @@ class Config:
"textgen": "gpt2",
"moderation": "unitary/toxic-bert",
"ner": "dbmdz/bert-large-cased-finetuned-conll03-english",
"qa": "distilbert-base-cased-distilled-squad",
}
# Interface

View File

@ -13,6 +13,7 @@ from src.commands import (
FillMaskCommand,
ModerationCommand,
NERCommand,
QACommand,
SentimentCommand,
TextGenCommand,
)
@ -31,6 +32,7 @@ def main():
TextGenCommand,
ModerationCommand,
NERCommand,
QACommand,
]
for command in commands_to_register:
cli.register_command(command())

View File

@ -6,6 +6,7 @@ from .fillmask import FillMaskAnalyzer
from .textgen import TextGenerator
from .moderation import ContentModerator
from .ner import NamedEntityRecognizer
from .qa import QuestionAnsweringSystem
from .template import TemplatePipeline
__all__ = ['SentimentAnalyzer', 'FillMaskAnalyzer', 'TextGenerator', 'ContentModerator', 'NamedEntityRecognizer', 'TemplatePipeline']
__all__ = ['SentimentAnalyzer', 'FillMaskAnalyzer', 'TextGenerator', 'ContentModerator', 'NamedEntityRecognizer', 'QuestionAnsweringSystem', 'TemplatePipeline']

266 src/pipelines/qa.py Normal file
View File

@ -0,0 +1,266 @@
from transformers import pipeline
from typing import Dict, List, Optional, Tuple
from src.config import Config
import re
class QuestionAnsweringSystem:
"""Question Answering system using transformers"""
def __init__(self, model_name: Optional[str] = None):
"""
Initialize the question-answering pipeline
Args:
model_name: Name of the model to use (optional)
"""
self.model_name = model_name or Config.get_model("qa")
print(f"Loading Question Answering model: {self.model_name}")
self.pipeline = pipeline("question-answering", model=self.model_name)
print("QA model loaded successfully!")
# Default confidence threshold
self.confidence_threshold = 0.1
def answer(self, question: str, context: str, max_answer_len: int = 50) -> Dict:
"""
Answer a question based on the given context
Args:
question: Question to answer
context: Context text containing the answer
max_answer_len: Maximum length of the answer
Returns:
Dictionary with answer, score, and position information
"""
if not question.strip():
return {"error": "Empty question"}
if not context.strip():
return {"error": "Empty context"}
try:
result = self.pipeline(
question=question,
context=context,
max_answer_len=max_answer_len
)
confidence_level = self._get_confidence_level(result["score"])
highlighted_context = self._highlight_answer_in_context(
context, result["answer"], result["start"], result["end"]
)
return {
"question": question,
"context": context,
"answer": result["answer"],
"confidence": round(result["score"], 4),
"confidence_level": confidence_level,
"start_position": result["start"],
"end_position": result["end"],
"highlighted_context": highlighted_context,
"is_confident": result["score"] >= self.confidence_threshold
}
except Exception as e:
return {"error": f"QA processing error: {str(e)}"}
def _get_confidence_level(self, score: float) -> str:
"""
Convert numerical score to confidence level
Args:
score: Confidence score (0-1)
Returns:
Confidence level description
"""
if score >= 0.8:
return "Very High"
elif score >= 0.6:
return "High"
elif score >= 0.4:
return "Medium"
elif score >= 0.2:
return "Low"
else:
return "Very Low"
def _highlight_answer_in_context(self, context: str, answer: str, start: int, end: int) -> str:
"""
Highlight the answer within the context
Args:
context: Original context
answer: Extracted answer
start: Start position of answer
end: End position of answer
Returns:
Context with highlighted answer
"""
if start < 0 or end > len(context):
return context
before = context[:start]
highlighted_answer = f"**{answer}**"
after = context[end:]
return before + highlighted_answer + after
def answer_multiple(self, questions: List[str], context: str, max_answer_len: int = 50) -> Dict:
"""
Answer multiple questions for the same context
Args:
questions: List of questions to answer
context: Context text
max_answer_len: Maximum length of answers
Returns:
Dictionary with all answers and summary statistics
"""
if not questions:
return {"error": "No questions provided"}
if not context.strip():
return {"error": "Empty context"}
results = []
confident_answers = 0
total_confidence = 0
for i, question in enumerate(questions, 1):
result = self.answer(question, context, max_answer_len)
if "error" not in result:
results.append({
"question_number": i,
**result
})
if result["is_confident"]:
confident_answers += 1
total_confidence += result["confidence"]
if not results:
return {"error": "No valid questions processed"}
average_confidence = total_confidence / len(results) if results else 0
return {
"context": context,
"total_questions": len(questions),
"processed_questions": len(results),
"confident_answers": confident_answers,
"average_confidence": round(average_confidence, 4),
"confidence_threshold": self.confidence_threshold,
"results": results
}
def interactive_qa(self, context: str) -> Dict:
"""
Prepare context for interactive Q&A session
Args:
context: Context text for questions
Returns:
Context analysis and preparation info
"""
if not context.strip():
return {"error": "Empty context"}
# Basic context analysis
word_count = len(context.split())
sentence_count = len([s for s in context.split('.') if s.strip()])
char_count = len(context)
# Suggest question types based on content
suggested_questions = self._generate_question_suggestions(context)
return {
"context": context,
"context_stats": {
"word_count": word_count,
"sentence_count": sentence_count,
"character_count": char_count
},
"suggested_questions": suggested_questions,
"tips": [
"Ask specific questions about facts mentioned in the text",
"Use question words: Who, What, When, Where, Why, How",
"Keep questions clear and focused",
"The answer should be present in the provided context"
]
}
def _generate_question_suggestions(self, context: str) -> List[str]:
"""
Generate suggested questions based on context analysis
Args:
context: Context text
Returns:
List of suggested question templates
"""
suggestions = []
# Check for common patterns and suggest relevant questions
if re.search(r'\b\d{4}\b', context): # Years
suggestions.append("When did [event] happen?")
if re.search(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context): # Names
suggestions.append("Who is [person name]?")
if re.search(r'\b(founded|created|established|built)\b', context, re.IGNORECASE):
suggestions.append("Who founded/created [organization]?")
if re.search(r'\b(located|situated|based)\b', context, re.IGNORECASE):
suggestions.append("Where is [place/organization] located?")
if re.search(r'\b(because|due to|reason)\b', context, re.IGNORECASE):
suggestions.append("Why did [event] happen?")
if re.search(r'\b(how|method|process)\b', context, re.IGNORECASE):
suggestions.append("How does [process] work?")
if not suggestions:
suggestions = [
"What is the main topic of this text?",
"Who are the key people mentioned?",
"What important events are described?"
]
return suggestions[:5] # Limit to 5 suggestions
def set_confidence_threshold(self, threshold: float):
"""
Set the confidence threshold for answers
Args:
threshold: Threshold between 0 and 1
"""
if 0 <= threshold <= 1:
self.confidence_threshold = threshold
else:
raise ValueError("Threshold must be between 0 and 1")
def answer_batch(self, qa_pairs: List[Tuple[str, str]], max_answer_len: int = 50) -> List[Dict]:
"""
Process multiple question-context pairs
Args:
qa_pairs: List of (question, context) tuples
max_answer_len: Maximum length of answers
Returns:
List of QA results
"""
return [
self.answer(question, context, max_answer_len)
for question, context in qa_pairs
]
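Outside the CLI, the pipeline can also be driven programmatically; a brief hedged sketch of the single-answer, threshold, and batch entry points:
```python
from src.pipelines.qa import QuestionAnsweringSystem

qa = QuestionAnsweringSystem()  # loads the checkpoint configured for the "qa" task
context = "Albert Einstein was born in 1879 in Germany. He developed the theory of relativity."

qa.set_confidence_threshold(0.3)  # stricter than the 0.1 default
single = qa.answer("When was Einstein born?", context)
print(single["answer"], single["confidence_level"], single["is_confident"])

# answer_batch pairs each question with its own context
batch = qa.answer_batch([
    ("Who developed relativity?", context),
    ("Where was Einstein born?", context),
])
```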