Add Question Answering feature with display formatting and command integration
This commit is contained in:
parent
b990f80263
commit
8115bd1eb7
33
README.md
33
README.md
|
|
@ -1,14 +1,14 @@
|
|||
# 🧠 AI Lab – Transformers CLI Playground
|
||||
|
||||
> A **pedagogical and technical project** designed for AI practitioners and students to experiment with Hugging Face Transformers through an **interactive Command‑Line Interface (CLI)**.
|
||||
> This playground provides ready‑to‑use NLP pipelines (Sentiment Analysis, Named Entity Recognition, Text Generation, Fill‑Mask, Moderation, etc.) in a modular, extensible, and educational codebase.
|
||||
> This playground provides ready‑to‑use NLP pipelines (Sentiment Analysis, Named Entity Recognition, Text Generation, Fill‑Mask, Question Answering, Moderation, etc.) in a modular, extensible, and educational codebase.
|
||||
|
||||
---
|
||||
|
||||
## 📚 Overview
|
||||
|
||||
The **AI Lab – Transformers CLI Playground** allows you to explore multiple natural language processing tasks directly from the terminal.
|
||||
Each task (e.g., sentiment, NER, text generation) is implemented as a **Command Module**, which interacts with a **Pipeline Module** built on top of the `transformers` library.
|
||||
Each task (e.g., sentiment, NER, text generation, question answering) is implemented as a **Command Module**, which interacts with a **Pipeline Module** built on top of the `transformers` library.
|
||||
|
||||
The lab is intentionally structured to demonstrate **clean software design for ML codebases** — with strict separation between configuration, pipelines, CLI logic, and display formatting.
|
||||
|
||||
|
|
@ -32,7 +32,8 @@ src/
|
|||
│ ├── fillmask.py # Masked token prediction command
|
||||
│ ├── textgen.py # Text generation command
|
||||
│ ├── ner.py # Named Entity Recognition command
|
||||
│ └── moderation.py # Toxicity / content moderation command
|
||||
│ ├── moderation.py # Toxicity / content moderation command
|
||||
│ └── qa.py # Question Answering command
|
||||
│
|
||||
├── pipelines/ # Machine learning logic (Hugging Face Transformers)
|
||||
│ ├── __init__.py
|
||||
|
|
@ -41,7 +42,8 @@ src/
|
|||
│ ├── fillmask.py
|
||||
│ ├── textgen.py
|
||||
│ ├── ner.py
|
||||
│ └── moderation.py
|
||||
│ ├── moderation.py
|
||||
│ └── qa.py # Question Answering pipeline
|
||||
│
|
||||
└── config/
|
||||
├── __init__.py
|
||||
|
|
@ -104,7 +106,7 @@ python -m src.main
|
|||
poetry run python src/main.py
|
||||
```
|
||||
|
||||
You’ll see an interactive menu listing the available commands:
|
||||
You'll see an interactive menu listing the available commands:
|
||||
|
||||
```
|
||||
Welcome to AI Lab - Transformers CLI Playground
|
||||
|
|
@ -114,6 +116,7 @@ Available commands:
|
|||
• textgen – Generate text from a prompt
|
||||
• ner – Extract named entities from text
|
||||
• moderation – Detect toxic or unsafe content
|
||||
• qa – Answer questions about a given text context
|
||||
```
|
||||
|
||||
### Example Sessions
|
||||
|
|
@ -152,6 +155,14 @@ Available commands:
|
|||
- California (LOC)
|
||||
```
|
||||
|
||||
#### 🔹 Question Answering
|
||||
|
||||
```text
|
||||
💬 Context: Albert Einstein was born in 1879 in Germany. He developed the theory of relativity.
|
||||
❓ Question: When was Einstein born?
|
||||
→ Answer: 1879 (confidence: 0.95)
|
||||
```
|
||||
|
||||
#### 🔹 Moderation
|
||||
|
||||
```text
|
||||
|
|
@ -173,13 +184,13 @@ The internal structure follows a clean **Command ↔ Pipeline ↔ Display** patt
|
|||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Command Layer │ ← e.g. sentiment.py
|
||||
│ Command Layer │ ← e.g. sentiment.py, qa.py
|
||||
│ (user commands) │
|
||||
└───────┬─────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Pipeline Layer │ ← e.g. pipelines/sentiment.py
|
||||
│ Pipeline Layer │ ← e.g. pipelines/sentiment.py, pipelines/qa.py
|
||||
│ (ML logic) │
|
||||
└───────┬─────────┘
|
||||
│
|
||||
|
|
@ -195,8 +206,8 @@ The internal structure follows a clean **Command ↔ Pipeline ↔ Display** patt
|
|||
| Layer | Description |
|
||||
| ------------ | -------------------------------------------------------------------------- |
|
||||
| **CLI** | Manages user input/output, help menus, and navigation between commands. |
|
||||
| **Command** | Encapsulates a single user-facing operation (e.g., run sentiment). |
|
||||
| **Pipeline** | Wraps Hugging Face’s `transformers.pipeline()` to perform inference. |
|
||||
| **Command** | Encapsulates a single user-facing operation (e.g., run sentiment, QA). |
|
||||
| **Pipeline** | Wraps Hugging Face's `transformers.pipeline()` to perform inference. |
|
||||
| **Display** | Handles clean console rendering (colored output, tables, JSON formatting). |
|
||||
| **Config** | Centralizes model names, limits, and global constants. |
|
||||
|
||||
|
|
@ -215,7 +226,8 @@ class Config:
|
|||
"fillmask": "bert-base-uncased",
|
||||
"textgen": "gpt2",
|
||||
"ner": "dslim/bert-base-NER",
|
||||
"moderation":"unitary/toxic-bert"
|
||||
"moderation":"unitary/toxic-bert",
|
||||
"qa": "distilbert-base-cased-distilled-squad"
|
||||
}
|
||||
MAX_LENGTH = 512
|
||||
BATCH_SIZE = 8
|
||||
|
|
@ -260,6 +272,7 @@ Recommended structure:
|
|||
tests/
|
||||
├── test_sentiment.py
|
||||
├── test_textgen.py
|
||||
├── test_qa.py
|
||||
└── ...
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -190,3 +190,78 @@ class DisplayFormatter:
|
|||
output.append(f" • {entity} ({count}x)")
|
||||
|
||||
return "\n".join(output)
|
||||
|
||||
@staticmethod
def format_qa_result(result: Dict[str, Any]) -> str:
    """Render a single Question Answering result as display text.

    Args:
        result: Either a dict holding an ``"error"`` message, or a dict
            with the keys ``question``, ``confidence``, ``is_confident``,
            ``answer``, ``confidence_level``, ``start_position``,
            ``end_position`` and ``highlighted_context``.

    Returns:
        The error line when ``"error"`` is present, otherwise the
        newline-joined report.
    """
    if "error" in result:
        return f"❌ {result['error']}"

    lines = [f"❓ Question: {result['question']}"]

    # One bar segment per full 10% of confidence.
    score = result['confidence']
    trusted = result['is_confident']
    marker = "✅" if trusted else "⚠️"
    bar = "█" * int(score * 10)

    lines.append(f"{marker} Answer: {result['answer']}")
    lines.append(
        f"📊 Confidence: {result['confidence_level']} ({score:.1%}) {bar}"
    )

    if not trusted:
        lines.append("⚠️ Low confidence - answer might not be reliable")

    lines += [
        f"\n📍 Position: characters {result['start_position']}-{result['end_position']}",
        "📄 Context with answer highlighted:",
        f" {result['highlighted_context']}",
    ]
    return "\n".join(lines)
|
||||
|
||||
@staticmethod
def format_qa_context_analysis(analysis: Dict[str, Any]) -> str:
    """Render the analysis of a freshly supplied QA context as display text.

    Args:
        analysis: Either a dict holding an ``"error"`` message, or a dict
            with ``context_stats`` (``word_count``, ``sentence_count``,
            ``character_count``), ``suggested_questions`` and ``tips``.

    Returns:
        The error line when ``"error"`` is present, otherwise the
        newline-joined summary. The suggestion and tip sections are
        omitted when their lists are empty.
    """
    if "error" in analysis:
        return f"❌ {analysis['error']}"

    counts = analysis['context_stats']
    lines = [
        "✅ Context set successfully!",
        "📊 Context Statistics:",
        f" • Words: {counts['word_count']}",
        f" • Sentences: ~{counts['sentence_count']}",
        f" • Characters: {counts['character_count']}",
    ]

    suggestions = analysis['suggested_questions']
    if suggestions:
        lines.append("\n💡 Suggested question types:")
        lines.extend(f" • {item}" for item in suggestions)

    tips = analysis['tips']
    if tips:
        lines.append("\n📝 Tips for good questions:")
        lines.extend(f" • {item}" for item in tips)

    return "\n".join(lines)
|
||||
|
||||
@staticmethod
def format_qa_multiple_result(result: Dict[str, Any]) -> str:
    """Render the outcome of a multi-question QA run as display text.

    Args:
        result: Either a dict holding an ``"error"`` message, or a dict
            with ``total_questions``, ``processed_questions``,
            ``confident_answers``, ``average_confidence`` and ``results``
            (each entry providing ``question_number``, ``question``,
            ``answer``, ``confidence`` and ``is_confident``).

    Returns:
        The error line when ``"error"`` is present, otherwise a summary
        header followed by one numbered entry per answered question.
    """
    if "error" in result:
        return f"❌ {result['error']}"

    lines = [
        "📊 Multiple Questions Analysis",
        "=" * 50,
        f"Total Questions: {result['total_questions']}",
        f"Successfully Processed: {result['processed_questions']}",
        f"Confident Answers: {result['confident_answers']}",
        f"Average Confidence: {result['average_confidence']:.1%}",
        "\n📋 Results:",
    ]

    for entry in result['results']:
        marker = "✅" if entry['is_confident'] else "⚠️"
        lines.append(f"\n{entry['question_number']}. {entry['question']}")
        lines.append(f" {marker} {entry['answer']} ({entry['confidence']:.1%})")

    return "\n".join(lines)
|
||||
|
|
|
|||
|
|
@ -6,5 +6,6 @@ from .fillmask import FillMaskCommand
|
|||
from .textgen import TextGenCommand
|
||||
from .moderation import ModerationCommand
|
||||
from .ner import NERCommand
|
||||
from .qa import QACommand
|
||||
|
||||
__all__ = ['SentimentCommand', 'FillMaskCommand', 'TextGenCommand', 'ModerationCommand', 'NERCommand']
|
||||
__all__ = ['SentimentCommand', 'FillMaskCommand', 'TextGenCommand', 'ModerationCommand', 'NERCommand', 'QACommand']
|
||||
|
|
|
|||
|
|
@ -0,0 +1,214 @@
|
|||
from src.cli.base import CLICommand
|
||||
from src.cli.display import DisplayFormatter
|
||||
from src.pipelines.qa import QuestionAnsweringSystem
|
||||
|
||||
|
||||
class QACommand(CLICommand):
    """Interactive Question Answering command.

    Holds one context string at a time, sends questions about it to a
    lazily created ``QuestionAnsweringSystem`` and keeps every successful
    answer in ``session_questions`` for later review.
    """

    def __init__(self):
        # The model is created on first run(), not at registration time.
        self.qa_system = None
        self.current_context = None
        self.session_questions = []

    @property
    def name(self) -> str:
        """Command keyword under which this command is registered."""
        return "qa"

    @property
    def description(self) -> str:
        """One-line description of the command."""
        return "Question Answering - Ask questions about a given text"

    def _initialize_qa_system(self):
        """Create the QA system on first use; later calls do nothing."""
        if self.qa_system is None:
            print("🔄 Loading Question Answering model...")
            self.qa_system = QuestionAnsweringSystem()
            DisplayFormatter.show_success("QA model loaded!")

    def _show_instructions(self):
        """Print usage instructions, examples and the available sub-commands."""
        print("\n❓ Question Answering System")
        print("Ask questions about a text context and get precise answers.")
        print("\n📝 How it works:")
        print(" 1. First, provide a context (text containing information)")
        print(" 2. Then ask questions about that context")
        print(" 3. The system extracts answers directly from the text")
        print("\n💡 Example context:")
        print(" 'Albert Einstein was born in 1879 in Germany. He developed the theory of relativity.'")
        print("💡 Example questions:")
        print(" - When was Einstein born?")
        print(" - Where was Einstein born?")
        print(" - What theory did Einstein develop?")
        print("\n🎛️ Commands:")
        print(" 'back' - Return to main menu")
        print(" 'help' - Show these instructions")
        print(" 'context' - Set new context")
        print(" 'multi' - Ask multiple questions at once")
        print(" 'session' - Review session history")
        print(" 'settings' - Adjust confidence threshold")
        print("-" * 70)

    def _set_context(self):
        """Read a multi-line context from the user and make it current.

        Input lines are collected until the user types 'done'; blank lines
        are dropped and the rest are joined with single spaces.

        Returns:
            True when a new context was accepted, False when nothing was
            entered or the QA system reported an error. On failure the
            previously active context is left untouched.
        """
        print("\n📄 Set Context")
        print("Enter the text that will serve as context for your questions.")
        print("You can enter multiple lines. Type 'done' when finished.")
        print("-" * 50)

        lines = []
        while True:
            line = input("📝 ").strip()
            if line.lower() == 'done':
                break
            if line:
                lines.append(line)

        if not lines:
            DisplayFormatter.show_warning("No context provided")
            return False

        candidate = " ".join(lines)

        # Analyze first, commit second: a rejected context must not replace
        # the one the user is currently working with.
        analysis = self.qa_system.interactive_qa(candidate)
        if "error" in analysis:
            DisplayFormatter.show_error(analysis["error"])
            return False

        self.current_context = candidate
        print(DisplayFormatter.format_qa_context_analysis(analysis))
        return True

    def _answer_question(self, question: str) -> None:
        """Answer one question against the current context and print the result.

        Shared by the main loop and ``_ask_single_question`` so both record
        and display answers in exactly the same way. Results that carry an
        "error" key are printed but not added to the session history.
        """
        if not self.current_context:
            DisplayFormatter.show_warning("Please set a context first using 'context' command")
            return

        DisplayFormatter.show_loading("Finding answer...")
        result = self.qa_system.answer(question, self.current_context)

        if "error" not in result:
            self.session_questions.append(result)

        print(DisplayFormatter.format_qa_result(result))

    def _ask_single_question(self):
        """Prompt for a single question about the current context and answer it."""
        # Check before prompting so the user is not asked for a question
        # that cannot be answered yet.
        if not self.current_context:
            DisplayFormatter.show_warning("Please set a context first using 'context' command")
            return

        question = input("\n❓ Your question: ").strip()
        if not question:
            DisplayFormatter.show_warning("Please enter a question")
            return

        self._answer_question(question)

    def _multi_question_mode(self):
        """Collect several questions, answer them in one pass and print a summary."""
        if not self.current_context:
            DisplayFormatter.show_warning("Please set a context first using 'context' command")
            return

        print("\n❓ Multiple Questions Mode")
        print("Enter your questions one by one. Type 'done' when finished.")
        print("-" * 50)

        questions = []
        while True:
            question = input(f"Question #{len(questions)+1}: ").strip()
            if question.lower() == 'done':
                break
            if question:
                questions.append(question)

        if not questions:
            DisplayFormatter.show_warning("No questions provided")
            return

        DisplayFormatter.show_loading(f"Processing {len(questions)} questions...")
        result = self.qa_system.answer_multiple(questions, self.current_context)

        if "error" not in result:
            self.session_questions.extend(result["results"])

        print(DisplayFormatter.format_qa_multiple_result(result))

    def _show_session_history(self):
        """Print every question answered so far in this session."""
        if not self.session_questions:
            DisplayFormatter.show_warning("No questions asked in this session yet")
            return

        print(f"\n📚 Session History ({len(self.session_questions)} questions)")
        print("=" * 60)

        for i, qa in enumerate(self.session_questions, 1):
            confidence_emoji = "✅" if qa["is_confident"] else "⚠️"
            print(f"\n{i}. {qa['question']}")
            print(f" {confidence_emoji} {qa['answer']} (confidence: {qa['confidence']:.1%})")

    def _adjust_settings(self):
        """Show the confidence threshold and let the user enter a new one.

        An empty answer keeps the current value. A ValueError from parsing
        the number or from the QA system's setter is reported as an invalid
        value.
        """
        current_threshold = self.qa_system.confidence_threshold
        print("\n⚙️ Current Settings:")
        print(f"Confidence threshold: {current_threshold:.2f}")
        print("\nLower threshold = more answers accepted (less strict)")
        print("Higher threshold = fewer answers accepted (more strict)")

        try:
            new_threshold = input(f"Enter new threshold (0.0-1.0, current: {current_threshold}): ").strip()
            if new_threshold:
                threshold = float(new_threshold)
                self.qa_system.set_confidence_threshold(threshold)
                DisplayFormatter.show_success(f"Threshold set to {threshold:.2f}")
        except ValueError:
            DisplayFormatter.show_error("Invalid threshold value")

    def run(self):
        """Run the interactive Question Answering loop until the user types 'back'.

        Input matching a sub-command (case-insensitive) is dispatched to its
        handler; any other non-empty input is treated as a question about
        the current context.
        """
        self._initialize_qa_system()
        self._show_instructions()

        handlers = {
            'help': self._show_instructions,
            'context': self._set_context,
            'multi': self._multi_question_mode,
            'session': self._show_session_history,
            'settings': self._adjust_settings,
        }

        while True:
            if self.current_context:
                context_preview = (
                    (self.current_context[:50] + "...")
                    if len(self.current_context) > 50
                    else self.current_context
                )
                prompt = f"\n💬 [{context_preview}] Ask a question: "
            else:
                prompt = "\n💬 Enter command or set context first: "

            user_input = input(prompt).strip()
            command = user_input.lower()

            if command == 'back':
                break

            handler = handlers.get(command)
            if handler is not None:
                handler()
                continue

            if not user_input:
                DisplayFormatter.show_warning("Please enter a question or command")
                continue

            # Not a command: treat it as a question. The helper warns when
            # no context has been set yet.
            self._answer_question(user_input)
|
||||
|
|
@ -19,6 +19,7 @@ class Config:
|
|||
"textgen": "gpt2",
|
||||
"moderation": "unitary/toxic-bert",
|
||||
"ner": "dbmdz/bert-large-cased-finetuned-conll03-english",
|
||||
"qa": "distilbert-base-cased-distilled-squad",
|
||||
}
|
||||
|
||||
# Interface
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from src.commands import (
|
|||
FillMaskCommand,
|
||||
ModerationCommand,
|
||||
NERCommand,
|
||||
QACommand,
|
||||
SentimentCommand,
|
||||
TextGenCommand,
|
||||
)
|
||||
|
|
@ -31,6 +32,7 @@ def main():
|
|||
TextGenCommand,
|
||||
ModerationCommand,
|
||||
NERCommand,
|
||||
QACommand,
|
||||
]
|
||||
for command in commands_to_register:
|
||||
cli.register_command(command())
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from .fillmask import FillMaskAnalyzer
|
|||
from .textgen import TextGenerator
|
||||
from .moderation import ContentModerator
|
||||
from .ner import NamedEntityRecognizer
|
||||
from .qa import QuestionAnsweringSystem
|
||||
from .template import TemplatePipeline
|
||||
|
||||
__all__ = ['SentimentAnalyzer', 'FillMaskAnalyzer', 'TextGenerator', 'ContentModerator', 'NamedEntityRecognizer', 'TemplatePipeline']
|
||||
__all__ = ['SentimentAnalyzer', 'FillMaskAnalyzer', 'TextGenerator', 'ContentModerator', 'NamedEntityRecognizer', 'QuestionAnsweringSystem', 'TemplatePipeline']
|
||||
|
|
|
|||
|
|
@ -0,0 +1,266 @@
|
|||
from transformers import pipeline
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from src.config import Config
|
||||
import re
|
||||
|
||||
|
||||
class QuestionAnsweringSystem:
    """Question Answering system built on a transformers pipeline.

    Wraps a ``"question-answering"`` pipeline and adds confidence labels,
    answer highlighting, multi-question summaries and simple context
    analysis. All public methods report problems as ``{"error": ...}``
    dicts, except ``set_confidence_threshold`` which raises.
    """

    # Sentence terminators used for the approximate sentence count.
    _SENTENCE_END = re.compile(r'[.!?]+')

    # (pattern, suggestion) pairs, checked in order against the context.
    _SUGGESTION_RULES = (
        (re.compile(r'\b\d{4}\b'), "When did [event] happen?"),  # Years
        (re.compile(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b'), "Who is [person name]?"),  # Names
        (
            re.compile(r'\b(founded|created|established|built)\b', re.IGNORECASE),
            "Who founded/created [organization]?",
        ),
        (
            re.compile(r'\b(located|situated|based)\b', re.IGNORECASE),
            "Where is [place/organization] located?",
        ),
        (
            re.compile(r'\b(because|due to|reason)\b', re.IGNORECASE),
            "Why did [event] happen?",
        ),
        (
            re.compile(r'\b(how|method|process)\b', re.IGNORECASE),
            "How does [process] work?",
        ),
    )

    # Lower bounds for each confidence label, highest first.
    _CONFIDENCE_LEVELS = (
        (0.8, "Very High"),
        (0.6, "High"),
        (0.4, "Medium"),
        (0.2, "Low"),
    )

    def __init__(self, model_name: Optional[str] = None):
        """
        Initialize the question-answering pipeline

        Args:
            model_name: Name of the model to use; when omitted the name is
                taken from ``Config.get_model("qa")``
        """
        self.model_name = model_name or Config.get_model("qa")
        print(f"Loading Question Answering model: {self.model_name}")
        self.pipeline = pipeline("question-answering", model=self.model_name)
        print("QA model loaded successfully!")

        # Default confidence threshold
        self.confidence_threshold = 0.1

    def answer(self, question: str, context: str, max_answer_len: int = 50) -> Dict:
        """
        Answer a question based on the given context

        Args:
            question: Question to answer
            context: Context text containing the answer
            max_answer_len: Maximum answer length, passed through to the pipeline

        Returns:
            Dictionary with question, context, answer, rounded confidence,
            confidence level, start/end positions, highlighted context and
            an ``is_confident`` flag; or ``{"error": ...}`` when an input is
            blank or the pipeline raises
        """
        if not question.strip():
            return {"error": "Empty question"}

        if not context.strip():
            return {"error": "Empty context"}

        try:
            result = self.pipeline(
                question=question,
                context=context,
                max_answer_len=max_answer_len
            )

            score = result["score"]
            highlighted_context = self._highlight_answer_in_context(
                context, result["answer"], result["start"], result["end"]
            )

            return {
                "question": question,
                "context": context,
                "answer": result["answer"],
                "confidence": round(score, 4),
                "confidence_level": self._get_confidence_level(score),
                "start_position": result["start"],
                "end_position": result["end"],
                "highlighted_context": highlighted_context,
                # Compared on the raw score, not the rounded one.
                "is_confident": score >= self.confidence_threshold
            }

        except Exception as e:
            # Deliberate boundary: any failure is reported to the caller as
            # an error dict instead of propagating.
            return {"error": f"QA processing error: {str(e)}"}

    def _get_confidence_level(self, score: float) -> str:
        """
        Convert numerical score to confidence level

        Args:
            score: Confidence score (0-1)

        Returns:
            "Very High" (>= 0.8), "High" (>= 0.6), "Medium" (>= 0.4),
            "Low" (>= 0.2) or "Very Low" otherwise
        """
        for floor, label in self._CONFIDENCE_LEVELS:
            if score >= floor:
                return label
        return "Very Low"

    def _highlight_answer_in_context(self, context: str, answer: str, start: int, end: int) -> str:
        """
        Highlight the answer within the context

        Args:
            context: Original context
            answer: Extracted answer
            start: Start position of answer
            end: End position of answer

        Returns:
            Context with the span ``[start, end)`` replaced by ``**answer**``.
            The context is returned unchanged when the span is out of range
            or inverted.
        """
        # An inverted span would duplicate the text between end and start,
        # so it is rejected together with out-of-range positions.
        if start < 0 or end > len(context) or start > end:
            return context

        return f"{context[:start]}**{answer}**{context[end:]}"

    def answer_multiple(self, questions: List[str], context: str, max_answer_len: int = 50) -> Dict:
        """
        Answer multiple questions for the same context

        Args:
            questions: List of questions to answer
            context: Context text
            max_answer_len: Maximum length of answers

        Returns:
            Dictionary with all successful answers (each tagged with its
            1-based ``question_number``) and summary statistics; or
            ``{"error": ...}`` when there are no questions, the context is
            blank, or no question could be answered
        """
        if not questions:
            return {"error": "No questions provided"}

        if not context.strip():
            return {"error": "Empty context"}

        results = []
        confident_answers = 0
        total_confidence = 0

        for i, question in enumerate(questions, 1):
            result = self.answer(question, context, max_answer_len)

            # Failed questions are skipped; they still count towards
            # total_questions below.
            if "error" in result:
                continue

            results.append({
                "question_number": i,
                **result
            })

            if result["is_confident"]:
                confident_answers += 1
            total_confidence += result["confidence"]

        if not results:
            return {"error": "No valid questions processed"}

        average_confidence = total_confidence / len(results)

        return {
            "context": context,
            "total_questions": len(questions),
            "processed_questions": len(results),
            "confident_answers": confident_answers,
            "average_confidence": round(average_confidence, 4),
            "confidence_threshold": self.confidence_threshold,
            "results": results
        }

    def interactive_qa(self, context: str) -> Dict:
        """
        Prepare context for interactive Q&A session

        Args:
            context: Context text for questions

        Returns:
            Dictionary with the context, basic statistics (word, sentence
            and character counts), suggested question templates and fixed
            tips; or ``{"error": "Empty context"}`` for blank input
        """
        if not context.strip():
            return {"error": "Empty context"}

        # Basic context analysis. The sentence count is approximate: text is
        # split on runs of '.', '!' or '?' and non-blank pieces are counted.
        word_count = len(context.split())
        sentence_count = sum(
            1 for piece in self._SENTENCE_END.split(context) if piece.strip()
        )
        char_count = len(context)

        # Suggest question types based on content
        suggested_questions = self._generate_question_suggestions(context)

        return {
            "context": context,
            "context_stats": {
                "word_count": word_count,
                "sentence_count": sentence_count,
                "character_count": char_count
            },
            "suggested_questions": suggested_questions,
            "tips": [
                "Ask specific questions about facts mentioned in the text",
                "Use question words: Who, What, When, Where, Why, How",
                "Keep questions clear and focused",
                "The answer should be present in the provided context"
            ]
        }

    def _generate_question_suggestions(self, context: str) -> List[str]:
        """
        Generate suggested questions based on context analysis

        Args:
            context: Context text

        Returns:
            Up to 5 question templates whose pattern matches the context,
            or three generic questions when nothing matches
        """
        suggestions = [
            template
            for pattern, template in self._SUGGESTION_RULES
            if pattern.search(context)
        ]

        if not suggestions:
            suggestions = [
                "What is the main topic of this text?",
                "Who are the key people mentioned?",
                "What important events are described?"
            ]

        return suggestions[:5]  # Limit to 5 suggestions

    def set_confidence_threshold(self, threshold: float):
        """
        Set the confidence threshold for answers

        Args:
            threshold: Threshold between 0 and 1 (inclusive)

        Raises:
            ValueError: If the threshold is outside [0, 1]
        """
        if 0 <= threshold <= 1:
            self.confidence_threshold = threshold
        else:
            raise ValueError("Threshold must be between 0 and 1")

    def answer_batch(self, qa_pairs: List[Tuple[str, str]], max_answer_len: int = 50) -> List[Dict]:
        """
        Process multiple question-context pairs

        Args:
            qa_pairs: List of (question, context) tuples
            max_answer_len: Maximum length of answers

        Returns:
            List of QA results, one per pair and in the same order
        """
        return [
            self.answer(question, context, max_answer_len)
            for question, context in qa_pairs
        ]
|
||||
Loading…
Reference in New Issue