From 79227d6defec861d812f3dfa26dbf74d4f0f47dd Mon Sep 17 00:00:00 2001
From: Cyril
Date: Sun, 12 Oct 2025 22:19:52 +0200
Subject: [PATCH 1/2] Increase default max_new_tokens in generate method to
 enhance text generation capacity

---
 src/pipelines/textgen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pipelines/textgen.py b/src/pipelines/textgen.py
index e29d7c8..31b441a 100644
--- a/src/pipelines/textgen.py
+++ b/src/pipelines/textgen.py
@@ -30,7 +30,7 @@ class TextGenerator:
 
         print("Model loaded successfully!")
 
-    def generate(self, prompt: str, max_new_tokens: int = 100, num_return_sequences: int = 1,
+    def generate(self, prompt: str, max_new_tokens: int = 500, num_return_sequences: int = 1,
                  temperature: float = 1.0, do_sample: bool = True) -> Dict:
         """
         Generate text from a prompt
-- 
2.40.1


From c6c14f767dd14c6efca266fa0a2e372e93f5fb1b Mon Sep 17 00:00:00 2001
From: Cyril
Date: Sat, 18 Oct 2025 17:11:13 +0200
Subject: [PATCH 2/2] Enhance text generation API: add configurable
 parameters, update models, and improve UI for better user experience

---
 src/api/app.py           |  39 ++++-
 src/api/models.py        |  13 ++
 src/pipelines/textgen.py |  68 ++++++--
 ui/index.html            |  66 +++++++-
 ui/script.js             | 321 ++++++++++++++++++++++++++++++++++++++-
 ui/style.css             | 305 ++++++++++++++++++++++++++++++++++++++-
 6 files changed, 775 insertions(+), 37 deletions(-)

diff --git a/src/api/app.py b/src/api/app.py
index dded0e0..3f465a0 100644
--- a/src/api/app.py
+++ b/src/api/app.py
@@ -6,9 +6,10 @@ from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
 from typing import Dict, Any
 import logging
+import torch
 
 from .models import (
-    TextRequest, TextListRequest, QARequest, FillMaskRequest,
+    TextRequest, TextListRequest, QARequest, FillMaskRequest, TextGenRequest,
     SentimentResponse, NERResponse, QAResponse, FillMaskResponse,
     ModerationResponse, TextGenResponse, BatchResponse
 )
@@ -278,20 +279,40 @@ async def moderate_content(request: TextRequest):
 
 
 @app.post("/textgen", response_model=TextGenResponse)
-async def generate_text(request: TextRequest):
-    """Generate text from a prompt"""
+async def generate_text(request: TextGenRequest):
+    """Generate text from a prompt with configurable parameters"""
     try:
         if "textgen" not in pipelines:
             raise HTTPException(status_code=503, detail="Text generation pipeline not available")
 
         logging.info(f"Generating text for prompt: {request.text[:50]}...")
 
+        # Extract generation parameters
+        gen_params = {
+            "system_prompt": request.system_prompt,
+            "max_new_tokens": request.max_new_tokens,
+            "num_return_sequences": request.num_return_sequences,
+            "temperature": request.temperature,
+            "do_sample": request.do_sample
+        }
+
         if request.model_name:
-            from src.pipelines.textgen import TextGenerator
-            textgen = TextGenerator(request.model_name)
-            result = textgen.generate(request.text)
+            try:
+                from src.pipelines.textgen import TextGenerator
+                textgen = TextGenerator(request.model_name)
+                result = textgen.generate(request.text, **gen_params)
+            except torch.cuda.OutOfMemoryError:
+                logging.warning(f"CUDA OOM with model {request.model_name}, trying with default model on CPU")
+                result = pipelines["textgen"].generate(request.text, **gen_params)
+            except Exception as model_error:
+                logging.error(f"Error with custom model {request.model_name}: {model_error}")
+                return TextGenResponse(
+                    success=False,
+                    prompt=request.text,
+                    message=f"Erreur avec le modèle {request.model_name}: {str(model_error)}. Essayez avec le modèle par défaut."
+                )
         else:
-            result = pipelines["textgen"].generate(request.text)
+            result = pipelines["textgen"].generate(request.text, **gen_params)
 
         logging.info(f"Generation result keys: {list(result.keys())}")
 
@@ -311,7 +332,9 @@ async def generate_text(request: TextRequest):
         return TextGenResponse(
             success=True,
             prompt=result["prompt"],
-            generated_text=result["prompt"] + " " + generated_text
+            generated_text=generated_text,
+            parameters=result.get("parameters", {}),
+            generations=result.get("generations", [])
         )
     except Exception as e:
         logging.error(f"TextGen endpoint error: {str(e)}", exc_info=True)
diff --git a/src/api/models.py b/src/api/models.py
index eae6336..f33ec73 100644
--- a/src/api/models.py
+++ b/src/api/models.py
@@ -12,6 +12,17 @@ class TextRequest(BaseModel):
     model_name: Optional[str] = None
 
 
+class TextGenRequest(BaseModel):
+    """Request model for text generation with configuration parameters"""
+    text: str
+    system_prompt: Optional[str] = None
+    model_name: Optional[str] = None
+    max_new_tokens: int = 500
+    num_return_sequences: int = 1
+    temperature: float = 1.0
+    do_sample: bool = True
+
+
 class TextListRequest(BaseModel):
     """Request model for multiple texts"""
     texts: List[str]
@@ -76,6 +87,8 @@ class TextGenResponse(BaseResponse):
     """Response model for Text Generation"""
     prompt: str
     generated_text: Optional[str] = None
+    parameters: Optional[Dict[str, Any]] = None
+    generations: Optional[List[Dict[str, Any]]] = None
 
 
 class BatchResponse(BaseResponse):
diff --git a/src/pipelines/textgen.py b/src/pipelines/textgen.py
index 31b441a..e1876fc 100644
--- a/src/pipelines/textgen.py
+++ b/src/pipelines/textgen.py
@@ -1,5 +1,6 @@
 from transformers import pipeline
 from typing import Dict, List, Optional
+import torch
 
 from src.config import Config
 
@@ -16,27 +17,55 @@ class TextGenerator:
         self.model_name = model_name or Config.get_model("textgen")
         print(f"Loading text generation model: {self.model_name}")
 
-        # Initialize pipeline with proper device configuration
-        self.pipeline = pipeline(
-            "text-generation",
-            model=self.model_name,
-            device=0 if Config.USE_GPU else -1,
-            torch_dtype="auto"
-        )
+        # Clear GPU cache before loading new model
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        # Try GPU first, fallback to CPU if CUDA OOM
+        try:
+            # Initialize pipeline with proper device configuration
+            self.pipeline = pipeline(
+                "text-generation",
+                model=self.model_name,
+                device=0 if Config.USE_GPU else -1,
+                torch_dtype="auto"
+            )
+            print(f"Model loaded successfully on {'GPU' if Config.USE_GPU else 'CPU'}!")
+
+        except torch.cuda.OutOfMemoryError:
+            print("⚠️ GPU out of memory, falling back to CPU...")
+            # Force CPU usage
+            self.pipeline = pipeline(
+                "text-generation",
+                model=self.model_name,
+                device=-1,  # CPU
+                torch_dtype="auto"
+            )
+            print("Model loaded successfully on CPU!")
+
+        except Exception as e:
+            print(f"⚠️ Error loading model on GPU, trying CPU: {e}")
+            # Fallback to CPU
+            self.pipeline = pipeline(
+                "text-generation",
+                model=self.model_name,
+                device=-1,  # CPU
+                torch_dtype="auto"
+            )
+            print("Model loaded successfully on CPU!")
 
         # Set pad token if not available
         if self.pipeline.tokenizer.pad_token is None:
             self.pipeline.tokenizer.pad_token = self.pipeline.tokenizer.eos_token
-
-        print("Model loaded successfully!")
 
-    def generate(self, prompt: str, max_new_tokens: int = 500, num_return_sequences: int = 1,
-                 temperature: float = 1.0, do_sample: bool = True) -> Dict:
+    def generate(self, prompt: str, system_prompt: Optional[str] = None, max_new_tokens: int = 500,
+                 num_return_sequences: int = 1, temperature: float = 1.0, do_sample: bool = True) -> Dict:
         """
         Generate text from a prompt
 
         Args:
             prompt: Input text prompt
+            system_prompt: Optional system prompt to set context/role
            max_new_tokens: Maximum number of new tokens to generate
            num_return_sequences: Number of sequences to generate
            temperature: Sampling temperature (higher = more random)
@@ -48,9 +77,14 @@ class TextGenerator:
         if not prompt.strip():
             return {"error": "Empty prompt"}
 
+        if system_prompt:
+            full_prompt = f"{system_prompt.strip()}\n\n{prompt.strip()}\n\n"
+        else:
+            full_prompt = f"{prompt.strip()}\n\n"
+
         try:
             results = self.pipeline(
-                prompt,
+                full_prompt,
                 max_new_tokens=max_new_tokens,
                 num_return_sequences=num_return_sequences,
                 temperature=temperature,
@@ -58,17 +92,19 @@
                 pad_token_id=self.pipeline.tokenizer.eos_token_id,
                 return_full_text=True
             )
-
+
             generations = [
                 {
                     "text": result["generated_text"],
-                    "continuation": result["generated_text"][len(prompt):].strip()
+                    "continuation": result["generated_text"][len(full_prompt):].strip()
                 }
                 for result in results
             ]
-
+
             return {
                 "prompt": prompt,
+                "system_prompt": system_prompt,
+                "full_prompt": full_prompt,
                 "parameters": {
                     "max_new_tokens": max_new_tokens,
                     "num_sequences": num_return_sequences,
@@ -77,7 +113,7 @@
                 },
                 "generations": generations
             }
-
+
         except Exception as e:
             return {"error": f"Generation error: {str(e)}"}
diff --git a/ui/index.html b/ui/index.html
index 1ca4ea6..8648e4f 100644
--- a/ui/index.html
+++ b/ui/index.html
@@ -279,9 +279,16 @@

[The ui/index.html hunk is garbled here: extraction stripped the HTML markup, leaving only diff markers and visible strings. Recoverable content: the tab header "✍️ Génération de Texte"; the description "Générez du texte créatif à partir d'un prompt" becomes "Générez du texte créatif à partir d'un prompt avec des paramètres configurables"; and a new system-prompt input is added with the hint "Définit le rôle ou le contexte du modèle (optionnel)". The remainder of the patch (rest of ui/index.html, the ui/script.js and ui/style.css hunks, and the trailing signature) is truncated in this excerpt.]
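
For reference, a minimal client-side sketch of the reworked /textgen endpoint. This is illustrative only: the base URL is an assumption (the patch does not pin a host or port), requests is an assumed dependency, and the payload uses exactly the fields that TextGenRequest defines.

    import requests

    # Assumption: the FastAPI app is served locally on port 8000.
    API_URL = "http://localhost:8000"

    payload = {
        "text": "Il était une fois",
        "system_prompt": "Tu es un conteur pour enfants.",  # new optional field
        "max_new_tokens": 200,    # TextGenRequest defaults to 500
        "num_return_sequences": 2,
        "temperature": 0.8,
        "do_sample": True,
        # "model_name": "...",    # optional; takes the OOM-guarded custom-model path
    }

    resp = requests.post(f"{API_URL}/textgen", json=payload, timeout=120)
    resp.raise_for_status()
    data = resp.json()

    # Per this patch, generated_text no longer repeats the prompt prefix,
    # and the response also echoes the parameters and every generation.
    print(data["generated_text"])
    for gen in data.get("generations") or []:
        print("-", gen["continuation"])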
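
And a sketch of driving the pipeline class directly, showing what generate() now does with system_prompt: it builds full_prompt as "{system_prompt}\n\n{prompt}\n\n", generates from that, and slices full_prompt back off each continuation. Which model actually loads depends on Config.get_model("textgen") in the target repo.

    from src.pipelines.textgen import TextGenerator

    gen = TextGenerator()  # default model; falls back to CPU on CUDA OOM

    result = gen.generate(
        "Écris un haïku sur l'automne.",
        system_prompt="Tu es un poète minimaliste.",
        max_new_tokens=60,
        temperature=0.9,
    )

    if "error" in result:
        print(result["error"])
    else:
        # full_prompt == "Tu es un poète minimaliste.\n\nÉcris un haïku sur l'automne.\n\n"
        print(result["full_prompt"])
        for g in result["generations"]:
            print(g["continuation"])  # generated text with the prompt prefix removed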