# -*- coding: utf-8 -*-
"""
Flashcards Tools - Enhanced with FlashcardGenerator and DifficultyScorer
"""

import json
import re
from pathlib import Path
from typing import Dict, List, Optional, Any

from deep_translator import GoogleTranslator

from .config import get_user_dir

# Import advanced generators (with fallback)
try:
    from .flashcard_generator import FlashcardGenerator
    HAS_FLASHCARD_GENERATOR = True
except ImportError:
    HAS_FLASHCARD_GENERATOR = False

try:
    from .difficulty_scorer import get_difficulty_scorer
    HAS_DIFFICULTY_SCORER = True
except ImportError:
    HAS_DIFFICULTY_SCORER = False


def _get_decks_dir(username: str) -> Path:
    """Returns the directory where all of a user's decks are stored."""
    user_dir = get_user_dir(username)
    decks_dir = user_dir / "decks"
    decks_dir.mkdir(parents=True, exist_ok=True)
    return decks_dir


def list_user_decks(username: str) -> Dict[str, Path]:
    """Returns a mapping of deck name -> deck json path."""
    decks_dir = _get_decks_dir(username)
    deck_files = sorted(decks_dir.glob("*.json"))
    decks: Dict[str, Path] = {}
    for path in deck_files:
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
            name = data.get("name") or path.stem
        except Exception:
            name = path.stem
        if name in decks and decks[name] != path:
            name = f"{name} ({path.stem})"
        decks[name] = path
    return decks


def _ensure_card_stats(card: Dict) -> None:
    """Ensure that a card has simple spaced-repetition stats."""
    if "score" not in card:
        card["score"] = 0
    if "reviews" not in card:
        card["reviews"] = 0


def _add_difficulty_to_card(card: Dict) -> Dict:
    """Add difficulty scoring to a card if DifficultyScorer is available."""
    if HAS_DIFFICULTY_SCORER:
        try:
            scorer = get_difficulty_scorer()
            return scorer.score_flashcard(card)
        except Exception:
            pass
    return card


def load_deck(path: Path) -> Dict:
    """Loads a deck from JSON with stats for spaced repetition."""
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        data = {}
    if "cards" not in data or not isinstance(data["cards"], list):
        data["cards"] = []
    if "name" not in data:
        data["name"] = path.stem
    if "tags" not in data or not isinstance(data["tags"], list):
        data["tags"] = []
    for card in data["cards"]:
        _ensure_card_stats(card)
    return data


def save_deck(path: Path, deck: Dict) -> None:
    """Saves deck to JSON."""
    if "cards" not in deck:
        deck["cards"] = []
    if "name" not in deck:
        deck["name"] = path.stem
    if "tags" not in deck or not isinstance(deck["tags"], list):
        deck["tags"] = []
    for card in deck["cards"]:
        _ensure_card_stats(card)
    path.write_text(json.dumps(deck, indent=2, ensure_ascii=False), encoding="utf-8")


def _extract_candidate_words(text: str) -> List[str]:
    """Simple tokenizer & filter for candidate vocab words."""
    tokens = re.findall(r"\b\w+\b", text, flags=re.UNICODE)
    out = []
    seen = set()
    for t in tokens:
        t_norm = t.strip()
        if len(t_norm) < 2:
            continue
        if any(ch.isdigit() for ch in t_norm):
            continue
        lower = t_norm.lower()
        if lower in seen:
            continue
        seen.add(lower)
        out.append(t_norm)
    return out


def generate_flashcards_from_ocr_results(
    username: str,
    ocr_results: List[Dict],
    deck_name: str = "ocr",
    target_lang: str = "en",
    tags: Optional[List[str]] = None,
    use_advanced_generator: bool = True,
) -> Path:
    """
    Takes OCR results and constructs a vocab deck.

    Args:
        username: User identifier
        ocr_results: List of OCR result dicts with 'text' key
        deck_name: Name for the deck
        target_lang: Target language for translations
        tags: Optional tags for the deck
        use_advanced_generator: Whether to use FlashcardGenerator

    Returns:
        Path to the saved deck
    """
    # Try advanced generator first
    if use_advanced_generator and HAS_FLASHCARD_GENERATOR:
        try:
            generator = FlashcardGenerator()
            flashcard_data = generator.generate_flashcards(ocr_results, target_lang)
            cards = flashcard_data.get('cards', [])

            if cards:
                # Add difficulty scores
                if HAS_DIFFICULTY_SCORER:
                    scorer = get_difficulty_scorer()
                    cards = scorer.score_all_flashcards(cards)

                # Ensure stats
                for card in cards:
                    _ensure_card_stats(card)

                decks_dir = _get_decks_dir(username)
                deck_path = decks_dir / f"{deck_name}.json"
                deck = {
                    "name": deck_name,
                    "cards": cards,
                    "tags": tags or ["ocr"],
                    "metadata": flashcard_data.get('metadata', {})
                }
                save_deck(deck_path, deck)
                return deck_path
        except Exception as e:
            print(f"[flashcards_tools] Advanced generator failed: {e}, using fallback")

    # Fallback to simple extraction
    all_text = []
    for res in ocr_results:
        t = res.get("text") or res.get("raw_text") or res.get("original_text") or ""
        if t:
            all_text.append(t)
    joined = "\n".join(all_text)

    words = _extract_candidate_words(joined)
    if not words:
        raise ValueError("No candidate words found in OCR results.")

    translator = GoogleTranslator(source="auto", target=target_lang)
    cards = []
    for w in words[:20]:  # Limit to 20 words
        try:
            trans = translator.translate(w)
        except Exception:
            continue
        if not trans:
            continue
        if trans.strip().lower() == w.strip().lower():
            continue
        card = {
            "front": w,
            "back": trans,
            "content_type": "ocr_vocab",
            "language": target_lang,
        }
        card = _add_difficulty_to_card(card)
        _ensure_card_stats(card)
        cards.append(card)

    if not cards:
        raise ValueError("No translatable words found to build cards.")

    decks_dir = _get_decks_dir(username)
    deck_path = decks_dir / f"{deck_name}.json"
    deck = {
        "name": deck_name,
        "cards": cards,
        "tags": tags or [],
    }
    save_deck(deck_path, deck)
    return deck_path


def generate_flashcards_from_text(
    username: str,
    text: str,
    deck_name: str = "conversation",
    target_lang: str = "en",
    tags: Optional[List[str]] = None,
    source_lang: Optional[str] = None,
) -> Path:
    """
    Build a vocab deck from raw text.

    Args:
        username: User identifier
        text: Raw text to extract vocabulary from
        deck_name: Name for the deck
        target_lang: Target language for translations
        tags: Optional tags for the deck
        source_lang: Source language (auto-detect if None)

    Returns:
        Path to the saved deck
    """
    # Try advanced generator first
    if HAS_FLASHCARD_GENERATOR:
        try:
            generator = FlashcardGenerator()
            # Create fake OCR result
            ocr_result = {
                'original_text': text,
                'text': text,
                'detected_language': source_lang or 'auto',
            }
            flashcard_data = generator.generate_flashcards([ocr_result], target_lang)
            cards = flashcard_data.get('cards', [])

            if cards:
                if HAS_DIFFICULTY_SCORER:
                    scorer = get_difficulty_scorer()
                    cards = scorer.score_all_flashcards(cards)

                for card in cards:
                    card['content_type'] = 'conversation_vocab'
                    _ensure_card_stats(card)

                decks_dir = _get_decks_dir(username)
                deck_path = decks_dir / f"{deck_name}.json"
                deck = {
                    "name": deck_name,
                    "cards": cards,
                    "tags": tags or ["conversation"],
                }
                save_deck(deck_path, deck)
                return deck_path
        except Exception as e:
            print(f"[flashcards_tools] Advanced generator failed: {e}, using fallback")

    # Fallback
    words = _extract_candidate_words(text)
    if not words:
        raise ValueError("No candidate words found in text.")

    translator = GoogleTranslator(source="auto", target=target_lang)
    cards = []
    for w in words[:20]:
        try:
            trans = translator.translate(w)
        except Exception:
            continue
        if not trans:
            continue
        if trans.strip().lower() == w.strip().lower():
            continue
        card = {
            "front": w,
            "back": trans,
            "content_type": "conversation_vocab",
            "language": target_lang,
        }
        card = _add_difficulty_to_card(card)
        _ensure_card_stats(card)
        cards.append(card)

    if not cards:
        raise ValueError("No translatable words found to build cards.")

    decks_dir = _get_decks_dir(username)
    deck_path = decks_dir / f"{deck_name}.json"
    deck = {
        "name": deck_name,
        "cards": cards,
        "tags": tags or ["conversation"],
    }
    save_deck(deck_path, deck)
    return deck_path


def add_difficulty_to_deck(deck: Dict) -> Dict:
    """Add difficulty scores to all cards in a deck."""
    if not HAS_DIFFICULTY_SCORER:
        return deck
    try:
        scorer = get_difficulty_scorer()
        deck["cards"] = scorer.score_all_flashcards(deck.get("cards", []))
        deck["statistics"] = scorer.get_statistics(deck["cards"])
    except Exception as e:
        print(f"[flashcards_tools] Difficulty scoring failed: {e}")
    return deck
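

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library API): builds a
# small deck via generate_flashcards_from_text and reloads it with load_deck.
# The username "demo", deck name "demo_deck", and the sample text are
# placeholder assumptions; translation goes through deep_translator and
# requires network access. Because this module uses relative imports, run it
# as a module inside its package (python -m <package>.flashcards_tools)
# rather than as a standalone script.
if __name__ == "__main__":
    try:
        demo_path = generate_flashcards_from_text(
            username="demo",
            text="bonjour merci fromage",
            deck_name="demo_deck",
            target_lang="en",
        )
        demo_deck = load_deck(demo_path)
        print(f"Saved {len(demo_deck['cards'])} cards to {demo_path}")
    except ValueError as exc:
        print(f"No cards generated: {exc}")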