ge-tool/backend/services/mongodb_service.py

"""
Module for MongoDB connection and data access.
Combines logic from the old project's mongodb.py and mongodb_submissions.py
"""
import os
import logging
import re
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Any
from pymongo import MongoClient, ASCENDING, DESCENDING
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError, DuplicateKeyError
from dotenv import load_dotenv
# Load environment variables from .env.local first, then .env
load_dotenv('.env.local')
load_dotenv() # Fallback to .env
# Module-level logger; handlers and formatting are configured on the root logger in main.py
logger = logging.getLogger(__name__)
# --- MongoDB Config ---
MONGODB_URI = os.getenv("MONGODB_URI")
if not MONGODB_URI:
    raise ValueError("MONGODB_URI not found in environment variables")
DATABASE_NAME = "schedule"
SUBMISSIONS_COLLECTION = "submissions"
TITLES_COLLECTION = "titles_data"
# --- Connection Caching ---
_mongodb_client = None
_submissions_collection = None
_titles_collection = None
# ----------------------
# Connection helpers
# ----------------------
def get_db_connection():
    """Initializes and returns the MongoDB database connection with caching."""
    global _mongodb_client
    if _mongodb_client is None:
        try:
            logger.debug("Initializing new MongoDB connection...")
            _mongodb_client = MongoClient(
                MONGODB_URI,
                serverSelectionTimeoutMS=5000,
                connectTimeoutMS=10000,
                socketTimeoutMS=10000
            )
            # Test connection
            _mongodb_client.admin.command('ping')
            logger.debug("MongoDB connection successful.")
        except (ConnectionFailure, ServerSelectionTimeoutError) as e:
            logger.error(f"Could not connect to MongoDB: {e}")
            _mongodb_client = None  # Reset on failure
            raise Exception(f"Could not connect to MongoDB: {e}")
        except Exception as e:
            logger.error(
                f"An unexpected error occurred during MongoDB initialization: {e}")
            _mongodb_client = None  # Reset on failure
            raise Exception(f"MongoDB initialization error: {e}")
    return _mongodb_client[DATABASE_NAME]

def get_submissions_collection():
    """Returns the submissions collection, initializing the connection if needed."""
    global _submissions_collection
    if _submissions_collection is None:
        db = get_db_connection()
        _submissions_collection = db[SUBMISSIONS_COLLECTION]
        # Safe to call here: the cache above is already populated, so the
        # re-entrant get_submissions_collection() call inside just returns it.
        _create_submission_indexes()
    return _submissions_collection

def get_titles_collection():
    """Returns the titles collection, initializing the connection if needed."""
    global _titles_collection
    if _titles_collection is None:
        db = get_db_connection()
        _titles_collection = db[TITLES_COLLECTION]
    return _titles_collection

def close_mongodb_connection():
    """Closes the MongoDB connection if it exists."""
    global _mongodb_client, _submissions_collection, _titles_collection
    if _mongodb_client:
        _mongodb_client.close()
        _mongodb_client = None
        # Also drop the cached collection handles so a later call
        # re-initializes against a fresh client instead of the closed one.
        _submissions_collection = None
        _titles_collection = None
        logger.debug("MongoDB connection closed.")

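# A minimal sketch of how these helpers might be wired into the application
# lifecycle. It assumes a FastAPI app in main.py (hypothetical; the framework
# is not confirmed by this module) and is kept as a comment so importing this
# module stays side-effect free:
#
#   from contextlib import asynccontextmanager
#   from fastapi import FastAPI
#   from services import mongodb_service
#
#   @asynccontextmanager
#   async def lifespan(app: FastAPI):
#       mongodb_service.get_db_connection()   # fail fast on bad config
#       yield
#       mongodb_service.close_mongodb_connection()
#
#   app = FastAPI(lifespan=lifespan)
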
# -------------------------------
# Indexes and initialization
# -------------------------------
def _create_submission_indexes():
    """Creates necessary indexes for the submissions collection."""
    try:
        collection = get_submissions_collection()
        # Unique submission_id
        collection.create_index(
            "submission_id", unique=True, name="idx_submission_id")
        # Timestamp for sorting
        collection.create_index(
            [("created_at", DESCENDING)], name="idx_created_at")
        # Status index
        collection.create_index("status", name="idx_status")
        # Compound index for queue ordering
        collection.create_index(
            [("status", ASCENDING), ("queue_position", ASCENDING)], name="idx_queue")
        # TTL index - automatically delete submissions 30 days after created_at.
        # Note: TTL expiry applies regardless of status, so even documents
        # still marked pending are removed once they are 30 days old.
        collection.create_index(
            "created_at", expireAfterSeconds=2592000, name="idx_ttl")
        logger.debug("Submission indexes created successfully.")
    except Exception as e:
        logger.error(f"Error creating submission indexes: {e}")

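# The compound idx_queue index backs the queue reads used elsewhere in this
# module, e.g. (illustrative only):
#
#   collection.find({"status": "pending"}).sort("queue_position", ASCENDING)
#   collection.find_one({"status": "pending"},
#                       sort=[("queue_position", ASCENDING)])
#
# Equality on status plus a sort on queue_position matches the index's key
# order, so these queries avoid an in-memory sort.
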
# ---------------------------------------------------
# Submissions Logic (adapted from mongodb_submissions.py)
# ---------------------------------------------------
def create_submission(submission_id: str, usernames: List[str], ge_input: str) -> Dict[str, Any]:
    """Creates a new submission with 'pending' status and assigns a queue_position."""
    try:
        collection = get_submissions_collection()
        now = datetime.utcnow()
        # Determine next queue position among pending submissions.
        # Note: this find-then-insert is not atomic, so two concurrent
        # creates could be assigned the same position.
        max_doc = collection.find_one(
            {"status": "pending"}, sort=[("queue_position", DESCENDING)])
        next_position = (max_doc.get("queue_position") + 1) if (
            max_doc and max_doc.get("queue_position") is not None) else 1
        submission_doc = {
            "submission_id": submission_id,
            "timestamp": now,
            "status": "pending",
            "input": {
                "usernames": usernames,
                "ge_input": ge_input
            },
            "results": [],
            "error_message": None,
            "created_at": now,
            "updated_at": now,
            "processing_started_at": None,
            "processing_completed_at": None,
            "queue_position": next_position,
            "retry_count": 0,
            "last_retry_at": None
        }
        result = collection.insert_one(submission_doc)
        # Convert ObjectId to string so the document is JSON-serializable
        submission_doc["_id"] = str(result.inserted_id)
        logger.debug(
            f"Created submission: {submission_id} at position {next_position}")
        return submission_doc
    except DuplicateKeyError:
        raise Exception(f"Submission ID {submission_id} already exists")
    except Exception as e:
        logger.error(f"Error creating submission: {e}")
        raise Exception(f"Could not create submission: {e}")

def get_submission_by_id(submission_id: str) -> Optional[Dict[str, Any]]:
    """Fetches a submission by its submission_id."""
    try:
        collection = get_submissions_collection()
        doc = collection.find_one({"submission_id": submission_id})
        if doc:
            doc["_id"] = str(doc["_id"])
        return doc
    except Exception as e:
        logger.error(f"Error fetching submission {submission_id}: {e}")
        return None

def get_submissions(limit: int = 50, status: Optional[str] = None) -> List[Dict[str, Any]]:
    """Fetches submissions, optionally filtered by status, newest first."""
    try:
        collection = get_submissions_collection()
        query = {}
        if status:
            query["status"] = status
        cursor = collection.find(query).sort(
            "created_at", DESCENDING).limit(limit)
        subs = []
        for doc in cursor:
            doc["_id"] = str(doc["_id"])
            subs.append(doc)
        return subs
    except Exception as e:
        logger.error(f"Error fetching submissions: {e}")
        return []

def get_pending_submissions() -> List[Dict[str, Any]]:
    """Returns pending submissions ordered by queue_position ascending."""
    try:
        collection = get_submissions_collection()
        cursor = collection.find({"status": "pending"}).sort(
            "queue_position", ASCENDING)
        subs = []
        for doc in cursor:
            doc["_id"] = str(doc["_id"])
            subs.append(doc)
        return subs
    except Exception as e:
        logger.error(f"Error fetching pending submissions: {e}")
        return []

def get_next_pending_submission() -> Optional[Dict[str, Any]]:
    """Return the next pending submission (lowest queue_position)."""
    try:
        collection = get_submissions_collection()
        doc = collection.find_one(
            {"status": "pending"}, sort=[("queue_position", ASCENDING)])
        if doc:
            doc["_id"] = str(doc["_id"])
        return doc
    except Exception as e:
        logger.error(f"Error fetching next pending submission: {e}")
        return None

def update_submission(
    submission_id: str,
    status: str,
    results: Optional[List[Dict]] = None,
    error_message: Optional[str] = None
) -> bool:
    """Updates the status and results of a submission and manages timestamps/queue position."""
    try:
        collection = get_submissions_collection()
        update_data = {
            "status": status,
            "updated_at": datetime.utcnow()
        }
        if status == "processing":
            update_data["processing_started_at"] = datetime.utcnow()
        elif status in ["completed", "failed"]:
            update_data["processing_completed_at"] = datetime.utcnow()
            update_data["queue_position"] = None
        if status == "completed" and results is not None:
            update_data["results"] = results
        if status == "failed" and error_message is not None:
            update_data["error_message"] = error_message
        result = collection.update_one(
            {"submission_id": submission_id}, {"$set": update_data})
        # Use matched_count rather than modified_count: re-applying an
        # identical status would otherwise be misreported as "not found".
        if result.matched_count > 0:
            logger.debug(
                f"Updated submission {submission_id} to status {status}")
            return True
        else:
            logger.warning(f"No submission found with ID: {submission_id}")
            return False
    except Exception as e:
        logger.error(f"Error updating submission {submission_id}: {e}")
        return False

def delete_submission(submission_id: str) -> bool:
    """Deletes a submission by its ID."""
    try:
        collection = get_submissions_collection()
        result = collection.delete_one({"submission_id": submission_id})
        if result.deleted_count > 0:
            logger.debug(f"Deleted submission: {submission_id}")
            return True
        else:
            logger.warning(f"No submission found with ID: {submission_id}")
            return False
    except Exception as e:
        logger.error(f"Error deleting submission {submission_id}: {e}")
        return False

def increment_retry_count(submission_id: str) -> bool:
    """Increment retry_count and set last_retry_at/updated_at."""
    try:
        collection = get_submissions_collection()
        result = collection.update_one(
            {"submission_id": submission_id},
            {
                "$inc": {"retry_count": 1},
                "$set": {"last_retry_at": datetime.utcnow(), "updated_at": datetime.utcnow()}
            }
        )
        return result.modified_count > 0
    except Exception as e:
        logger.error(f"Error incrementing retry count for {submission_id}: {e}")
        return False

def requeue_stuck_submissions(timeout_minutes: int = 30) -> int:
    """Requeue submissions stuck in processing longer than timeout_minutes back to pending."""
    try:
        collection = get_submissions_collection()
        timeout_date = datetime.utcnow() - timedelta(minutes=timeout_minutes)
        result = collection.update_many(
            {"status": "processing", "processing_started_at": {"$lt": timeout_date}},
            {
                "$set": {
                    "status": "pending",
                    "updated_at": datetime.utcnow(),
                    "processing_started_at": None
                },
                "$inc": {"retry_count": 1}
            }
        )
        logger.debug(f"Requeued {result.modified_count} stuck submissions")
        return result.modified_count
    except Exception as e:
        logger.error(f"Error requeuing stuck submissions: {e}")
        return 0

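# Taken together, the functions above compose into a simple polling queue.
# A consumer might look like the following sketch (hypothetical code, not part
# of this module; process_submission is a stand-in for the real job, and a
# single worker is assumed since the find-then-update here is not atomic):
#
#   import time
#
#   while True:
#       requeue_stuck_submissions(timeout_minutes=30)
#       sub = get_next_pending_submission()
#       if sub is None:
#           time.sleep(5)
#           continue
#       update_submission(sub["submission_id"], "processing")
#       try:
#           results = process_submission(sub["input"])
#           update_submission(sub["submission_id"], "completed", results=results)
#       except Exception as e:
#           increment_retry_count(sub["submission_id"])
#           update_submission(sub["submission_id"], "failed", error_message=str(e))
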
def cleanup_excess_submissions(max_keep: int = 15) -> int:
    """Keep only the newest `max_keep` completed/failed submissions; delete older ones."""
    try:
        collection = get_submissions_collection()
        count = collection.count_documents(
            {"status": {"$in": ["completed", "failed"]}})
        if count <= max_keep:
            logger.debug(
                f"Current completed/failed count ({count}) <= max_keep ({max_keep}), nothing to clean up")
            return 0
        to_delete = count - max_keep
        old_docs = list(collection.find(
            {"status": {"$in": ["completed", "failed"]}},
            {"_id": 1}
        ).sort("created_at", ASCENDING).limit(to_delete))
        if not old_docs:
            return 0
        ids = [d["_id"] for d in old_docs]
        result = collection.delete_many({"_id": {"$in": ids}})
        logger.debug(f"Cleaned up {result.deleted_count} excess submissions")
        return result.deleted_count
    except Exception as e:
        logger.error(f"Error cleaning up excess submissions: {e}")
        return 0

def cleanup_old_submissions(days: int = 30) -> int:
    """Delete completed/failed submissions older than `days` days."""
    try:
        collection = get_submissions_collection()
        cutoff = datetime.utcnow() - timedelta(days=days)
        result = collection.delete_many(
            {"created_at": {"$lt": cutoff}, "status": {"$in": ["completed", "failed"]}})
        logger.debug(f"Cleaned up {result.deleted_count} old submissions")
        return result.deleted_count
    except Exception as e:
        logger.error(f"Error cleaning up old submissions: {e}")
        return 0

def get_statistics() -> Dict[str, int]:
    """Return counts grouped by status and total."""
    try:
        collection = get_submissions_collection()
        pipeline = [{"$group": {"_id": "$status", "count": {"$sum": 1}}}]
        results = list(collection.aggregate(pipeline))
        stats = {"total": 0, "pending": 0,
                 "processing": 0, "completed": 0, "failed": 0}
        for item in results:
            status = item.get("_id")
            count = item.get("count", 0)
            if status in stats:
                stats[status] = count
                stats["total"] += count
        return stats
    except Exception as e:
        logger.error(f"Error getting statistics: {e}")
        return {"total": 0, "pending": 0, "processing": 0, "completed": 0, "failed": 0}

# ---------------------------------------------------
# Titles Logic (from mongodb.py)
# ---------------------------------------------------
# Note: This part is not directly used by the permission page,
# but it's good to have it here for future use.
def get_tms_data(ge_id: str, orig_lang: str) -> Optional[str]:
    """
    Fetches the TMS ID from the titles_data collection.
    Returns the TMS ID as a string or None if not found.
    """
    try:
        collection = get_titles_collection()
        query = {"geId": str(ge_id).strip(),
                 "lang": str(orig_lang).strip().upper()}
        document = collection.find_one(query)
        if not document:
            logger.warning(
                f"No document found for geId: {ge_id}, lang: {orig_lang}")
            return None
        # Try to extract from trTmsLink first, e.g. a link containing
        # ".../project/12345/..." yields "12345" (illustrative shape only)
        tms_link = document.get("trTmsLink")
        if tms_link and isinstance(tms_link, str):
            match = re.search(r'/project/(\d+)', tms_link)
            if match:
                return match.group(1)
        # Fall back to an explicit tmsId field if present
        tms_id_direct = document.get("tmsId")
        if tms_id_direct:
            return str(tms_id_direct).strip()
        return None
    except Exception as e:
        logger.error(f"Error querying MongoDB for TMS data: {e}")
        return None

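# get_tms_data above and the two helpers below all repeat the same geId/lang
# lookup. One way to factor that out is a shared private helper (a sketch
# only, not applied here; _find_title_document is a hypothetical name):
#
#   def _find_title_document(ge_id: str, orig_lang: str) -> Optional[Dict[str, Any]]:
#       collection = get_titles_collection()
#       return collection.find_one({
#           "geId": str(ge_id).strip(),
#           "lang": str(orig_lang).strip().upper(),
#       })
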
def get_path_from_tms_data(ge_id: str, orig_lang: str) -> Optional[str]:
    """
    Fetches the NAS path from the titles_data collection for raw file downloads.
    Uses the same query logic as get_tms_data but returns the 'path' field.
    Returns the path as a string or None if not found.
    """
    try:
        collection = get_titles_collection()
        query = {"geId": str(ge_id).strip(),
                 "lang": str(orig_lang).strip().upper()}
        document = collection.find_one(query)
        if not document:
            logger.warning(
                f"No document found for geId: {ge_id}, lang: {orig_lang}")
            return None
        # Get the path field directly
        path = document.get("path")
        if path and isinstance(path, str):
            return path.strip()
        logger.warning(
            f"No path field found for geId: {ge_id}, lang: {orig_lang}")
        return None
    except Exception as e:
        logger.error(f"Error querying MongoDB for path data: {e}")
        return None

def get_sharing_link_from_tms_data(ge_id: str, orig_lang: str) -> Optional[str]:
    """
    Fetches the sharing link (linkRaw) from the titles_data collection.
    Used for displaying the source in sharing mode downloads.
    Returns the linkRaw as a string or None if not found.
    """
    try:
        collection = get_titles_collection()
        query = {"geId": str(ge_id).strip(),
                 "lang": str(orig_lang).strip().upper()}
        document = collection.find_one(query)
        if not document:
            logger.warning(
                f"No document found for geId: {ge_id}, lang: {orig_lang}")
            return None
        # Get the linkRaw field
        link_raw = document.get("linkRaw")
        if link_raw and isinstance(link_raw, str):
            return link_raw.strip()
        logger.warning(
            f"No linkRaw field found for geId: {ge_id}, lang: {orig_lang}")
        return None
    except Exception as e:
        logger.error(f"Error querying MongoDB for linkRaw data: {e}")
        return None

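# Minimal manual smoke test (assumes a reachable MongoDB and MONGODB_URI in
# the environment; the submission payload below is made up for illustration):
if __name__ == "__main__":
    import uuid

    sid = f"smoke-{uuid.uuid4().hex[:8]}"
    created = create_submission(sid, usernames=["alice"], ge_input="example")
    print("created at queue position:", created["queue_position"])
    update_submission(sid, "processing")
    update_submission(sid, "completed", results=[{"ok": True}])
    print("statistics:", get_statistics())
    delete_submission(sid)
    close_mongodb_connection()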