data-comparison/web_gui.py
2025-08-27 08:46:13 +07:00

1030 lines
42 KiB
Python

from flask import Flask, render_template, request, jsonify, send_file
import json
import os
import tempfile
import pandas as pd
from pathlib import Path
from werkzeug.utils import secure_filename
from data_comparator import KSTCoordiComparator
app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50MB max file size
app.config['UPLOAD_FOLDER'] = tempfile.gettempdir()
# Global variable to store comparison results
comparison_results = None
comparator_instance = None
@app.route('/')
def index():
return render_template('index.html')
@app.route('/analyze', methods=['POST'])
def analyze_data():
global comparison_results, comparator_instance
try:
file_path = request.json.get('file_path', 'data/sample-data.xlsx')
sheet_filter = request.json.get('sheet_filter', None)
# Handle empty string as None
if sheet_filter == '' or sheet_filter == 'undefined':
sheet_filter = None
if not Path(file_path).exists():
return jsonify({'error': f'File not found: {file_path}'}), 400
# Create comparator and analyze
comparator_instance = KSTCoordiComparator(file_path)
if not comparator_instance.load_data():
return jsonify({'error': 'Failed to load Excel data'}), 500
# Debug: Print available sheets
available_sheets = list(comparator_instance.data.keys())
print(f"Available sheets: {available_sheets}")
print(f"Requested sheet_filter: {repr(sheet_filter)}")
# Get comparison results with optional sheet filtering
comparison_results = comparator_instance.get_comparison_summary(sheet_filter)
# Get matched items from the grouped data
matched_items_data = []
for title, items in comparison_results['grouped_by_title']['matched_by_title'].items():
for item in items[:500]: # Limit for performance
matched_items_data.append({
'title': item['title'],
'episode': item['episode'],
'sheet': item['sheet'],
'row': item['row_index'] + 1 if item['row_index'] is not None else 'N/A',
'reason': 'Perfect match'
})
# Add matched data to results
comparison_results['matched_data'] = matched_items_data
# Generate visualize data
visualize_data = comparator_instance.generate_visualize_data(sheet_filter)
comparison_results['visualize_data'] = visualize_data
# Get dynamic coordi label for display
coordi_label = comparator_instance.get_coordi_label_for_sheet(sheet_filter)
comparison_results['coordi_label'] = coordi_label
return jsonify({
'success': True,
'results': comparison_results
})
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/upload', methods=['POST'])
def upload_file():
try:
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
file = request.files['file']
if file.filename == '':
return jsonify({'error': 'No file selected'}), 400
if file and file.filename.lower().endswith(('.xlsx', '.xls')):
# Save uploaded file
filename = secure_filename(file.filename)
file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
file.save(file_path)
return jsonify({
'success': True,
'file_path': file_path,
'filename': filename
})
else:
return jsonify({'error': 'Please upload an Excel file (.xlsx or .xls)'}), 400
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/get_results')
def get_results():
if comparison_results is None:
return jsonify({'error': 'No analysis results available'}), 404
return jsonify(comparison_results)
@app.route('/get_sheets', methods=['POST'])
def get_sheets():
"""Get available sheet names from an Excel file"""
try:
file_path = request.json.get('file_path', 'data/sample-data.xlsx')
if not Path(file_path).exists():
return jsonify({'error': f'File not found: {file_path}'}), 400
# Create comparator and load data to get sheet names
temp_comparator = KSTCoordiComparator(file_path)
if not temp_comparator.load_data():
return jsonify({'error': 'Failed to load Excel data'}), 500
available_sheets = list(temp_comparator.data.keys())
return jsonify({
'success': True,
'sheets': available_sheets,
'default_sheet': available_sheets[0] if available_sheets else None
})
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/download_excel')
def download_excel():
"""Generate and download Excel file with all sheets in visualize format"""
global comparator_instance
try:
if not comparator_instance:
return jsonify({'error': 'No data available. Please analyze data first.'}), 400
# Generate export data for all sheets
export_data = comparator_instance.generate_excel_export_data()
if not export_data:
return jsonify({'error': 'No data available for export'}), 400
# Create temporary Excel file
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
temp_path = temp_file.name
temp_file.close()
try:
# Create Excel writer with multiple sheets
with pd.ExcelWriter(temp_path, engine='openpyxl') as writer:
for sheet_name, sheet_data in export_data.items():
if sheet_data: # Only create sheet if there's data
df = pd.DataFrame(sheet_data)
# Clean sheet name for Excel (remove invalid characters)
clean_sheet_name = str(sheet_name).replace('/', '_').replace('\\', '_')[:31]
df.to_excel(writer, sheet_name=clean_sheet_name, index=False)
# Get the workbook and worksheet to apply formatting
workbook = writer.book
worksheet = writer.sheets[clean_sheet_name]
# Apply professional color formatting based on Type column
from openpyxl.styles import PatternFill, Font
# Define professional colors matching the new web interface design system
colors = {
'Coordi Only': {
'fill': PatternFill(start_color='F0F9FF', end_color='F0F9FF', fill_type='solid'),
'font': Font(color='0C4A6E', bold=True)
},
'Kst Only': {
'fill': PatternFill(start_color='F7FEE7', end_color='F7FEE7', fill_type='solid'),
'font': Font(color='365314', bold=True)
},
'Mixed Duplicate': {
'fill': PatternFill(start_color='FEFBEB', end_color='FEFBEB', fill_type='solid'),
'font': Font(color='92400E', bold=True)
},
'Pure Duplicate': {
'fill': PatternFill(start_color='FEF2F2', end_color='FEF2F2', fill_type='solid'),
'font': Font(color='991B1B', bold=True)
},
'Matched': {
'fill': PatternFill(start_color='FFFFFF', end_color='FFFFFF', fill_type='solid'),
'font': Font(color='374151', bold=False)
}
}
# Find the Type column (should be column F, index 5)
type_col_idx = None
for idx, col in enumerate(df.columns):
if col == 'Type':
type_col_idx = idx + 1 # Excel is 1-indexed
break
# Apply professional formatting to data rows (skip header)
if type_col_idx:
for row_idx, row_data in enumerate(sheet_data, start=2): # Start from row 2 (after header)
row_type = row_data.get('Type', '')
style_config = colors.get(row_type)
if style_config:
for col_idx in range(1, len(df.columns) + 1):
cell = worksheet.cell(row=row_idx, column=col_idx)
cell.fill = style_config['fill']
cell.font = style_config['font']
# Auto-adjust column widths
for column in worksheet.columns:
max_length = 0
column_letter = column[0].column_letter
for cell in column:
try:
if len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
except:
pass
adjusted_width = min(max_length + 2, 50) # Cap at 50 characters
worksheet.column_dimensions[column_letter].width = adjusted_width
# Send file for download
return send_file(
temp_path,
as_attachment=True,
download_name='data_comparison_export.xlsx',
mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
)
finally:
# Clean up temporary file after a delay (Flask handles this)
pass
except Exception as e:
return jsonify({'error': f'Export failed: {str(e)}'}), 500
def create_templates_dir():
"""Create templates directory and HTML file"""
templates_dir = Path('templates')
templates_dir.mkdir(exist_ok=True)
# Only create the HTML file if it doesn't exist (don't overwrite existing customizations)
html_file = templates_dir / 'index.html'
if html_file.exists():
print("Template file already exists, skipping auto-generation to preserve customizations.")
return
html_content = '''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>KST vs Coordi Data Comparison</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
background-color: #f5f5f5;
}
.container {
max-width: 1400px;
margin: 0 auto;
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
text-align: center;
color: #333;
margin-bottom: 30px;
}
.file-section {
background: #f8f9fa;
padding: 20px;
border-radius: 6px;
margin-bottom: 20px;
}
.file-input {
display: flex;
gap: 10px;
align-items: center;
margin-bottom: 10px;
}
input[type="text"], input[type="file"] {
flex: 1;
padding: 8px 12px;
border: 1px solid #ddd;
border-radius: 4px;
}
input[type="file"] {
padding: 6px 8px;
}
button {
padding: 8px 16px;
background: #007bff;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
button:hover {
background: #0056b3;
}
button:disabled {
background: #6c757d;
cursor: not-allowed;
}
.loading {
text-align: center;
color: #666;
font-style: italic;
}
.tabs {
border-bottom: 2px solid #ddd;
margin-bottom: 20px;
}
.tab {
display: inline-block;
padding: 10px 20px;
background: #f8f9fa;
border: 1px solid #ddd;
border-bottom: none;
cursor: pointer;
margin-right: 5px;
border-radius: 4px 4px 0 0;
}
.tab.active {
background: white;
border-bottom: 2px solid white;
margin-bottom: -2px;
}
.tab-content {
display: none;
}
.tab-content.active {
display: block;
}
.summary-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin-bottom: 20px;
}
.summary-card {
background: #f8f9fa;
padding: 15px;
border-radius: 6px;
border-left: 4px solid #007bff;
}
.summary-card h3 {
margin-top: 0;
margin-bottom: 15px;
color: #333;
font-size: 1.1em;
}
.summary-card p {
margin: 8px 0;
color: #555;
}
.summary-card span {
font-weight: bold;
color: #007bff;
}
.count-badge {
display: inline-block;
background: #007bff;
color: white;
padding: 4px 8px;
border-radius: 12px;
font-size: 0.9em;
margin-left: 10px;
}
.reconciliation {
background: #d4edda;
border: 1px solid #c3e6cb;
padding: 15px;
border-radius: 6px;
margin-top: 15px;
}
.reconciliation.mismatch {
background: #f8d7da;
border-color: #f5c6cb;
}
table {
width: 100%;
border-collapse: collapse;
margin-top: 10px;
}
th, td {
padding: 10px;
text-align: left;
border-bottom: 1px solid #ddd;
}
th {
background-color: #f8f9fa;
font-weight: bold;
}
tr:hover {
background-color: #f5f5f5;
}
.error {
background: #f8d7da;
color: #721c24;
padding: 15px;
border-radius: 6px;
margin: 10px 0;
}
.success {
background: #d4edda;
color: #155724;
padding: 15px;
border-radius: 6px;
margin: 10px 0;
}
.table-container {
max-height: 500px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
}
/* Professional UI Design System - Semantic Color Palette */
.coordi-only-row {
background-color: #f0f9ff !important; /* Sky blue 50 - Information state */
color: #0c4a6e !important; /* Sky blue 900 - High contrast text */
border-left: 4px solid #0ea5e9 !important; /* Sky blue 500 - Primary accent */
font-weight: 500;
}
.kst-only-row {
background-color: #f7fee7 !important; /* Lime 50 - Success/Available state */
color: #365314 !important; /* Lime 900 - High contrast text */
border-left: 4px solid #65a30d !important; /* Lime 600 - Success accent */
font-weight: 500;
}
.mixed-duplicate-row {
background-color: #fefbeb !important; /* Amber 50 - Warning state */
color: #92400e !important; /* Amber 800 - High contrast text */
border-left: 4px solid #f59e0b !important; /* Amber 500 - Warning accent */
font-weight: 500;
}
.pure-duplicate-row {
background-color: #fef2f2 !important; /* Red 50 - Error/Critical state */
color: #991b1b !important; /* Red 800 - High contrast text */
border-left: 4px solid #ef4444 !important; /* Red 500 - Error accent */
font-weight: 500;
}
.matched-row {
background-color: #ffffff !important; /* Pure white - Neutral/Default state */
color: #374151 !important; /* Gray 700 - Standard text */
border-left: 4px solid #10b981 !important; /* Emerald 500 - Success indicator */
font-weight: 400;
}
</style>
</head>
<body>
<div class="container">
<h1>KST vs Coordi Data Comparison Tool</h1>
<div class="file-section">
<div class="file-input">
<label for="filePath">Excel File Path:</label>
<input type="text" id="filePath" value="data/sample-data.xlsx" placeholder="Enter file path">
<button onclick="analyzeData()" id="analyzeBtn">Analyze Data</button>
</div>
<div class="file-input" style="margin-top: 10px;">
<label>Or Upload File:</label>
<input type="file" id="fileUpload" accept=".xlsx,.xls" onchange="handleFileUpload()">
<button onclick="uploadAndAnalyze()" id="uploadBtn" disabled>Upload & Analyze</button>
</div>
<div class="file-input" style="margin-top: 10px;">
<label for="sheetFilter">Sheet Filter:</label>
<select id="sheetFilter" onchange="filterBySheet()" disabled>
<!-- Options will be populated dynamically -->
</select>
</div>
<div id="status"></div>
</div>
<div id="results" style="display: none;">
<div class="tabs">
<div class="tab active" onclick="showTab('summary')">Summary</div>
<div class="tab" onclick="showTab('different')">Different</div>
<div class="tab" onclick="showTab('visualize')">Visualize</div>
</div>
<div id="summary" class="tab-content active">
<!-- Summary Cards Section -->
<div class="summary-grid">
<div class="summary-card">
<h3>📊 Sheet Summary</h3>
<p><strong>Current Sheet:</strong> <span id="current-sheet-name">-</span></p>
<p><strong>Matched Items:</strong> <span id="summary-matched-count">0</span> (Same in both KST and Coordi)</p>
<p><strong>Different Items:</strong> <span id="summary-different-count">0</span> (Total tasks excluding matched items)</p>
</div>
<div class="summary-card">
<h3>🔍 Breakdown</h3>
<p><strong>KST Only:</strong> <span id="summary-kst-only">0</span></p>
<p><strong>Coordi Only:</strong> <span id="summary-coordi-only">0</span></p>
<p><strong>Duplicates:</strong> <span id="summary-duplicates">0</span></p>
</div>
</div>
<h3>Matched Items (Same in both KST and Coordi) <span id="matched-count-display" class="count-badge">0</span></h3>
<div class="table-container">
<table>
<thead>
<tr>
<th>Korean Title</th>
<th>Episode</th>
<th>Sheet</th>
<th>Row</th>
</tr>
</thead>
<tbody id="summary-table">
</tbody>
</table>
</div>
</div>
<div id="different" class="tab-content">
<h3>Different Items <span id="different-count-display" class="count-badge">0</span></h3>
<div class="table-container">
<table>
<thead>
<tr>
<th>KST Data</th>
<th>Coordi Data</th>
<th>Reason</th>
</tr>
</thead>
<tbody id="different-table">
</tbody>
</table>
</div>
</div>
<div id="visualize" class="tab-content">
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
<h3>Excel-like Visualization</h3>
<button onclick="downloadExcel()" id="downloadBtn" style="background: #28a745; padding: 8px 16px;">📥 Download All Sheets</button>
</div>
<div class="table-container">
<table id="visualize-table">
<thead>
<tr>
<th id="coordi-lang-header">Coordi Lang</th>
<th id="coordi-title-header">Coordi Title</th>
<th id="coordi-chapter-header">Coordi Chapter</th>
<th>KST Lang</th>
<th>KST Title</th>
<th>KST Chapter</th>
<th>Status</th>
</tr>
</thead>
<tbody id="visualize-table-body">
</tbody>
</table>
</div>
</div>
</div>
</div>
<script>
function showTab(tabName) {
// Hide all tab contents
document.querySelectorAll('.tab-content').forEach(content => {
content.classList.remove('active');
});
// Remove active class from all tabs
document.querySelectorAll('.tab').forEach(tab => {
tab.classList.remove('active');
});
// Show selected tab content
document.getElementById(tabName).classList.add('active');
// Add active class to clicked tab
event.target.classList.add('active');
}
function analyzeData() {
const filePath = document.getElementById('filePath').value;
const sheetFilterElement = document.getElementById('sheetFilter');
const sheetFilter = sheetFilterElement.value || null; // Use null if empty
const statusDiv = document.getElementById('status');
const analyzeBtn = document.getElementById('analyzeBtn');
if (!filePath.trim()) {
statusDiv.innerHTML = '<div class="error">Please enter a file path</div>';
return;
}
// Show loading state
statusDiv.innerHTML = '<div class="loading">Analyzing data...</div>';
analyzeBtn.disabled = true;
analyzeBtn.textContent = 'Analyzing...';
// Make API call
fetch('/analyze', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
file_path: filePath,
sheet_filter: sheetFilter
})
})
.then(response => response.json())
.then(data => {
if (data.success) {
statusDiv.innerHTML = '<div class="success">Analysis complete!</div>';
updateResults(data.results);
updateSheetFilter(data.results.sheet_names, data.results.current_sheet_filter);
document.getElementById('results').style.display = 'block';
} else {
statusDiv.innerHTML = `<div class="error">Error: ${data.error}</div>`;
}
})
.catch(error => {
statusDiv.innerHTML = `<div class="error">Error: ${error.message}</div>`;
})
.finally(() => {
analyzeBtn.disabled = false;
analyzeBtn.textContent = 'Analyze Data';
});
}
function updateSheetFilter(sheetNames, currentFilter) {
const select = document.getElementById('sheetFilter');
select.innerHTML = '';
// Add a default option if no sheets are available yet
if (!sheetNames || sheetNames.length === 0) {
const option = document.createElement('option');
option.value = '';
option.textContent = 'Loading sheets...';
option.disabled = true;
option.selected = true;
select.appendChild(option);
select.disabled = true;
return;
}
sheetNames.forEach((sheetName, index) => {
const option = document.createElement('option');
option.value = sheetName;
option.textContent = sheetName;
// Select the first sheet by default, or the current filter if specified
if (sheetName === currentFilter || (!currentFilter && index === 0)) {
option.selected = true;
}
select.appendChild(option);
});
select.disabled = false;
}
function filterBySheet() {
// Re-analyze with the selected sheet filter
analyzeData();
}
function handleFileUpload() {
const fileInput = document.getElementById('fileUpload');
const uploadBtn = document.getElementById('uploadBtn');
if (fileInput.files.length > 0) {
uploadBtn.disabled = false;
uploadBtn.textContent = 'Upload & Analyze';
} else {
uploadBtn.disabled = true;
}
}
function uploadAndAnalyze() {
const fileInput = document.getElementById('fileUpload');
const statusDiv = document.getElementById('status');
const uploadBtn = document.getElementById('uploadBtn');
if (fileInput.files.length === 0) {
statusDiv.innerHTML = '<div class="error">Please select a file to upload</div>';
return;
}
const file = fileInput.files[0];
const formData = new FormData();
formData.append('file', file);
// Show uploading state
statusDiv.innerHTML = '<div class="loading">Uploading and analyzing file...</div>';
uploadBtn.disabled = true;
uploadBtn.textContent = 'Uploading...';
// Upload file
fetch('/upload', {
method: 'POST',
body: formData
})
.then(response => response.json())
.then(data => {
if (data.success) {
// File uploaded successfully, now analyze it
document.getElementById('filePath').value = data.file_path;
statusDiv.innerHTML = '<div class="loading">File uploaded! Analyzing data...</div>';
// Clear sheet filter for new file (let it default to first sheet)
const sheetFilterElement = document.getElementById('sheetFilter');
const sheetFilter = null; // Always use default (first sheet) for new uploads
return fetch('/analyze', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
file_path: data.file_path,
sheet_filter: sheetFilter
})
});
} else {
throw new Error(data.error);
}
})
.then(response => response.json())
.then(data => {
if (data.success) {
statusDiv.innerHTML = '<div class="success">File uploaded and analyzed successfully!</div>';
updateResults(data.results);
updateSheetFilter(data.results.sheet_names, data.results.current_sheet_filter);
document.getElementById('results').style.display = 'block';
} else {
statusDiv.innerHTML = `<div class="error">Analysis error: ${data.error}</div>`;
}
})
.catch(error => {
statusDiv.innerHTML = `<div class="error">Upload error: ${error.message}</div>`;
})
.finally(() => {
uploadBtn.disabled = false;
uploadBtn.textContent = 'Upload & Analyze';
handleFileUpload(); // Reset button state based on file selection
});
}
function updateResults(results) {
// Update dynamic headers with coordi label
const coordiLabel = results.coordi_label || 'Coordi';
document.getElementById('coordi-lang-header').textContent = `${coordiLabel} Lang`;
document.getElementById('coordi-title-header').textContent = `${coordiLabel} Title`;
document.getElementById('coordi-chapter-header').textContent = `${coordiLabel} Chapter`;
// Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
// Count all different items including duplicates and mixed duplicates
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
(results.mismatches.mixed_duplicates_count || 0);
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
// Update summary section
document.getElementById('current-sheet-name').textContent = results.current_sheet_filter;
document.getElementById('summary-matched-count').textContent = results.matched_items_count.toLocaleString();
document.getElementById('summary-different-count').textContent = totalDifferent.toLocaleString();
document.getElementById('summary-kst-only').textContent = results.mismatches.kst_only_count.toLocaleString();
document.getElementById('summary-coordi-only').textContent = results.mismatches.coordi_only_count.toLocaleString();
// Calculate total duplicates (KST + Coordi + Mixed)
const totalDuplicates = results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
(results.mismatches.mixed_duplicates_count || 0);
document.getElementById('summary-duplicates').textContent = totalDuplicates.toLocaleString();
// Update Summary tab (matched items)
updateSummaryTable(results.matched_data);
// Update Different tab
updateDifferentTable(results.mismatch_details);
// Update Visualize tab
updateVisualizeTable(results.visualize_data);
}
function updateSummaryTable(matchedData) {
const tbody = document.getElementById('summary-table');
tbody.innerHTML = '';
// Sort by Korean title + episode
const sortedData = [...matchedData].sort((a, b) => {
const titleCompare = a.title.localeCompare(b.title, 'ko');
if (titleCompare !== 0) return titleCompare;
// Try to sort episodes numerically
const aEp = parseFloat(a.episode) || 0;
const bEp = parseFloat(b.episode) || 0;
return aEp - bEp;
});
sortedData.forEach(item => {
const row = tbody.insertRow();
row.insertCell(0).textContent = item.title;
row.insertCell(1).textContent = item.episode;
row.insertCell(2).textContent = item.sheet;
row.insertCell(3).textContent = item.row_index ? item.row_index + 1 : item.row;
});
}
function updateDifferentTable(mismatchDetails) {
const tbody = document.getElementById('different-table');
tbody.innerHTML = '';
const allDifferences = [];
// Add KST-only items (no special highlighting)
mismatchDetails.kst_only.forEach(item => {
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Only appears in KST',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'none'
});
});
// Add Coordi-only items (no special highlighting)
mismatchDetails.coordi_only.forEach(item => {
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Only appears in Coordi',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'none'
});
});
// Add KST duplicates (red highlighting)
mismatchDetails.kst_duplicates.forEach(item => {
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Duplicate entry in KST data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add Coordi duplicates (red highlighting)
mismatchDetails.coordi_duplicates.forEach(item => {
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Duplicate entry in Coordi data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add mixed duplicates (yellow highlighting)
if (mismatchDetails.mixed_duplicates) {
mismatchDetails.mixed_duplicates.forEach(item => {
allDifferences.push({
kstData: item.duplicate_side === 'KST' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
coordiData: item.duplicate_side === 'COORDI' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
reason: item.reason,
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'yellow'
});
});
}
// Sort by Korean title + episode
allDifferences.sort((a, b) => {
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
if (titleCompare !== 0) return titleCompare;
return a.sortEpisode - b.sortEpisode;
});
// Populate table with highlighting
allDifferences.forEach(diff => {
const row = tbody.insertRow();
row.insertCell(0).textContent = diff.kstData;
row.insertCell(1).textContent = diff.coordiData;
row.insertCell(2).textContent = diff.reason;
// Apply highlighting based on type
if (diff.highlightType === 'red') {
row.style.backgroundColor = '#f8d7da'; // Light red
row.title = 'Pure duplicate entry';
} else if (diff.highlightType === 'yellow') {
row.style.backgroundColor = '#fff3cd'; // Light yellow
row.title = 'Item exists in both datasets but has duplicates on one side';
}
});
}
function updateVisualizeTable(visualizeData) {
const tbody = document.getElementById('visualize-table-body');
tbody.innerHTML = '';
// Data is already sorted by the backend (mismatches first, then matches, all by Korean title)
visualizeData.forEach(row => {
const tr = tbody.insertRow();
tr.insertCell(0).textContent = row.coordi_language || '';
tr.insertCell(1).textContent = row.coordi_title || '';
tr.insertCell(2).textContent = row.coordi_chapter || '';
tr.insertCell(3).textContent = row.kst_language || '';
tr.insertCell(4).textContent = row.kst_title || '';
tr.insertCell(5).textContent = row.kst_chapter || '';
tr.insertCell(6).textContent = row.reason || '';
// Apply vibrant color highlighting based on row type
switch (row.row_type) {
case 'coordi_only':
tr.className = 'coordi-only-row';
break;
case 'kst_only':
tr.className = 'kst-only-row';
break;
case 'mixed_duplicate':
tr.className = 'mixed-duplicate-row';
break;
case 'pure_duplicate':
tr.className = 'pure-duplicate-row';
break;
case 'matched':
tr.className = 'matched-row';
break;
}
});
}
function downloadExcel() {
const downloadBtn = document.getElementById('downloadBtn');
const originalText = downloadBtn.textContent;
// Show loading state
downloadBtn.disabled = true;
downloadBtn.textContent = '⏳ Generating...';
downloadBtn.style.background = '#6c757d';
// Create a temporary link and trigger download
fetch('/download_excel', {
method: 'GET'
})
.then(response => {
if (!response.ok) {
throw new Error('Download failed');
}
return response.blob();
})
.then(blob => {
// Create download link
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.style.display = 'none';
a.href = url;
a.download = 'data_comparison_export.xlsx';
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
// Show success message
const statusDiv = document.getElementById('status');
statusDiv.innerHTML = '<div class="success">Excel file downloaded successfully!</div>';
setTimeout(() => {
statusDiv.innerHTML = '';
}, 3000);
})
.catch(error => {
console.error('Download error:', error);
const statusDiv = document.getElementById('status');
statusDiv.innerHTML = '<div class="error">Download failed. Please try again.</div>';
setTimeout(() => {
statusDiv.innerHTML = '';
}, 5000);
})
.finally(() => {
// Reset button state
downloadBtn.disabled = false;
downloadBtn.textContent = originalText;
downloadBtn.style.background = '#28a745';
});
}
// Auto-analyze on page load with default file
window.onload = function() {
// Initialize sheet filter with loading state
updateSheetFilter([], null);
analyzeData();
};
</script>
</body>
</html>'''
html_file = templates_dir / 'index.html'
html_file.write_text(html_content)
def main():
# Create templates directory and HTML file
create_templates_dir()
print("Starting web-based GUI...")
print("Open your browser and go to: http://localhost:8080")
app.run(debug=True, host='0.0.0.0', port=8080)
if __name__ == "__main__":
main()