1030 lines
42 KiB
Python
1030 lines
42 KiB
Python
from flask import Flask, render_template, request, jsonify, send_file
|
|
import json
|
|
import os
|
|
import tempfile
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from werkzeug.utils import secure_filename
|
|
from data_comparator import KSTCoordiComparator
|
|
|
|
app = Flask(__name__)
|
|
app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50MB max file size
|
|
app.config['UPLOAD_FOLDER'] = tempfile.gettempdir()
|
|
|
|
# Global variable to store comparison results
|
|
comparison_results = None
|
|
comparator_instance = None
|
|
|
|
@app.route('/')
|
|
def index():
|
|
return render_template('index.html')
|
|
|
|
@app.route('/analyze', methods=['POST'])
|
|
def analyze_data():
|
|
global comparison_results, comparator_instance
|
|
|
|
try:
|
|
file_path = request.json.get('file_path', 'data/sample-data.xlsx')
|
|
sheet_filter = request.json.get('sheet_filter', None)
|
|
|
|
# Handle empty string as None
|
|
if sheet_filter == '' or sheet_filter == 'undefined':
|
|
sheet_filter = None
|
|
|
|
if not Path(file_path).exists():
|
|
return jsonify({'error': f'File not found: {file_path}'}), 400
|
|
|
|
# Create comparator and analyze
|
|
comparator_instance = KSTCoordiComparator(file_path)
|
|
if not comparator_instance.load_data():
|
|
return jsonify({'error': 'Failed to load Excel data'}), 500
|
|
|
|
# Debug: Print available sheets
|
|
available_sheets = list(comparator_instance.data.keys())
|
|
print(f"Available sheets: {available_sheets}")
|
|
print(f"Requested sheet_filter: {repr(sheet_filter)}")
|
|
|
|
# Get comparison results with optional sheet filtering
|
|
comparison_results = comparator_instance.get_comparison_summary(sheet_filter)
|
|
|
|
# Get matched items from the grouped data
|
|
matched_items_data = []
|
|
for title, items in comparison_results['grouped_by_title']['matched_by_title'].items():
|
|
for item in items[:500]: # Limit for performance
|
|
matched_items_data.append({
|
|
'title': item['title'],
|
|
'episode': item['episode'],
|
|
'sheet': item['sheet'],
|
|
'row': item['row_index'] + 1 if item['row_index'] is not None else 'N/A',
|
|
'reason': 'Perfect match'
|
|
})
|
|
|
|
# Add matched data to results
|
|
comparison_results['matched_data'] = matched_items_data
|
|
|
|
# Generate visualize data
|
|
visualize_data = comparator_instance.generate_visualize_data(sheet_filter)
|
|
comparison_results['visualize_data'] = visualize_data
|
|
|
|
# Get dynamic coordi label for display
|
|
coordi_label = comparator_instance.get_coordi_label_for_sheet(sheet_filter)
|
|
comparison_results['coordi_label'] = coordi_label
|
|
|
|
return jsonify({
|
|
'success': True,
|
|
'results': comparison_results
|
|
})
|
|
|
|
except Exception as e:
|
|
return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/upload', methods=['POST'])
|
|
def upload_file():
|
|
try:
|
|
if 'file' not in request.files:
|
|
return jsonify({'error': 'No file provided'}), 400
|
|
|
|
file = request.files['file']
|
|
if file.filename == '':
|
|
return jsonify({'error': 'No file selected'}), 400
|
|
|
|
if file and file.filename.lower().endswith(('.xlsx', '.xls')):
|
|
# Save uploaded file
|
|
filename = secure_filename(file.filename)
|
|
file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
|
|
file.save(file_path)
|
|
|
|
return jsonify({
|
|
'success': True,
|
|
'file_path': file_path,
|
|
'filename': filename
|
|
})
|
|
else:
|
|
return jsonify({'error': 'Please upload an Excel file (.xlsx or .xls)'}), 400
|
|
|
|
except Exception as e:
|
|
return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/get_results')
|
|
def get_results():
|
|
if comparison_results is None:
|
|
return jsonify({'error': 'No analysis results available'}), 404
|
|
return jsonify(comparison_results)
|
|
|
|
@app.route('/get_sheets', methods=['POST'])
|
|
def get_sheets():
|
|
"""Get available sheet names from an Excel file"""
|
|
try:
|
|
file_path = request.json.get('file_path', 'data/sample-data.xlsx')
|
|
|
|
if not Path(file_path).exists():
|
|
return jsonify({'error': f'File not found: {file_path}'}), 400
|
|
|
|
# Create comparator and load data to get sheet names
|
|
temp_comparator = KSTCoordiComparator(file_path)
|
|
if not temp_comparator.load_data():
|
|
return jsonify({'error': 'Failed to load Excel data'}), 500
|
|
|
|
available_sheets = list(temp_comparator.data.keys())
|
|
|
|
return jsonify({
|
|
'success': True,
|
|
'sheets': available_sheets,
|
|
'default_sheet': available_sheets[0] if available_sheets else None
|
|
})
|
|
|
|
except Exception as e:
|
|
return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/download_excel')
|
|
def download_excel():
|
|
"""Generate and download Excel file with all sheets in visualize format"""
|
|
global comparator_instance
|
|
|
|
try:
|
|
if not comparator_instance:
|
|
return jsonify({'error': 'No data available. Please analyze data first.'}), 400
|
|
|
|
# Generate export data for all sheets
|
|
export_data = comparator_instance.generate_excel_export_data()
|
|
|
|
if not export_data:
|
|
return jsonify({'error': 'No data available for export'}), 400
|
|
|
|
# Create temporary Excel file
|
|
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
|
|
temp_path = temp_file.name
|
|
temp_file.close()
|
|
|
|
try:
|
|
# Create Excel writer with multiple sheets
|
|
with pd.ExcelWriter(temp_path, engine='openpyxl') as writer:
|
|
for sheet_name, sheet_data in export_data.items():
|
|
if sheet_data: # Only create sheet if there's data
|
|
df = pd.DataFrame(sheet_data)
|
|
|
|
# Clean sheet name for Excel (remove invalid characters)
|
|
clean_sheet_name = str(sheet_name).replace('/', '_').replace('\\', '_')[:31]
|
|
|
|
df.to_excel(writer, sheet_name=clean_sheet_name, index=False)
|
|
|
|
# Get the workbook and worksheet to apply formatting
|
|
workbook = writer.book
|
|
worksheet = writer.sheets[clean_sheet_name]
|
|
|
|
# Apply professional color formatting based on Type column
|
|
from openpyxl.styles import PatternFill, Font
|
|
|
|
# Define professional colors matching the new web interface design system
|
|
colors = {
|
|
'Coordi Only': {
|
|
'fill': PatternFill(start_color='F0F9FF', end_color='F0F9FF', fill_type='solid'),
|
|
'font': Font(color='0C4A6E', bold=True)
|
|
},
|
|
'Kst Only': {
|
|
'fill': PatternFill(start_color='F7FEE7', end_color='F7FEE7', fill_type='solid'),
|
|
'font': Font(color='365314', bold=True)
|
|
},
|
|
'Mixed Duplicate': {
|
|
'fill': PatternFill(start_color='FEFBEB', end_color='FEFBEB', fill_type='solid'),
|
|
'font': Font(color='92400E', bold=True)
|
|
},
|
|
'Pure Duplicate': {
|
|
'fill': PatternFill(start_color='FEF2F2', end_color='FEF2F2', fill_type='solid'),
|
|
'font': Font(color='991B1B', bold=True)
|
|
},
|
|
'Matched': {
|
|
'fill': PatternFill(start_color='FFFFFF', end_color='FFFFFF', fill_type='solid'),
|
|
'font': Font(color='374151', bold=False)
|
|
}
|
|
}
|
|
|
|
# Find the Type column (should be column F, index 5)
|
|
type_col_idx = None
|
|
for idx, col in enumerate(df.columns):
|
|
if col == 'Type':
|
|
type_col_idx = idx + 1 # Excel is 1-indexed
|
|
break
|
|
|
|
# Apply professional formatting to data rows (skip header)
|
|
if type_col_idx:
|
|
for row_idx, row_data in enumerate(sheet_data, start=2): # Start from row 2 (after header)
|
|
row_type = row_data.get('Type', '')
|
|
style_config = colors.get(row_type)
|
|
if style_config:
|
|
for col_idx in range(1, len(df.columns) + 1):
|
|
cell = worksheet.cell(row=row_idx, column=col_idx)
|
|
cell.fill = style_config['fill']
|
|
cell.font = style_config['font']
|
|
|
|
# Auto-adjust column widths
|
|
for column in worksheet.columns:
|
|
max_length = 0
|
|
column_letter = column[0].column_letter
|
|
for cell in column:
|
|
try:
|
|
if len(str(cell.value)) > max_length:
|
|
max_length = len(str(cell.value))
|
|
except:
|
|
pass
|
|
adjusted_width = min(max_length + 2, 50) # Cap at 50 characters
|
|
worksheet.column_dimensions[column_letter].width = adjusted_width
|
|
|
|
# Send file for download
|
|
return send_file(
|
|
temp_path,
|
|
as_attachment=True,
|
|
download_name='data_comparison_export.xlsx',
|
|
mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
|
)
|
|
|
|
finally:
|
|
# Clean up temporary file after a delay (Flask handles this)
|
|
pass
|
|
|
|
except Exception as e:
|
|
return jsonify({'error': f'Export failed: {str(e)}'}), 500
|
|
|
|
def create_templates_dir():
|
|
"""Create templates directory and HTML file"""
|
|
templates_dir = Path('templates')
|
|
templates_dir.mkdir(exist_ok=True)
|
|
|
|
# Only create the HTML file if it doesn't exist (don't overwrite existing customizations)
|
|
html_file = templates_dir / 'index.html'
|
|
if html_file.exists():
|
|
print("Template file already exists, skipping auto-generation to preserve customizations.")
|
|
return
|
|
|
|
html_content = '''<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>KST vs Coordi Data Comparison</title>
|
|
<style>
|
|
body {
|
|
font-family: Arial, sans-serif;
|
|
margin: 0;
|
|
padding: 20px;
|
|
background-color: #f5f5f5;
|
|
}
|
|
.container {
|
|
max-width: 1400px;
|
|
margin: 0 auto;
|
|
background: white;
|
|
padding: 20px;
|
|
border-radius: 8px;
|
|
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
|
}
|
|
h1 {
|
|
text-align: center;
|
|
color: #333;
|
|
margin-bottom: 30px;
|
|
}
|
|
.file-section {
|
|
background: #f8f9fa;
|
|
padding: 20px;
|
|
border-radius: 6px;
|
|
margin-bottom: 20px;
|
|
}
|
|
.file-input {
|
|
display: flex;
|
|
gap: 10px;
|
|
align-items: center;
|
|
margin-bottom: 10px;
|
|
}
|
|
input[type="text"], input[type="file"] {
|
|
flex: 1;
|
|
padding: 8px 12px;
|
|
border: 1px solid #ddd;
|
|
border-radius: 4px;
|
|
}
|
|
input[type="file"] {
|
|
padding: 6px 8px;
|
|
}
|
|
button {
|
|
padding: 8px 16px;
|
|
background: #007bff;
|
|
color: white;
|
|
border: none;
|
|
border-radius: 4px;
|
|
cursor: pointer;
|
|
}
|
|
button:hover {
|
|
background: #0056b3;
|
|
}
|
|
button:disabled {
|
|
background: #6c757d;
|
|
cursor: not-allowed;
|
|
}
|
|
.loading {
|
|
text-align: center;
|
|
color: #666;
|
|
font-style: italic;
|
|
}
|
|
.tabs {
|
|
border-bottom: 2px solid #ddd;
|
|
margin-bottom: 20px;
|
|
}
|
|
.tab {
|
|
display: inline-block;
|
|
padding: 10px 20px;
|
|
background: #f8f9fa;
|
|
border: 1px solid #ddd;
|
|
border-bottom: none;
|
|
cursor: pointer;
|
|
margin-right: 5px;
|
|
border-radius: 4px 4px 0 0;
|
|
}
|
|
.tab.active {
|
|
background: white;
|
|
border-bottom: 2px solid white;
|
|
margin-bottom: -2px;
|
|
}
|
|
.tab-content {
|
|
display: none;
|
|
}
|
|
.tab-content.active {
|
|
display: block;
|
|
}
|
|
.summary-grid {
|
|
display: grid;
|
|
grid-template-columns: 1fr 1fr;
|
|
gap: 20px;
|
|
margin-bottom: 20px;
|
|
}
|
|
.summary-card {
|
|
background: #f8f9fa;
|
|
padding: 15px;
|
|
border-radius: 6px;
|
|
border-left: 4px solid #007bff;
|
|
}
|
|
.summary-card h3 {
|
|
margin-top: 0;
|
|
margin-bottom: 15px;
|
|
color: #333;
|
|
font-size: 1.1em;
|
|
}
|
|
.summary-card p {
|
|
margin: 8px 0;
|
|
color: #555;
|
|
}
|
|
.summary-card span {
|
|
font-weight: bold;
|
|
color: #007bff;
|
|
}
|
|
.count-badge {
|
|
display: inline-block;
|
|
background: #007bff;
|
|
color: white;
|
|
padding: 4px 8px;
|
|
border-radius: 12px;
|
|
font-size: 0.9em;
|
|
margin-left: 10px;
|
|
}
|
|
.reconciliation {
|
|
background: #d4edda;
|
|
border: 1px solid #c3e6cb;
|
|
padding: 15px;
|
|
border-radius: 6px;
|
|
margin-top: 15px;
|
|
}
|
|
.reconciliation.mismatch {
|
|
background: #f8d7da;
|
|
border-color: #f5c6cb;
|
|
}
|
|
table {
|
|
width: 100%;
|
|
border-collapse: collapse;
|
|
margin-top: 10px;
|
|
}
|
|
th, td {
|
|
padding: 10px;
|
|
text-align: left;
|
|
border-bottom: 1px solid #ddd;
|
|
}
|
|
th {
|
|
background-color: #f8f9fa;
|
|
font-weight: bold;
|
|
}
|
|
tr:hover {
|
|
background-color: #f5f5f5;
|
|
}
|
|
.error {
|
|
background: #f8d7da;
|
|
color: #721c24;
|
|
padding: 15px;
|
|
border-radius: 6px;
|
|
margin: 10px 0;
|
|
}
|
|
.success {
|
|
background: #d4edda;
|
|
color: #155724;
|
|
padding: 15px;
|
|
border-radius: 6px;
|
|
margin: 10px 0;
|
|
}
|
|
.table-container {
|
|
max-height: 500px;
|
|
overflow-y: auto;
|
|
border: 1px solid #ddd;
|
|
border-radius: 4px;
|
|
}
|
|
|
|
/* Professional UI Design System - Semantic Color Palette */
|
|
.coordi-only-row {
|
|
background-color: #f0f9ff !important; /* Sky blue 50 - Information state */
|
|
color: #0c4a6e !important; /* Sky blue 900 - High contrast text */
|
|
border-left: 4px solid #0ea5e9 !important; /* Sky blue 500 - Primary accent */
|
|
font-weight: 500;
|
|
}
|
|
|
|
.kst-only-row {
|
|
background-color: #f7fee7 !important; /* Lime 50 - Success/Available state */
|
|
color: #365314 !important; /* Lime 900 - High contrast text */
|
|
border-left: 4px solid #65a30d !important; /* Lime 600 - Success accent */
|
|
font-weight: 500;
|
|
}
|
|
|
|
.mixed-duplicate-row {
|
|
background-color: #fefbeb !important; /* Amber 50 - Warning state */
|
|
color: #92400e !important; /* Amber 800 - High contrast text */
|
|
border-left: 4px solid #f59e0b !important; /* Amber 500 - Warning accent */
|
|
font-weight: 500;
|
|
}
|
|
|
|
.pure-duplicate-row {
|
|
background-color: #fef2f2 !important; /* Red 50 - Error/Critical state */
|
|
color: #991b1b !important; /* Red 800 - High contrast text */
|
|
border-left: 4px solid #ef4444 !important; /* Red 500 - Error accent */
|
|
font-weight: 500;
|
|
}
|
|
|
|
.matched-row {
|
|
background-color: #ffffff !important; /* Pure white - Neutral/Default state */
|
|
color: #374151 !important; /* Gray 700 - Standard text */
|
|
border-left: 4px solid #10b981 !important; /* Emerald 500 - Success indicator */
|
|
font-weight: 400;
|
|
}
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<div class="container">
|
|
<h1>KST vs Coordi Data Comparison Tool</h1>
|
|
|
|
<div class="file-section">
|
|
<div class="file-input">
|
|
<label for="filePath">Excel File Path:</label>
|
|
<input type="text" id="filePath" value="data/sample-data.xlsx" placeholder="Enter file path">
|
|
<button onclick="analyzeData()" id="analyzeBtn">Analyze Data</button>
|
|
</div>
|
|
<div class="file-input" style="margin-top: 10px;">
|
|
<label>Or Upload File:</label>
|
|
<input type="file" id="fileUpload" accept=".xlsx,.xls" onchange="handleFileUpload()">
|
|
<button onclick="uploadAndAnalyze()" id="uploadBtn" disabled>Upload & Analyze</button>
|
|
</div>
|
|
<div class="file-input" style="margin-top: 10px;">
|
|
<label for="sheetFilter">Sheet Filter:</label>
|
|
<select id="sheetFilter" onchange="filterBySheet()" disabled>
|
|
<!-- Options will be populated dynamically -->
|
|
</select>
|
|
</div>
|
|
<div id="status"></div>
|
|
</div>
|
|
|
|
<div id="results" style="display: none;">
|
|
<div class="tabs">
|
|
<div class="tab active" onclick="showTab('summary')">Summary</div>
|
|
<div class="tab" onclick="showTab('different')">Different</div>
|
|
<div class="tab" onclick="showTab('visualize')">Visualize</div>
|
|
</div>
|
|
|
|
<div id="summary" class="tab-content active">
|
|
<!-- Summary Cards Section -->
|
|
<div class="summary-grid">
|
|
<div class="summary-card">
|
|
<h3>📊 Sheet Summary</h3>
|
|
<p><strong>Current Sheet:</strong> <span id="current-sheet-name">-</span></p>
|
|
<p><strong>Matched Items:</strong> <span id="summary-matched-count">0</span> (Same in both KST and Coordi)</p>
|
|
<p><strong>Different Items:</strong> <span id="summary-different-count">0</span> (Total tasks excluding matched items)</p>
|
|
</div>
|
|
<div class="summary-card">
|
|
<h3>🔍 Breakdown</h3>
|
|
<p><strong>KST Only:</strong> <span id="summary-kst-only">0</span></p>
|
|
<p><strong>Coordi Only:</strong> <span id="summary-coordi-only">0</span></p>
|
|
<p><strong>Duplicates:</strong> <span id="summary-duplicates">0</span></p>
|
|
</div>
|
|
</div>
|
|
|
|
<h3>Matched Items (Same in both KST and Coordi) <span id="matched-count-display" class="count-badge">0</span></h3>
|
|
<div class="table-container">
|
|
<table>
|
|
<thead>
|
|
<tr>
|
|
<th>Korean Title</th>
|
|
<th>Episode</th>
|
|
<th>Sheet</th>
|
|
<th>Row</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody id="summary-table">
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="different" class="tab-content">
|
|
<h3>Different Items <span id="different-count-display" class="count-badge">0</span></h3>
|
|
<div class="table-container">
|
|
<table>
|
|
<thead>
|
|
<tr>
|
|
<th>KST Data</th>
|
|
<th>Coordi Data</th>
|
|
<th>Reason</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody id="different-table">
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="visualize" class="tab-content">
|
|
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
|
|
<h3>Excel-like Visualization</h3>
|
|
<button onclick="downloadExcel()" id="downloadBtn" style="background: #28a745; padding: 8px 16px;">📥 Download All Sheets</button>
|
|
</div>
|
|
<div class="table-container">
|
|
<table id="visualize-table">
|
|
<thead>
|
|
<tr>
|
|
<th id="coordi-lang-header">Coordi Lang</th>
|
|
<th id="coordi-title-header">Coordi Title</th>
|
|
<th id="coordi-chapter-header">Coordi Chapter</th>
|
|
<th>KST Lang</th>
|
|
<th>KST Title</th>
|
|
<th>KST Chapter</th>
|
|
<th>Status</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody id="visualize-table-body">
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<script>
|
|
function showTab(tabName) {
|
|
// Hide all tab contents
|
|
document.querySelectorAll('.tab-content').forEach(content => {
|
|
content.classList.remove('active');
|
|
});
|
|
|
|
// Remove active class from all tabs
|
|
document.querySelectorAll('.tab').forEach(tab => {
|
|
tab.classList.remove('active');
|
|
});
|
|
|
|
// Show selected tab content
|
|
document.getElementById(tabName).classList.add('active');
|
|
|
|
// Add active class to clicked tab
|
|
event.target.classList.add('active');
|
|
}
|
|
|
|
function analyzeData() {
|
|
const filePath = document.getElementById('filePath').value;
|
|
const sheetFilterElement = document.getElementById('sheetFilter');
|
|
const sheetFilter = sheetFilterElement.value || null; // Use null if empty
|
|
const statusDiv = document.getElementById('status');
|
|
const analyzeBtn = document.getElementById('analyzeBtn');
|
|
|
|
if (!filePath.trim()) {
|
|
statusDiv.innerHTML = '<div class="error">Please enter a file path</div>';
|
|
return;
|
|
}
|
|
|
|
// Show loading state
|
|
statusDiv.innerHTML = '<div class="loading">Analyzing data...</div>';
|
|
analyzeBtn.disabled = true;
|
|
analyzeBtn.textContent = 'Analyzing...';
|
|
|
|
// Make API call
|
|
fetch('/analyze', {
|
|
method: 'POST',
|
|
headers: {
|
|
'Content-Type': 'application/json',
|
|
},
|
|
body: JSON.stringify({
|
|
file_path: filePath,
|
|
sheet_filter: sheetFilter
|
|
})
|
|
})
|
|
.then(response => response.json())
|
|
.then(data => {
|
|
if (data.success) {
|
|
statusDiv.innerHTML = '<div class="success">Analysis complete!</div>';
|
|
updateResults(data.results);
|
|
updateSheetFilter(data.results.sheet_names, data.results.current_sheet_filter);
|
|
document.getElementById('results').style.display = 'block';
|
|
} else {
|
|
statusDiv.innerHTML = `<div class="error">Error: ${data.error}</div>`;
|
|
}
|
|
})
|
|
.catch(error => {
|
|
statusDiv.innerHTML = `<div class="error">Error: ${error.message}</div>`;
|
|
})
|
|
.finally(() => {
|
|
analyzeBtn.disabled = false;
|
|
analyzeBtn.textContent = 'Analyze Data';
|
|
});
|
|
}
|
|
|
|
function updateSheetFilter(sheetNames, currentFilter) {
|
|
const select = document.getElementById('sheetFilter');
|
|
select.innerHTML = '';
|
|
|
|
// Add a default option if no sheets are available yet
|
|
if (!sheetNames || sheetNames.length === 0) {
|
|
const option = document.createElement('option');
|
|
option.value = '';
|
|
option.textContent = 'Loading sheets...';
|
|
option.disabled = true;
|
|
option.selected = true;
|
|
select.appendChild(option);
|
|
select.disabled = true;
|
|
return;
|
|
}
|
|
|
|
sheetNames.forEach((sheetName, index) => {
|
|
const option = document.createElement('option');
|
|
option.value = sheetName;
|
|
option.textContent = sheetName;
|
|
// Select the first sheet by default, or the current filter if specified
|
|
if (sheetName === currentFilter || (!currentFilter && index === 0)) {
|
|
option.selected = true;
|
|
}
|
|
select.appendChild(option);
|
|
});
|
|
|
|
select.disabled = false;
|
|
}
|
|
|
|
function filterBySheet() {
|
|
// Re-analyze with the selected sheet filter
|
|
analyzeData();
|
|
}
|
|
|
|
function handleFileUpload() {
|
|
const fileInput = document.getElementById('fileUpload');
|
|
const uploadBtn = document.getElementById('uploadBtn');
|
|
|
|
if (fileInput.files.length > 0) {
|
|
uploadBtn.disabled = false;
|
|
uploadBtn.textContent = 'Upload & Analyze';
|
|
} else {
|
|
uploadBtn.disabled = true;
|
|
}
|
|
}
|
|
|
|
function uploadAndAnalyze() {
|
|
const fileInput = document.getElementById('fileUpload');
|
|
const statusDiv = document.getElementById('status');
|
|
const uploadBtn = document.getElementById('uploadBtn');
|
|
|
|
if (fileInput.files.length === 0) {
|
|
statusDiv.innerHTML = '<div class="error">Please select a file to upload</div>';
|
|
return;
|
|
}
|
|
|
|
const file = fileInput.files[0];
|
|
const formData = new FormData();
|
|
formData.append('file', file);
|
|
|
|
// Show uploading state
|
|
statusDiv.innerHTML = '<div class="loading">Uploading and analyzing file...</div>';
|
|
uploadBtn.disabled = true;
|
|
uploadBtn.textContent = 'Uploading...';
|
|
|
|
// Upload file
|
|
fetch('/upload', {
|
|
method: 'POST',
|
|
body: formData
|
|
})
|
|
.then(response => response.json())
|
|
.then(data => {
|
|
if (data.success) {
|
|
// File uploaded successfully, now analyze it
|
|
document.getElementById('filePath').value = data.file_path;
|
|
statusDiv.innerHTML = '<div class="loading">File uploaded! Analyzing data...</div>';
|
|
|
|
// Clear sheet filter for new file (let it default to first sheet)
|
|
const sheetFilterElement = document.getElementById('sheetFilter');
|
|
const sheetFilter = null; // Always use default (first sheet) for new uploads
|
|
return fetch('/analyze', {
|
|
method: 'POST',
|
|
headers: {
|
|
'Content-Type': 'application/json',
|
|
},
|
|
body: JSON.stringify({
|
|
file_path: data.file_path,
|
|
sheet_filter: sheetFilter
|
|
})
|
|
});
|
|
} else {
|
|
throw new Error(data.error);
|
|
}
|
|
})
|
|
.then(response => response.json())
|
|
.then(data => {
|
|
if (data.success) {
|
|
statusDiv.innerHTML = '<div class="success">File uploaded and analyzed successfully!</div>';
|
|
updateResults(data.results);
|
|
updateSheetFilter(data.results.sheet_names, data.results.current_sheet_filter);
|
|
document.getElementById('results').style.display = 'block';
|
|
} else {
|
|
statusDiv.innerHTML = `<div class="error">Analysis error: ${data.error}</div>`;
|
|
}
|
|
})
|
|
.catch(error => {
|
|
statusDiv.innerHTML = `<div class="error">Upload error: ${error.message}</div>`;
|
|
})
|
|
.finally(() => {
|
|
uploadBtn.disabled = false;
|
|
uploadBtn.textContent = 'Upload & Analyze';
|
|
handleFileUpload(); // Reset button state based on file selection
|
|
});
|
|
}
|
|
|
|
function updateResults(results) {
|
|
// Update dynamic headers with coordi label
|
|
const coordiLabel = results.coordi_label || 'Coordi';
|
|
document.getElementById('coordi-lang-header').textContent = `${coordiLabel} Lang`;
|
|
document.getElementById('coordi-title-header').textContent = `${coordiLabel} Title`;
|
|
document.getElementById('coordi-chapter-header').textContent = `${coordiLabel} Chapter`;
|
|
|
|
// Update count displays
|
|
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
|
|
|
// Count all different items including duplicates and mixed duplicates
|
|
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
|
|
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
|
|
(results.mismatches.mixed_duplicates_count || 0);
|
|
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
|
|
|
|
// Update summary section
|
|
document.getElementById('current-sheet-name').textContent = results.current_sheet_filter;
|
|
document.getElementById('summary-matched-count').textContent = results.matched_items_count.toLocaleString();
|
|
document.getElementById('summary-different-count').textContent = totalDifferent.toLocaleString();
|
|
document.getElementById('summary-kst-only').textContent = results.mismatches.kst_only_count.toLocaleString();
|
|
document.getElementById('summary-coordi-only').textContent = results.mismatches.coordi_only_count.toLocaleString();
|
|
|
|
// Calculate total duplicates (KST + Coordi + Mixed)
|
|
const totalDuplicates = results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
|
|
(results.mismatches.mixed_duplicates_count || 0);
|
|
document.getElementById('summary-duplicates').textContent = totalDuplicates.toLocaleString();
|
|
|
|
// Update Summary tab (matched items)
|
|
updateSummaryTable(results.matched_data);
|
|
|
|
// Update Different tab
|
|
updateDifferentTable(results.mismatch_details);
|
|
|
|
// Update Visualize tab
|
|
updateVisualizeTable(results.visualize_data);
|
|
}
|
|
|
|
function updateSummaryTable(matchedData) {
|
|
const tbody = document.getElementById('summary-table');
|
|
tbody.innerHTML = '';
|
|
|
|
// Sort by Korean title + episode
|
|
const sortedData = [...matchedData].sort((a, b) => {
|
|
const titleCompare = a.title.localeCompare(b.title, 'ko');
|
|
if (titleCompare !== 0) return titleCompare;
|
|
|
|
// Try to sort episodes numerically
|
|
const aEp = parseFloat(a.episode) || 0;
|
|
const bEp = parseFloat(b.episode) || 0;
|
|
return aEp - bEp;
|
|
});
|
|
|
|
sortedData.forEach(item => {
|
|
const row = tbody.insertRow();
|
|
row.insertCell(0).textContent = item.title;
|
|
row.insertCell(1).textContent = item.episode;
|
|
row.insertCell(2).textContent = item.sheet;
|
|
row.insertCell(3).textContent = item.row_index ? item.row_index + 1 : item.row;
|
|
});
|
|
}
|
|
|
|
function updateDifferentTable(mismatchDetails) {
|
|
const tbody = document.getElementById('different-table');
|
|
tbody.innerHTML = '';
|
|
|
|
const allDifferences = [];
|
|
|
|
// Add KST-only items (no special highlighting)
|
|
mismatchDetails.kst_only.forEach(item => {
|
|
allDifferences.push({
|
|
kstData: `${item.title} - Episode ${item.episode}`,
|
|
coordiData: '',
|
|
reason: 'Only appears in KST',
|
|
sortTitle: item.title,
|
|
sortEpisode: parseFloat(item.episode) || 0,
|
|
highlightType: 'none'
|
|
});
|
|
});
|
|
|
|
// Add Coordi-only items (no special highlighting)
|
|
mismatchDetails.coordi_only.forEach(item => {
|
|
allDifferences.push({
|
|
kstData: '',
|
|
coordiData: `${item.title} - Episode ${item.episode}`,
|
|
reason: 'Only appears in Coordi',
|
|
sortTitle: item.title,
|
|
sortEpisode: parseFloat(item.episode) || 0,
|
|
highlightType: 'none'
|
|
});
|
|
});
|
|
|
|
// Add KST duplicates (red highlighting)
|
|
mismatchDetails.kst_duplicates.forEach(item => {
|
|
allDifferences.push({
|
|
kstData: `${item.title} - Episode ${item.episode}`,
|
|
coordiData: '',
|
|
reason: 'Duplicate entry in KST data',
|
|
sortTitle: item.title,
|
|
sortEpisode: parseFloat(item.episode) || 0,
|
|
highlightType: 'red'
|
|
});
|
|
});
|
|
|
|
// Add Coordi duplicates (red highlighting)
|
|
mismatchDetails.coordi_duplicates.forEach(item => {
|
|
allDifferences.push({
|
|
kstData: '',
|
|
coordiData: `${item.title} - Episode ${item.episode}`,
|
|
reason: 'Duplicate entry in Coordi data',
|
|
sortTitle: item.title,
|
|
sortEpisode: parseFloat(item.episode) || 0,
|
|
highlightType: 'red'
|
|
});
|
|
});
|
|
|
|
// Add mixed duplicates (yellow highlighting)
|
|
if (mismatchDetails.mixed_duplicates) {
|
|
mismatchDetails.mixed_duplicates.forEach(item => {
|
|
allDifferences.push({
|
|
kstData: item.duplicate_side === 'KST' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
|
|
coordiData: item.duplicate_side === 'COORDI' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
|
|
reason: item.reason,
|
|
sortTitle: item.title,
|
|
sortEpisode: parseFloat(item.episode) || 0,
|
|
highlightType: 'yellow'
|
|
});
|
|
});
|
|
}
|
|
|
|
// Sort by Korean title + episode
|
|
allDifferences.sort((a, b) => {
|
|
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
|
|
if (titleCompare !== 0) return titleCompare;
|
|
return a.sortEpisode - b.sortEpisode;
|
|
});
|
|
|
|
// Populate table with highlighting
|
|
allDifferences.forEach(diff => {
|
|
const row = tbody.insertRow();
|
|
row.insertCell(0).textContent = diff.kstData;
|
|
row.insertCell(1).textContent = diff.coordiData;
|
|
row.insertCell(2).textContent = diff.reason;
|
|
|
|
// Apply highlighting based on type
|
|
if (diff.highlightType === 'red') {
|
|
row.style.backgroundColor = '#f8d7da'; // Light red
|
|
row.title = 'Pure duplicate entry';
|
|
} else if (diff.highlightType === 'yellow') {
|
|
row.style.backgroundColor = '#fff3cd'; // Light yellow
|
|
row.title = 'Item exists in both datasets but has duplicates on one side';
|
|
}
|
|
});
|
|
}
|
|
|
|
function updateVisualizeTable(visualizeData) {
|
|
const tbody = document.getElementById('visualize-table-body');
|
|
tbody.innerHTML = '';
|
|
|
|
// Data is already sorted by the backend (mismatches first, then matches, all by Korean title)
|
|
visualizeData.forEach(row => {
|
|
const tr = tbody.insertRow();
|
|
tr.insertCell(0).textContent = row.coordi_language || '';
|
|
tr.insertCell(1).textContent = row.coordi_title || '';
|
|
tr.insertCell(2).textContent = row.coordi_chapter || '';
|
|
tr.insertCell(3).textContent = row.kst_language || '';
|
|
tr.insertCell(4).textContent = row.kst_title || '';
|
|
tr.insertCell(5).textContent = row.kst_chapter || '';
|
|
tr.insertCell(6).textContent = row.reason || '';
|
|
|
|
// Apply vibrant color highlighting based on row type
|
|
switch (row.row_type) {
|
|
case 'coordi_only':
|
|
tr.className = 'coordi-only-row';
|
|
break;
|
|
case 'kst_only':
|
|
tr.className = 'kst-only-row';
|
|
break;
|
|
case 'mixed_duplicate':
|
|
tr.className = 'mixed-duplicate-row';
|
|
break;
|
|
case 'pure_duplicate':
|
|
tr.className = 'pure-duplicate-row';
|
|
break;
|
|
case 'matched':
|
|
tr.className = 'matched-row';
|
|
break;
|
|
}
|
|
});
|
|
}
|
|
|
|
function downloadExcel() {
|
|
const downloadBtn = document.getElementById('downloadBtn');
|
|
const originalText = downloadBtn.textContent;
|
|
|
|
// Show loading state
|
|
downloadBtn.disabled = true;
|
|
downloadBtn.textContent = '⏳ Generating...';
|
|
downloadBtn.style.background = '#6c757d';
|
|
|
|
// Create a temporary link and trigger download
|
|
fetch('/download_excel', {
|
|
method: 'GET'
|
|
})
|
|
.then(response => {
|
|
if (!response.ok) {
|
|
throw new Error('Download failed');
|
|
}
|
|
return response.blob();
|
|
})
|
|
.then(blob => {
|
|
// Create download link
|
|
const url = window.URL.createObjectURL(blob);
|
|
const a = document.createElement('a');
|
|
a.style.display = 'none';
|
|
a.href = url;
|
|
a.download = 'data_comparison_export.xlsx';
|
|
document.body.appendChild(a);
|
|
a.click();
|
|
window.URL.revokeObjectURL(url);
|
|
document.body.removeChild(a);
|
|
|
|
// Show success message
|
|
const statusDiv = document.getElementById('status');
|
|
statusDiv.innerHTML = '<div class="success">Excel file downloaded successfully!</div>';
|
|
setTimeout(() => {
|
|
statusDiv.innerHTML = '';
|
|
}, 3000);
|
|
})
|
|
.catch(error => {
|
|
console.error('Download error:', error);
|
|
const statusDiv = document.getElementById('status');
|
|
statusDiv.innerHTML = '<div class="error">Download failed. Please try again.</div>';
|
|
setTimeout(() => {
|
|
statusDiv.innerHTML = '';
|
|
}, 5000);
|
|
})
|
|
.finally(() => {
|
|
// Reset button state
|
|
downloadBtn.disabled = false;
|
|
downloadBtn.textContent = originalText;
|
|
downloadBtn.style.background = '#28a745';
|
|
});
|
|
}
|
|
|
|
// Auto-analyze on page load with default file
|
|
window.onload = function() {
|
|
// Initialize sheet filter with loading state
|
|
updateSheetFilter([], null);
|
|
analyzeData();
|
|
};
|
|
</script>
|
|
</body>
|
|
</html>'''
|
|
|
|
html_file = templates_dir / 'index.html'
|
|
html_file.write_text(html_content)
|
|
|
|
def main():
|
|
# Create templates directory and HTML file
|
|
create_templates_dir()
|
|
|
|
print("Starting web-based GUI...")
|
|
print("Open your browser and go to: http://localhost:8080")
|
|
app.run(debug=True, host='0.0.0.0', port=8080)
|
|
|
|
if __name__ == "__main__":
|
|
main() |