"""Flask web front end for the KST vs Coordi data comparison tool.

Routes:
    /                index page
    /analyze         run a comparison on an Excel file (POST, JSON body)
    /upload          store an uploaded Excel file (POST, multipart form)
    /get_results     return the most recent comparison results
    /get_sheets      list the sheet names of an Excel file (POST, JSON body)
    /download_excel  download the comparison as a formatted workbook
"""

import io
import json
import os
import re
import tempfile
import uuid
from pathlib import Path

import pandas as pd
from flask import Flask, render_template, request, jsonify, send_file
from werkzeug.utils import secure_filename

from data_comparator import KSTCoordiComparator

app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024  # 50MB max file size
app.config['UPLOAD_FOLDER'] = tempfile.gettempdir()

# Global variables holding the most recent successful analysis.
comparison_results = None
comparator_instance = None

DEFAULT_DATA_FILE = 'data/sample-data.xlsx'
ALLOWED_EXTENSIONS = ('.xlsx', '.xls')
MAX_MATCHED_PER_TITLE = 500  # Limit for performance
MAX_SHEET_NAME_LENGTH = 31  # Excel's limit for worksheet titles
MAX_COLUMN_WIDTH = 50  # Cap at 50 characters
# Characters Excel does not allow in a worksheet title.
_INVALID_SHEET_CHARS = re.compile(r'[\\/*?:\[\]]')


def _json_payload():
    """Return the request's JSON object, or an empty dict if there is none.

    A missing, malformed or non-object body falls back to ``{}`` so the
    routes can apply their documented defaults instead of failing.
    """
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


def _collect_matched_items(matched_by_title):
    """Flatten the per-title matched items into rows for the response.

    At most ``MAX_MATCHED_PER_TITLE`` items are taken from each title.
    """
    rows = []
    for items in matched_by_title.values():
        for item in items[:MAX_MATCHED_PER_TITLE]:
            row_index = item['row_index']
            rows.append({
                'title': item['title'],
                'episode': item['episode'],
                'sheet': item['sheet'],
                'row': row_index + 1 if row_index is not None else 'N/A',
                'reason': 'Perfect match',
            })
    return rows


def _clean_sheet_name(sheet_name, used_names):
    """Return a worksheet title that Excel accepts and that is not yet used.

    Invalid characters become ``_``, the title is cut to 31 characters, an
    empty title becomes ``Sheet``, and a numeric suffix is added when the
    result collides (case-insensitively) with a title in *used_names*.
    The chosen title is recorded in *used_names*.
    """
    base = _INVALID_SHEET_CHARS.sub('_', str(sheet_name))
    base = base[:MAX_SHEET_NAME_LENGTH] or 'Sheet'
    candidate = base
    counter = 2
    while candidate.lower() in used_names:
        suffix = f'_{counter}'
        candidate = base[:MAX_SHEET_NAME_LENGTH - len(suffix)] + suffix
        counter += 1
    used_names.add(candidate.lower())
    return candidate


def _type_fills():
    """Build the fill for each value of the Type column."""
    # Imported lazily so the module loads even when only the export
    # route needs openpyxl's styling classes.
    from openpyxl.styles import PatternFill

    # Colors matching the web interface.
    palette = {
        'Coordi Only': 'FF4444',
        'Kst Only': '4488FF',
        'Mixed Duplicate': 'FF8800',
        'Pure Duplicate': '8844FF',
        'Matched': 'FFFFFF',
    }
    return {
        label: PatternFill(start_color=rgb, end_color=rgb, fill_type='solid')
        for label, rgb in palette.items()
    }


def _apply_type_fills(worksheet, sheet_data, column_count, fills):
    """Color every data row of *worksheet* according to its Type value."""
    # Data starts on row 2; row 1 is the header written by to_excel.
    for row_idx, row_data in enumerate(sheet_data, start=2):
        fill = fills.get(row_data.get('Type', ''))
        if not fill:
            continue
        for col_idx in range(1, column_count + 1):
            worksheet.cell(row=row_idx, column=col_idx).fill = fill


def _autosize_columns(worksheet):
    """Set each column's width from its longest cell text, capped."""
    for column in worksheet.columns:
        longest = max((len(str(cell.value)) for cell in column), default=0)
        letter = column[0].column_letter
        worksheet.column_dimensions[letter].width = min(
            longest + 2, MAX_COLUMN_WIDTH
        )


@app.route('/')
def index():
    """Render the main page."""
    return render_template('index.html')


@app.route('/analyze', methods=['POST'])
def analyze_data():
    """Run the comparison and return the results as JSON.

    Reads ``file_path`` (default ``data/sample-data.xlsx``) and an optional
    ``sheet_filter`` from the JSON body. Responds 400 when the file does not
    exist and 500 when loading or analysis fails.
    """
    global comparison_results, comparator_instance

    try:
        payload = _json_payload()
        # NOTE(review): file_path is taken directly from the client, so any
        # path readable by the server process can be requested. Consider
        # restricting it to the upload folder and the bundled sample data.
        file_path = payload.get('file_path', DEFAULT_DATA_FILE)
        sheet_filter = payload.get('sheet_filter')

        # Handle empty string (and a stringified JS undefined) as None
        if sheet_filter in ('', 'undefined'):
            sheet_filter = None

        if not Path(file_path).exists():
            return jsonify({'error': f'File not found: {file_path}'}), 400

        # Work on locals so a failed request cannot replace the results of
        # an earlier successful analysis with half-built state.
        comparator = KSTCoordiComparator(file_path)
        if not comparator.load_data():
            return jsonify({'error': 'Failed to load Excel data'}), 500

        app.logger.debug('Available sheets: %s', list(comparator.data.keys()))
        app.logger.debug('Requested sheet_filter: %r', sheet_filter)

        # Get comparison results with optional sheet filtering
        results = comparator.get_comparison_summary(sheet_filter)
        results['matched_data'] = _collect_matched_items(
            results['grouped_by_title']['matched_by_title']
        )
        results['visualize_data'] = comparator.generate_visualize_data(
            sheet_filter
        )

        comparator_instance = comparator
        comparison_results = results

        return jsonify({
            'success': True,
            'results': results
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/upload', methods=['POST'])
def upload_file():
    """Save an uploaded Excel file and return the path it was stored at.

    Responds 400 when no file is supplied or the name does not end in
    ``.xlsx`` / ``.xls``.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        lowered = file.filename.lower()
        if not lowered.endswith(ALLOWED_EXTENSIONS):
            return jsonify({
                'error': 'Please upload an Excel file (.xlsx or .xls)'
            }), 400

        extension = '.xlsx' if lowered.endswith('.xlsx') else '.xls'
        filename = secure_filename(file.filename)
        # secure_filename drops non-ASCII characters and leading dots, which
        # can leave a name without its extension (or nothing at all). Use a
        # generated name then, so the file keeps a usable, distinct name.
        if not filename.lower().endswith(extension):
            filename = f'upload_{uuid.uuid4().hex}{extension}'

        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(file_path)

        return jsonify({
            'success': True,
            'file_path': file_path,
            'filename': filename
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/get_results')
def get_results():
    """Return the stored comparison results, or 404 if none exist yet."""
    if comparison_results is None:
        return jsonify({'error': 'No analysis results available'}), 404
    return jsonify(comparison_results)


@app.route('/get_sheets', methods=['POST'])
def get_sheets():
    """Get available sheet names from an Excel file"""
    try:
        # NOTE(review): client-supplied path, same caveat as in /analyze.
        file_path = _json_payload().get('file_path', DEFAULT_DATA_FILE)

        if not Path(file_path).exists():
            return jsonify({'error': f'File not found: {file_path}'}), 400

        # Create comparator and load data to get sheet names
        temp_comparator = KSTCoordiComparator(file_path)
        if not temp_comparator.load_data():
            return jsonify({'error': 'Failed to load Excel data'}), 500

        available_sheets = list(temp_comparator.data.keys())

        return jsonify({
            'success': True,
            'sheets': available_sheets,
            'default_sheet': available_sheets[0] if available_sheets else None
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/download_excel')
def download_excel():
    """Generate and download Excel file with all sheets in visualize format

    Responds 400 when no analysis has been run or there is nothing to
    export, and 500 when building the workbook fails.
    """
    try:
        if not comparator_instance:
            return jsonify({
                'error': 'No data available. Please analyze data first.'
            }), 400

        # Generate export data for all sheets
        export_data = comparator_instance.generate_excel_export_data()
        if not export_data:
            return jsonify({'error': 'No data available for export'}), 400

        # Only sheets with data are written; a workbook with no sheets
        # cannot be saved, so report that case up front.
        populated = [
            (name, rows) for name, rows in export_data.items() if rows
        ]
        if not populated:
            return jsonify({'error': 'No data available for export'}), 400

        fills = _type_fills()
        used_names = set()
        # Build the workbook in memory so no temporary file is left behind.
        buffer = io.BytesIO()
        with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
            for sheet_name, sheet_data in populated:
                df = pd.DataFrame(sheet_data)
                clean_sheet_name = _clean_sheet_name(sheet_name, used_names)
                df.to_excel(writer, sheet_name=clean_sheet_name, index=False)

                # Formatting must happen before the writer saves on exit.
                worksheet = writer.sheets[clean_sheet_name]
                if 'Type' in df.columns:
                    _apply_type_fills(
                        worksheet, sheet_data, len(df.columns), fills
                    )
                _autosize_columns(worksheet)

        buffer.seek(0)
        return send_file(
            buffer,
            as_attachment=True,
            download_name='data_comparison_export.xlsx',
            mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )

    except Exception as e:
        return jsonify({'error': f'Export failed: {str(e)}'}), 500


def create_templates_dir():
    """Create templates directory and HTML file"""
    templates_dir = Path('templates')
    templates_dir.mkdir(exist_ok=True)

    # NOTE(review): this literal holds plain text only; confirm the intended
    # HTML markup for the page is present in the real file.
    html_content = ''' KST vs Coordi Data Comparison

KST vs Coordi Data Comparison Tool

'''

    html_file = templates_dir / 'index.html'
    # Written as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    html_file.write_text(html_content, encoding='utf-8')


def main():
    """Write the template file and start the web server on port 8080."""
    # Create templates directory and HTML file
    create_templates_dir()

    print("Starting web-based GUI...")
    print("Open your browser and go to: http://localhost:8080")
    # The server listens on all interfaces, so the interactive debugger
    # (which allows code execution) is only enabled when asked for.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(debug=debug, host='0.0.0.0', port=8080)


if __name__ == "__main__":
    main()