From 07b4a3f34fb8d20e441e5891077488864af27495 Mon Sep 17 00:00:00 2001 From: arthur Date: Thu, 21 Aug 2025 11:23:33 +0700 Subject: [PATCH] map column index base --- CLAUDE.md | 7 ++++- data/~$sample-data.xlsx | Bin 165 -> 0 bytes data_comparator.py | 55 ++++++++++++++++++++++++---------------- templates/index.html | 20 +++++++++++++-- 4 files changed, 57 insertions(+), 25 deletions(-) delete mode 100644 data/~$sample-data.xlsx diff --git a/CLAUDE.md b/CLAUDE.md index 40586fc..25ccc75 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -54,13 +54,18 @@ The project uses Python 3.13+ with uv for dependency management. Dependencies in The tool compares Excel data by: 1. **Sheet-specific analysis only** - No more "All Sheets" functionality, each sheet is analyzed independently -2. Finding columns by header names (not positions) +2. **Fixed column positions** - KST data from columns I & J, Coordi data from columns C & D; header names (`Title KR`/`Epi.`, `KR title`/`Chap`) are searched only as a fallback when a sheet has no column at those positions 3. Extracting title+episode combinations from both datasets within the selected sheet 4. **Fixed duplicate detection** - Only items that appear multiple times within the same dataset are marked as duplicates 5. **Mixed duplicate priority** - Items that exist in both datasets but have duplicates on one side are prioritized over pure duplicates 6. Categorizing mismatches and calculating reconciliation 7. 
Displaying results with reasons for each discrepancy +## Column Mapping + +- **KST Data**: Column I (title) and Column J (chapter/episode) +- **Coordi Data**: Column C (title) and Column D (chapter/episode) + ### BA Confirmed Cases - **US URGENT**: `금수의 영역 - Episode 17`, `신결 - Episode 23` (Coordi duplicates), `트윈 가이드 - Episode 31` (mixed duplicate) - **TH URGENT**: `백라이트 - Episode 53-1x(휴재)` (KST duplicate, doesn't appear in Coordi) \ No newline at end of file diff --git a/data/~$sample-data.xlsx b/data/~$sample-data.xlsx deleted file mode 100644 index 7719d1a3cc6cf9cd6292cbf990e012792c57ff86..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 165 ucmd;fEGo$;Em8<6O3YIr9&j@_G88eCFk~>40%-+?5FnYzkOySZ0s;W-yb~<| diff --git a/data_comparator.py b/data_comparator.py index 6653806..0c78606 100644 --- a/data_comparator.py +++ b/data_comparator.py @@ -43,7 +43,7 @@ class KSTCoordiComparator: return False def extract_kst_coordi_items_for_sheet(self, sheet_name: str) -> Dict[str, Any]: - """Extract KST and Coordi items from a specific sheet using column header names""" + """Extract KST and Coordi items from a specific sheet using fixed column positions""" if sheet_name not in self.data: raise ValueError(f"Sheet '{sheet_name}' not found in data") @@ -57,32 +57,43 @@ class KSTCoordiComparator: kst_all_items = [] # Keep all items including duplicates coordi_all_items = [] # Keep all items including duplicates - # Find columns by header names - # KST columns: 'Title KR' and 'Epi.' 
- # Coordi columns: 'KR title' and 'Chap' + # Try fixed column positions first, then fall back to header names + # KST columns: I (index 8) for title, J (index 9) for chapter + # Coordi columns: C (index 2) for title, D (index 3) for chapter - kst_title_col = None - kst_episode_col = None - coordi_title_col = None - coordi_episode_col = None + kst_title_col_idx = 8 # Column I + kst_episode_col_idx = 9 # Column J + coordi_title_col_idx = 2 # Column C + coordi_episode_col_idx = 3 # Column D - # Find KST columns - for col in columns: - if col == 'Title KR': - kst_title_col = col - elif col == 'Epi.': - kst_episode_col = col + # Get column names by index (if they exist) + kst_title_col = columns[kst_title_col_idx] if len(columns) > kst_title_col_idx else None + kst_episode_col = columns[kst_episode_col_idx] if len(columns) > kst_episode_col_idx else None + coordi_title_col = columns[coordi_title_col_idx] if len(columns) > coordi_title_col_idx else None + coordi_episode_col = columns[coordi_episode_col_idx] if len(columns) > coordi_episode_col_idx else None - # Find Coordi columns - for col in columns: - if col == 'KR title': - coordi_title_col = col - elif col == 'Chap': - coordi_episode_col = col + # Fallback: search by header names if fixed positions don't work + if not kst_title_col or not kst_episode_col: + for i, col in enumerate(columns): + if col == 'Title KR': + kst_title_col = col + kst_title_col_idx = i + elif col == 'Epi.': + kst_episode_col = col + kst_episode_col_idx = i + + if not coordi_title_col or not coordi_episode_col: + for i, col in enumerate(columns): + if col == 'KR title': + coordi_title_col = col + coordi_title_col_idx = i + elif col == 'Chap': + coordi_episode_col = col + coordi_episode_col_idx = i print(f"Sheet: {sheet_name}") - print(f" KST columns - Title: {kst_title_col}, Episode: {kst_episode_col}") - print(f" Coordi columns - Title: {coordi_title_col}, Episode: {coordi_episode_col}") + print(f" KST columns - Title: Column {chr(65 + 
kst_title_col_idx) if kst_title_col else 'None'} ({kst_title_col}), Episode: Column {chr(65 + kst_episode_col_idx) if kst_episode_col else 'None'} ({kst_episode_col})") + print(f" Coordi columns - Title: Column {chr(65 + coordi_title_col_idx) if coordi_title_col else 'None'} ({coordi_title_col}), Episode: Column {chr(65 + coordi_episode_col_idx) if coordi_episode_col else 'None'} ({coordi_episode_col})") # Extract items from each row for idx, row in df.iterrows(): diff --git a/templates/index.html b/templates/index.html index 5961426..bb50abd 100644 --- a/templates/index.html +++ b/templates/index.html @@ -279,7 +279,8 @@ function analyzeData() { const filePath = document.getElementById('filePath').value; - const sheetFilter = document.getElementById('sheetFilter').value; + const sheetFilterElement = document.getElementById('sheetFilter'); + const sheetFilter = sheetFilterElement.value || null; // Use null if empty const statusDiv = document.getElementById('status'); const analyzeBtn = document.getElementById('analyzeBtn'); @@ -328,6 +329,18 @@ const select = document.getElementById('sheetFilter'); select.innerHTML = ''; + // Add a default option if no sheets are available yet + if (!sheetNames || sheetNames.length === 0) { + const option = document.createElement('option'); + option.value = ''; + option.textContent = 'Loading sheets...'; + option.disabled = true; + option.selected = true; + select.appendChild(option); + select.disabled = true; + return; + } + sheetNames.forEach((sheetName, index) => { const option = document.createElement('option'); option.value = sheetName; @@ -390,7 +403,8 @@ document.getElementById('filePath').value = data.file_path; statusDiv.innerHTML = '
File uploaded! Analyzing data...
'; - // Analyze the uploaded file (use default sheet for new uploads) + // Clear sheet filter for new file (let it default to first sheet) + const sheetFilterElement = document.getElementById('sheetFilter'); const sheetFilter = null; // Always use default (first sheet) for new uploads return fetch('/analyze', { method: 'POST', @@ -575,6 +589,8 @@ // Auto-analyze on page load with default file window.onload = function() { + // Initialize sheet filter with loading state + updateSheetFilter([], null); analyzeData(); }; -- 2.45.2