map column index base #1

Merged
IDS-Viet merged 1 commit from dev/viettran into main 2025-08-21 04:24:12 +00:00
4 changed files with 57 additions and 25 deletions
Showing only changes of commit 07b4a3f34f - Show all commits

View File

@ -54,13 +54,18 @@ The project uses Python 3.13+ with uv for dependency management. Dependencies in
The tool compares Excel data by: The tool compares Excel data by:
1. **Sheet-specific analysis only** - No more "All Sheets" functionality, each sheet is analyzed independently 1. **Sheet-specific analysis only** - No more "All Sheets" functionality, each sheet is analyzed independently
2. Finding columns by header names (not positions) 2. **Fixed column positions** - KST data from columns I & J, Coordi data from columns C & D
3. Extracting title+episode combinations from both datasets within the selected sheet 3. Extracting title+episode combinations from both datasets within the selected sheet
4. **Fixed duplicate detection** - Only items that appear multiple times within the same dataset are marked as duplicates 4. **Fixed duplicate detection** - Only items that appear multiple times within the same dataset are marked as duplicates
5. **Mixed duplicate priority** - Items that exist in both datasets but have duplicates on one side are prioritized over pure duplicates 5. **Mixed duplicate priority** - Items that exist in both datasets but have duplicates on one side are prioritized over pure duplicates
6. Categorizing mismatches and calculating reconciliation 6. Categorizing mismatches and calculating reconciliation
7. Displaying results with reasons for each discrepancy 7. Displaying results with reasons for each discrepancy
## Column Mapping
- **KST Data**: Column I (title) and Column J (chapter/episode)
- **Coordi Data**: Column C (title) and Column D (chapter/episode)
### BA Confirmed Cases ### BA Confirmed Cases
- **US URGENT**: `금수의 영역 - Episode 17`, `신결 - Episode 23` (Coordi duplicates), `트윈 가이드 - Episode 31` (mixed duplicate) - **US URGENT**: `금수의 영역 - Episode 17`, `신결 - Episode 23` (Coordi duplicates), `트윈 가이드 - Episode 31` (mixed duplicate)
- **TH URGENT**: `백라이트 - Episode 53-1x(휴재)` (KST duplicate, doesn't appear in Coordi) - **TH URGENT**: `백라이트 - Episode 53-1x(휴재)` (KST duplicate, doesn't appear in Coordi)

Binary file not shown.

View File

@ -43,7 +43,7 @@ class KSTCoordiComparator:
return False return False
def extract_kst_coordi_items_for_sheet(self, sheet_name: str) -> Dict[str, Any]: def extract_kst_coordi_items_for_sheet(self, sheet_name: str) -> Dict[str, Any]:
"""Extract KST and Coordi items from a specific sheet using column header names""" """Extract KST and Coordi items from a specific sheet using fixed column positions"""
if sheet_name not in self.data: if sheet_name not in self.data:
raise ValueError(f"Sheet '{sheet_name}' not found in data") raise ValueError(f"Sheet '{sheet_name}' not found in data")
@ -57,32 +57,43 @@ class KSTCoordiComparator:
kst_all_items = [] # Keep all items including duplicates kst_all_items = [] # Keep all items including duplicates
coordi_all_items = [] # Keep all items including duplicates coordi_all_items = [] # Keep all items including duplicates
# Find columns by header names # Try fixed column positions first, then fall back to header names
# KST columns: 'Title KR' and 'Epi.' # KST columns: I (index 8) for title, J (index 9) for chapter
# Coordi columns: 'KR title' and 'Chap' # Coordi columns: C (index 2) for title, D (index 3) for chapter
kst_title_col = None kst_title_col_idx = 8 # Column I
kst_episode_col = None kst_episode_col_idx = 9 # Column J
coordi_title_col = None coordi_title_col_idx = 2 # Column C
coordi_episode_col = None coordi_episode_col_idx = 3 # Column D
# Find KST columns # Get column names by index (if they exist)
for col in columns: kst_title_col = columns[kst_title_col_idx] if len(columns) > kst_title_col_idx else None
if col == 'Title KR': kst_episode_col = columns[kst_episode_col_idx] if len(columns) > kst_episode_col_idx else None
kst_title_col = col coordi_title_col = columns[coordi_title_col_idx] if len(columns) > coordi_title_col_idx else None
elif col == 'Epi.': coordi_episode_col = columns[coordi_episode_col_idx] if len(columns) > coordi_episode_col_idx else None
kst_episode_col = col
# Find Coordi columns # Fallback: search by header names if fixed positions don't work
for col in columns: if not kst_title_col or not kst_episode_col:
if col == 'KR title': for i, col in enumerate(columns):
coordi_title_col = col if col == 'Title KR':
elif col == 'Chap': kst_title_col = col
coordi_episode_col = col kst_title_col_idx = i
elif col == 'Epi.':
kst_episode_col = col
kst_episode_col_idx = i
if not coordi_title_col or not coordi_episode_col:
for i, col in enumerate(columns):
if col == 'KR title':
coordi_title_col = col
coordi_title_col_idx = i
elif col == 'Chap':
coordi_episode_col = col
coordi_episode_col_idx = i
print(f"Sheet: {sheet_name}") print(f"Sheet: {sheet_name}")
print(f" KST columns - Title: {kst_title_col}, Episode: {kst_episode_col}") print(f" KST columns - Title: Column {chr(65 + kst_title_col_idx) if kst_title_col else 'None'} ({kst_title_col}), Episode: Column {chr(65 + kst_episode_col_idx) if kst_episode_col else 'None'} ({kst_episode_col})")
print(f" Coordi columns - Title: {coordi_title_col}, Episode: {coordi_episode_col}") print(f" Coordi columns - Title: Column {chr(65 + coordi_title_col_idx) if coordi_title_col else 'None'} ({coordi_title_col}), Episode: Column {chr(65 + coordi_episode_col_idx) if coordi_episode_col else 'None'} ({coordi_episode_col})")
# Extract items from each row # Extract items from each row
for idx, row in df.iterrows(): for idx, row in df.iterrows():

View File

@ -279,7 +279,8 @@
function analyzeData() { function analyzeData() {
const filePath = document.getElementById('filePath').value; const filePath = document.getElementById('filePath').value;
const sheetFilter = document.getElementById('sheetFilter').value; const sheetFilterElement = document.getElementById('sheetFilter');
const sheetFilter = sheetFilterElement.value || null; // Use null if empty
const statusDiv = document.getElementById('status'); const statusDiv = document.getElementById('status');
const analyzeBtn = document.getElementById('analyzeBtn'); const analyzeBtn = document.getElementById('analyzeBtn');
@ -328,6 +329,18 @@
const select = document.getElementById('sheetFilter'); const select = document.getElementById('sheetFilter');
select.innerHTML = ''; select.innerHTML = '';
// Add a default option if no sheets are available yet
if (!sheetNames || sheetNames.length === 0) {
const option = document.createElement('option');
option.value = '';
option.textContent = 'Loading sheets...';
option.disabled = true;
option.selected = true;
select.appendChild(option);
select.disabled = true;
return;
}
sheetNames.forEach((sheetName, index) => { sheetNames.forEach((sheetName, index) => {
const option = document.createElement('option'); const option = document.createElement('option');
option.value = sheetName; option.value = sheetName;
@ -390,7 +403,8 @@
document.getElementById('filePath').value = data.file_path; document.getElementById('filePath').value = data.file_path;
statusDiv.innerHTML = '<div class="loading">File uploaded! Analyzing data...</div>'; statusDiv.innerHTML = '<div class="loading">File uploaded! Analyzing data...</div>';
// Analyze the uploaded file (use default sheet for new uploads) // Clear sheet filter for new file (let it default to first sheet)
const sheetFilterElement = document.getElementById('sheetFilter');
const sheetFilter = null; // Always use default (first sheet) for new uploads const sheetFilter = null; // Always use default (first sheet) for new uploads
return fetch('/analyze', { return fetch('/analyze', {
method: 'POST', method: 'POST',
@ -575,6 +589,8 @@
// Auto-analyze on page load with default file // Auto-analyze on page load with default file
window.onload = function() { window.onload = function() {
// Initialize sheet filter with loading state
updateSheetFilter([], null);
analyzeData(); analyzeData();
}; };
</script> </script>