map column index base #1
@ -54,13 +54,18 @@ The project uses Python 3.13+ with uv for dependency management. Dependencies in
|
|||||||
|
|
||||||
The tool compares Excel data by:
|
The tool compares Excel data by:
|
||||||
1. **Sheet-specific analysis only** - The "All Sheets" option has been removed; each sheet is analyzed independently
|
1. **Sheet-specific analysis only** - The "All Sheets" option has been removed; each sheet is analyzed independently
|
||||||
2. Finding columns by header names (not positions)
|
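2. **Fixed column positions** - KST data is read from columns I & J and Coordi data from columns C & D; header-name lookup is used only as a fallback when a sheet has too few columns for those positions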
|
||||||
3. Extracting title+episode combinations from both datasets within the selected sheet
|
3. Extracting title+episode combinations from both datasets within the selected sheet
|
||||||
4. **Fixed duplicate detection** - Only items that appear multiple times within the same dataset are marked as duplicates
|
4. **Fixed duplicate detection** - Only items that appear multiple times within the same dataset are marked as duplicates
|
||||||
5. **Mixed duplicate priority** - Items that exist in both datasets but have duplicates on one side are prioritized over pure duplicates
|
5. **Mixed duplicate priority** - Items that exist in both datasets but have duplicates on one side are prioritized over pure duplicates
|
||||||
6. Categorizing mismatches and calculating reconciliation
|
6. Categorizing mismatches and calculating reconciliation
|
||||||
7. Displaying results with reasons for each discrepancy
|
7. Displaying results with reasons for each discrepancy
|
||||||
|
|
||||||
|
## Column Mapping
|
||||||
|
|
||||||
|
- **KST Data**: Column I (title) and Column J (chapter/episode)
|
||||||
|
- **Coordi Data**: Column C (title) and Column D (chapter/episode)
|
||||||
|
|
||||||
### BA Confirmed Cases
|
### BA Confirmed Cases
|
||||||
- **US URGENT**: `금수의 영역 - Episode 17`, `신결 - Episode 23` (Coordi duplicates), `트윈 가이드 - Episode 31` (mixed duplicate)
|
- **US URGENT**: `금수의 영역 - Episode 17`, `신결 - Episode 23` (Coordi duplicates), `트윈 가이드 - Episode 31` (mixed duplicate)
|
||||||
- **TH URGENT**: `백라이트 - Episode 53-1x(휴재)` (KST duplicate, doesn't appear in Coordi)
|
- **TH URGENT**: `백라이트 - Episode 53-1x(휴재)` (KST duplicate, doesn't appear in Coordi)
|
||||||
Binary file not shown.
@ -43,7 +43,7 @@ class KSTCoordiComparator:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def extract_kst_coordi_items_for_sheet(self, sheet_name: str) -> Dict[str, Any]:
|
def extract_kst_coordi_items_for_sheet(self, sheet_name: str) -> Dict[str, Any]:
|
||||||
"""Extract KST and Coordi items from a specific sheet using column header names"""
|
"""Extract KST and Coordi items from a specific sheet using fixed column positions"""
|
||||||
if sheet_name not in self.data:
|
if sheet_name not in self.data:
|
||||||
raise ValueError(f"Sheet '{sheet_name}' not found in data")
|
raise ValueError(f"Sheet '{sheet_name}' not found in data")
|
||||||
|
|
||||||
@ -57,32 +57,43 @@ class KSTCoordiComparator:
|
|||||||
kst_all_items = [] # Keep all items including duplicates
|
kst_all_items = [] # Keep all items including duplicates
|
||||||
coordi_all_items = [] # Keep all items including duplicates
|
coordi_all_items = [] # Keep all items including duplicates
|
||||||
|
|
||||||
# Find columns by header names
|
# Try fixed column positions first, then fall back to header names
|
||||||
# KST columns: 'Title KR' and 'Epi.'
|
# KST columns: I (index 8) for title, J (index 9) for chapter
|
||||||
# Coordi columns: 'KR title' and 'Chap'
|
# Coordi columns: C (index 2) for title, D (index 3) for chapter
|
||||||
|
|
||||||
kst_title_col = None
|
kst_title_col_idx = 8 # Column I
|
||||||
kst_episode_col = None
|
kst_episode_col_idx = 9 # Column J
|
||||||
coordi_title_col = None
|
coordi_title_col_idx = 2 # Column C
|
||||||
coordi_episode_col = None
|
coordi_episode_col_idx = 3 # Column D
|
||||||
|
|
||||||
# Find KST columns
|
# Get column names by index (if they exist)
|
||||||
for col in columns:
|
kst_title_col = columns[kst_title_col_idx] if len(columns) > kst_title_col_idx else None
|
||||||
if col == 'Title KR':
|
kst_episode_col = columns[kst_episode_col_idx] if len(columns) > kst_episode_col_idx else None
|
||||||
kst_title_col = col
|
coordi_title_col = columns[coordi_title_col_idx] if len(columns) > coordi_title_col_idx else None
|
||||||
elif col == 'Epi.':
|
coordi_episode_col = columns[coordi_episode_col_idx] if len(columns) > coordi_episode_col_idx else None
|
||||||
kst_episode_col = col
|
|
||||||
|
|
||||||
# Find Coordi columns
|
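# Fallback: search by header names, but only when a fixed position yielded no column (sheet too narrow or empty header); a wrong header at a fixed position is NOT detected here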
|
||||||
for col in columns:
|
if not kst_title_col or not kst_episode_col:
|
||||||
if col == 'KR title':
|
for i, col in enumerate(columns):
|
||||||
coordi_title_col = col
|
if col == 'Title KR':
|
||||||
elif col == 'Chap':
|
kst_title_col = col
|
||||||
coordi_episode_col = col
|
kst_title_col_idx = i
|
||||||
|
elif col == 'Epi.':
|
||||||
|
kst_episode_col = col
|
||||||
|
kst_episode_col_idx = i
|
||||||
|
|
||||||
|
if not coordi_title_col or not coordi_episode_col:
|
||||||
|
for i, col in enumerate(columns):
|
||||||
|
if col == 'KR title':
|
||||||
|
coordi_title_col = col
|
||||||
|
coordi_title_col_idx = i
|
||||||
|
elif col == 'Chap':
|
||||||
|
coordi_episode_col = col
|
||||||
|
coordi_episode_col_idx = i
|
||||||
|
|
||||||
print(f"Sheet: {sheet_name}")
|
print(f"Sheet: {sheet_name}")
|
||||||
print(f" KST columns - Title: {kst_title_col}, Episode: {kst_episode_col}")
|
print(f" KST columns - Title: Column {chr(65 + kst_title_col_idx) if kst_title_col else 'None'} ({kst_title_col}), Episode: Column {chr(65 + kst_episode_col_idx) if kst_episode_col else 'None'} ({kst_episode_col})")
|
||||||
print(f" Coordi columns - Title: {coordi_title_col}, Episode: {coordi_episode_col}")
|
print(f" Coordi columns - Title: Column {chr(65 + coordi_title_col_idx) if coordi_title_col else 'None'} ({coordi_title_col}), Episode: Column {chr(65 + coordi_episode_col_idx) if coordi_episode_col else 'None'} ({coordi_episode_col})")
|
||||||
|
|
||||||
# Extract items from each row
|
# Extract items from each row
|
||||||
for idx, row in df.iterrows():
|
for idx, row in df.iterrows():
|
||||||
|
|||||||
@ -279,7 +279,8 @@
|
|||||||
|
|
||||||
function analyzeData() {
|
function analyzeData() {
|
||||||
const filePath = document.getElementById('filePath').value;
|
const filePath = document.getElementById('filePath').value;
|
||||||
const sheetFilter = document.getElementById('sheetFilter').value;
|
const sheetFilterElement = document.getElementById('sheetFilter');
|
||||||
|
const sheetFilter = sheetFilterElement.value || null; // Use null if empty
|
||||||
const statusDiv = document.getElementById('status');
|
const statusDiv = document.getElementById('status');
|
||||||
const analyzeBtn = document.getElementById('analyzeBtn');
|
const analyzeBtn = document.getElementById('analyzeBtn');
|
||||||
|
|
||||||
@ -328,6 +329,18 @@
|
|||||||
const select = document.getElementById('sheetFilter');
|
const select = document.getElementById('sheetFilter');
|
||||||
select.innerHTML = '';
|
select.innerHTML = '';
|
||||||
|
|
||||||
|
// Add a default option if no sheets are available yet
|
||||||
|
if (!sheetNames || sheetNames.length === 0) {
|
||||||
|
const option = document.createElement('option');
|
||||||
|
option.value = '';
|
||||||
|
option.textContent = 'Loading sheets...';
|
||||||
|
option.disabled = true;
|
||||||
|
option.selected = true;
|
||||||
|
select.appendChild(option);
|
||||||
|
select.disabled = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
sheetNames.forEach((sheetName, index) => {
|
sheetNames.forEach((sheetName, index) => {
|
||||||
const option = document.createElement('option');
|
const option = document.createElement('option');
|
||||||
option.value = sheetName;
|
option.value = sheetName;
|
||||||
@ -390,7 +403,8 @@
|
|||||||
document.getElementById('filePath').value = data.file_path;
|
document.getElementById('filePath').value = data.file_path;
|
||||||
statusDiv.innerHTML = '<div class="loading">File uploaded! Analyzing data...</div>';
|
statusDiv.innerHTML = '<div class="loading">File uploaded! Analyzing data...</div>';
|
||||||
|
|
||||||
// Analyze the uploaded file (use default sheet for new uploads)
|
// Clear sheet filter for new file (let it default to first sheet)
|
||||||
|
const sheetFilterElement = document.getElementById('sheetFilter');
|
||||||
const sheetFilter = null; // Always use default (first sheet) for new uploads
|
const sheetFilter = null; // Always use default (first sheet) for new uploads
|
||||||
return fetch('/analyze', {
|
return fetch('/analyze', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
@ -575,6 +589,8 @@
|
|||||||
|
|
||||||
// Auto-analyze on page load with default file
|
// Auto-analyze on page load with default file
|
||||||
window.onload = function() {
|
window.onload = function() {
|
||||||
|
// Initialize sheet filter with loading state
|
||||||
|
updateSheetFilter([], null);
|
||||||
analyzeData();
|
analyzeData();
|
||||||
};
|
};
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user