map column index base
This commit is contained in:
parent
4a8d3d294b
commit
07b4a3f34f
@ -54,13 +54,18 @@ The project uses Python 3.13+ with uv for dependency management. Dependencies in
|
||||
|
||||
The tool compares Excel data by:
|
||||
1. **Sheet-specific analysis only** - The "All Sheets" mode has been removed; each sheet is analyzed independently
|
||||
2. Finding columns by header names (not positions)
|
||||
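2. **Fixed column positions** - KST data is read from columns I & J and Coordi data from columns C & D; header names (`Title KR`/`Epi.` for KST, `KR title`/`Chap` for Coordi) are used only as a fallback when those positions are not available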
|
||||
3. Extracting title+episode combinations from both datasets within the selected sheet
|
||||
4. **Fixed duplicate detection** - Only items that appear multiple times within the same dataset are marked as duplicates
|
||||
5. **Mixed duplicate priority** - Items that exist in both datasets but have duplicates on one side are prioritized over pure duplicates
|
||||
6. Categorizing mismatches and calculating reconciliation
|
||||
7. Displaying results with reasons for each discrepancy
|
||||
|
||||
## Column Mapping
|
||||
|
||||
- **KST Data**: Column I (title, 0-based index 8) and Column J (chapter/episode, 0-based index 9)
|
||||
- **Coordi Data**: Column C (title, 0-based index 2) and Column D (chapter/episode, 0-based index 3)
|
||||
|
||||
### BA Confirmed Cases
|
||||
- **US URGENT**: `금수의 영역 - Episode 17`, `신결 - Episode 23` (Coordi duplicates), `트윈 가이드 - Episode 31` (mixed duplicate)
|
||||
- **TH URGENT**: `백라이트 - Episode 53-1x(휴재)` (KST duplicate, doesn't appear in Coordi)
|
||||
Binary file not shown.
@ -43,7 +43,7 @@ class KSTCoordiComparator:
|
||||
return False
|
||||
|
||||
def extract_kst_coordi_items_for_sheet(self, sheet_name: str) -> Dict[str, Any]:
|
||||
"""Extract KST and Coordi items from a specific sheet using column header names"""
|
||||
"""Extract KST and Coordi items from a specific sheet using fixed column positions"""
|
||||
if sheet_name not in self.data:
|
||||
raise ValueError(f"Sheet '{sheet_name}' not found in data")
|
||||
|
||||
@ -57,32 +57,43 @@ class KSTCoordiComparator:
|
||||
kst_all_items = [] # Keep all items including duplicates
|
||||
coordi_all_items = [] # Keep all items including duplicates
|
||||
|
||||
# Find columns by header names
|
||||
# KST columns: 'Title KR' and 'Epi.'
|
||||
# Coordi columns: 'KR title' and 'Chap'
|
||||
# Try fixed column positions first, then fall back to header names
|
||||
# KST columns: I (index 8) for title, J (index 9) for chapter
|
||||
# Coordi columns: C (index 2) for title, D (index 3) for chapter
|
||||
|
||||
kst_title_col = None
|
||||
kst_episode_col = None
|
||||
coordi_title_col = None
|
||||
coordi_episode_col = None
|
||||
kst_title_col_idx = 8 # Column I
|
||||
kst_episode_col_idx = 9 # Column J
|
||||
coordi_title_col_idx = 2 # Column C
|
||||
coordi_episode_col_idx = 3 # Column D
|
||||
|
||||
# Find KST columns
|
||||
for col in columns:
|
||||
if col == 'Title KR':
|
||||
kst_title_col = col
|
||||
elif col == 'Epi.':
|
||||
kst_episode_col = col
|
||||
# Get column names by index (if they exist)
|
||||
kst_title_col = columns[kst_title_col_idx] if len(columns) > kst_title_col_idx else None
|
||||
kst_episode_col = columns[kst_episode_col_idx] if len(columns) > kst_episode_col_idx else None
|
||||
coordi_title_col = columns[coordi_title_col_idx] if len(columns) > coordi_title_col_idx else None
|
||||
coordi_episode_col = columns[coordi_episode_col_idx] if len(columns) > coordi_episode_col_idx else None
|
||||
|
||||
# Find Coordi columns
|
||||
for col in columns:
|
||||
if col == 'KR title':
|
||||
coordi_title_col = col
|
||||
elif col == 'Chap':
|
||||
coordi_episode_col = col
|
||||
# Fallback: search by header names if fixed positions don't work
|
||||
if not kst_title_col or not kst_episode_col:
|
||||
for i, col in enumerate(columns):
|
||||
if col == 'Title KR':
|
||||
kst_title_col = col
|
||||
kst_title_col_idx = i
|
||||
elif col == 'Epi.':
|
||||
kst_episode_col = col
|
||||
kst_episode_col_idx = i
|
||||
|
||||
if not coordi_title_col or not coordi_episode_col:
|
||||
for i, col in enumerate(columns):
|
||||
if col == 'KR title':
|
||||
coordi_title_col = col
|
||||
coordi_title_col_idx = i
|
||||
elif col == 'Chap':
|
||||
coordi_episode_col = col
|
||||
coordi_episode_col_idx = i
|
||||
|
||||
print(f"Sheet: {sheet_name}")
|
||||
print(f" KST columns - Title: {kst_title_col}, Episode: {kst_episode_col}")
|
||||
print(f" Coordi columns - Title: {coordi_title_col}, Episode: {coordi_episode_col}")
|
||||
print(f" KST columns - Title: Column {chr(65 + kst_title_col_idx) if kst_title_col else 'None'} ({kst_title_col}), Episode: Column {chr(65 + kst_episode_col_idx) if kst_episode_col else 'None'} ({kst_episode_col})")
|
||||
print(f" Coordi columns - Title: Column {chr(65 + coordi_title_col_idx) if coordi_title_col else 'None'} ({coordi_title_col}), Episode: Column {chr(65 + coordi_episode_col_idx) if coordi_episode_col else 'None'} ({coordi_episode_col})")
|
||||
|
||||
# Extract items from each row
|
||||
for idx, row in df.iterrows():
|
||||
|
||||
@ -279,7 +279,8 @@
|
||||
|
||||
function analyzeData() {
|
||||
const filePath = document.getElementById('filePath').value;
|
||||
const sheetFilter = document.getElementById('sheetFilter').value;
|
||||
const sheetFilterElement = document.getElementById('sheetFilter');
|
||||
const sheetFilter = sheetFilterElement.value || null; // Use null if empty
|
||||
const statusDiv = document.getElementById('status');
|
||||
const analyzeBtn = document.getElementById('analyzeBtn');
|
||||
|
||||
@ -328,6 +329,18 @@
|
||||
const select = document.getElementById('sheetFilter');
|
||||
select.innerHTML = '';
|
||||
|
||||
// Add a default option if no sheets are available yet
|
||||
if (!sheetNames || sheetNames.length === 0) {
|
||||
const option = document.createElement('option');
|
||||
option.value = '';
|
||||
option.textContent = 'Loading sheets...';
|
||||
option.disabled = true;
|
||||
option.selected = true;
|
||||
select.appendChild(option);
|
||||
select.disabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
sheetNames.forEach((sheetName, index) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = sheetName;
|
||||
@ -390,7 +403,8 @@
|
||||
document.getElementById('filePath').value = data.file_path;
|
||||
statusDiv.innerHTML = '<div class="loading">File uploaded! Analyzing data...</div>';
|
||||
|
||||
// Analyze the uploaded file (use default sheet for new uploads)
|
||||
// Clear sheet filter for new file (let it default to first sheet)
|
||||
const sheetFilterElement = document.getElementById('sheetFilter');
|
||||
const sheetFilter = null; // Always use default (first sheet) for new uploads
|
||||
return fetch('/analyze', {
|
||||
method: 'POST',
|
||||
@ -575,6 +589,8 @@
|
||||
|
||||
// Auto-analyze on page load with default file
|
||||
window.onload = function() {
|
||||
// Initialize sheet filter with loading state
|
||||
updateSheetFilter([], null);
|
||||
analyzeData();
|
||||
};
|
||||
</script>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user