# 1. Extract Title and Year
# Looks for pattern: Words.YYYY
title_year_match = re.search(r'^(.*?)\.(\d{4})\.', filename)
# Replace dots/underscores with spaces and apply Title Case raw_title = title_year_match.group(1) clean_title = raw_title.replace('.', ' ').replace('_', ' ').title() year = int(title_year_match.group(2)) Gargi.2022.720p.-MovieLinkBD.com-.SONYLIV.WEB-D...
# Clean up common spam domains if necessary
if "MovieLinkBD" in release_group:
    release_group = f"{release_group} (SpamTag)"
Input: 'Gargi
try: parsed_data = parse_media_filename(raw_input) clean_name = generate_clean_name(parsed_data) Gargi.2022.720p.-MovieLinkBD.com-.SONYLIV.WEB-D...
# Here is the development of that feature in Python:
# This tool extracts hidden metadata (Title, Year, Resolution, Source) from the
# chaotic filename and generates a clean, standardized name.
#
# Python Implementation
import re
from dataclasses import dataclass


@dataclass
class MediaMetadata:
    """Structured metadata extracted from a media filename.

    Instances are plain records; the dataclass decorator supplies
    ``__init__``, ``__repr__`` and ``__eq__`` over the fields below, in
    declaration order.
    """

    # Human-readable title.
    title: str
    # Release year as an integer (e.g. 2022).
    year: int
    # Resolution tag -- presumably a token such as "720p"; confirm against
    # the parser that fills this in.
    resolution: str
    # Source tag -- presumably a token such as a streaming service or
    # rip type; confirm against the parser.
    source: str
    # Name of the release group; the parser may append a marker to it for
    # known spam domains.
    release_group: str
if not title_year_match: raise ValueError("Could not parse title and year.")
def parse_media_filename(filename: str) -> MediaMetadata: """ Parses complex media filenames to extract structured metadata. Input: 'Gargi.2022.720p.-MovieLinkBD.com-.SONYLIV.WEB-D...' Output: Structured Data """