"""
Stealth Mode Module
===================
Anti-detection measures for Playwright scraping.
"""

import random

from playwright_stealth import Stealth

from .logger import get_logger

# Realistic viewports (common screen resolutions)
VIEWPORTS = [
    (1920, 1080),
    (1366, 768),
    (1536, 864),
    (1440, 900),
    (1280, 720),
    (1600, 900),
    (2560, 1440),
    (1920, 1200),
    (1680, 1050),
]

# Realistic user agents (rotated to avoid fingerprinting)
# NOTE(review): the list mixes Chrome, Firefox, Safari and Edge strings; if the
# launched browser engine differs from the advertised one, the mismatch is
# itself a fingerprint — confirm this is intended.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0",
]

# Timezones for Australian businesses
TIMEZONES = [
    "Australia/Perth",
    "Australia/Adelaide",
    "Australia/Brisbane",
    "Australia/Sydney",
    "Australia/Melbourne",
    "Australia/Hobart",
]

# Languages
LANGUAGES = ["en-AU", "en-US", "en-GB"]


def _redact_proxy(proxy):
    """Return the proxy URL with any embedded ``user:pass@`` part masked for logging."""
    text = str(proxy)
    credentials, at_sign, host = text.rpartition("@")
    if not at_sign:
        # No credentials embedded; nothing to hide.
        return text
    scheme, scheme_sep, _ = credentials.partition("://")
    if scheme_sep:
        return f"{scheme}{scheme_sep}***@{host}"
    return f"***@{host}"


def apply_stealth(context, page=None, randomize=False):
    """
    Apply playwright-stealth evasions to a page or, failing that, a context.

    When ``page`` is given, the stealth scripts are applied to that page only.
    When it is omitted, they are applied to ``context`` so that every page
    opened from it afterwards is covered. (Previously the page-less call was
    a silent no-op.) Avoid calling this again for pages that belong to a
    context which already received the context-level treatment, otherwise the
    scripts are injected twice.

    Args:
        context: Playwright browser context
        page: Optional Playwright page (applies stealth to it instead of the context)
        randomize: Ignored (kept for backward compat — randomization happens
            at context creation)

    Returns:
        The same context object that was passed in
    """
    logger = get_logger()

    # Prefer the explicit page; otherwise fall back to the whole context.
    target = page if page is not None else context
    if target is not None:
        Stealth().apply_stealth_sync(target)
        logger.debug("Stealth: playwright-stealth applied")

    return context


def create_stealth_context(browser, headless=True, proxy=None):
    """
    Create a stealth-enabled browser context.

    The viewport, user agent, locale and timezone are each drawn at random
    from the module-level pools, and the playwright-stealth scripts are
    registered on the new context before it is returned.

    Args:
        browser: Playwright browser instance
        headless: Currently unused by this function; accepted so existing
            callers keep working
        proxy: Optional proxy URL

    Returns:
        Playwright context with stealth applied
    """
    logger = get_logger()

    # Base context options
    width, height = random.choice(VIEWPORTS)
    context_options = {
        "viewport": {"width": width, "height": height},
        "user_agent": random.choice(USER_AGENTS),
        "locale": random.choice(LANGUAGES),
        "timezone_id": random.choice(TIMEZONES),
    }

    # Add proxy if provided
    if proxy:
        context_options["proxy"] = {"server": proxy}
        # Never write proxy credentials to the log.
        logger.info(f"Using proxy: {_redact_proxy(proxy)}")

    # Create context
    context = browser.new_context(**context_options)

    # Apply stealth at context level so pages created later inherit it
    apply_stealth(context)

    return context


def human_delay(min_delay=1.0, max_delay=2.5, jitter=True):
    """
    Compute a human-like delay with optional jitter.

    This function does not sleep; it returns the duration and leaves the
    waiting to the caller.

    Args:
        min_delay: Minimum delay in seconds
        max_delay: Maximum delay in seconds
        jitter: When true, occasionally stretch the delay

    Returns:
        Delay in seconds (may exceed ``max_delay`` when jitter fires)
    """
    delay = random.uniform(min_delay, max_delay)

    # Occasionally add longer pauses (like a human getting distracted)
    if jitter and random.random() < 0.1:  # 10% chance
        delay *= random.uniform(1.5, 2.5)

    return delay


def human_scroll_delay():
    """Return a delay in seconds that mimics human scrolling behavior (does not sleep)."""
    # Most scrolls are quick, some are slow (reading)
    if random.random() < 0.7:
        return random.uniform(0.5, 1.2)
    return random.uniform(1.5, 3.0)