125 lines
3.7 KiB
Python
125 lines
3.7 KiB
Python
|
|
"""
|
||
|
|
Stealth Mode Module
|
||
|
|
===================
|
||
|
|
Anti-detection measures for Playwright scraping.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import random
|
||
|
|
from playwright_stealth import Stealth
|
||
|
|
from .logger import get_logger
|
||
|
|
|
||
|
|
|
||
|
|
# Common desktop screen resolutions as (width, height) pixel pairs.
# create_stealth_context() picks one at random for every new context.
VIEWPORTS = [
    (1920, 1080),
    (1366, 768),
    (1536, 864),
    (1440, 900),
    (1280, 720),
    (1600, 900),
    (2560, 1440),
    (1920, 1200),
    (1680, 1050),
]

# Desktop browser user-agent strings; one is chosen at random per context
# so that successive contexts do not all share the same fingerprint.
USER_AGENTS = [
    # Chrome 125 on Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    # Chrome 124 on Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    # Chrome 125 on macOS
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    # Firefox 126 on Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
    # Safari 17.5 on macOS
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
    # Edge 125 on Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0",
]

# IANA timezone identifiers covering Australian business locations.
TIMEZONES = [
    "Australia/Perth",
    "Australia/Adelaide",
    "Australia/Brisbane",
    "Australia/Sydney",
    "Australia/Melbourne",
    "Australia/Hobart",
]

# Browser locale tags offered to new contexts.
LANGUAGES = [
    "en-AU",
    "en-US",
    "en-GB",
]
|
||
|
|
|
||
|
|
|
||
|
|
def apply_stealth(context, page=None, randomize=False):
    """
    Patch a Playwright page with playwright-stealth and hand back the context.

    Args:
        context: Playwright browser context; returned as-is.
        page: Optional Playwright page. When given, playwright-stealth is
            applied to it; when omitted, nothing is patched.
        randomize: Unused. Retained so existing callers keep working
            (randomization happens at context creation).

    Returns:
        The same context object that was passed in.
    """
    log = get_logger()

    # Nothing to patch without a page; the context itself is left alone.
    if not page:
        return context

    Stealth().apply_stealth_sync(page)
    log.debug("Stealth: playwright-stealth applied")
    return context
|
||
|
|
|
||
|
|
|
||
|
|
def _redact_proxy_credentials(proxy):
    """Return ``proxy`` as text with any ``user:password@`` part masked.

    Used for logging only, so credentials embedded in a proxy URL never
    reach the log output. Text without userinfo is returned unchanged.
    """
    text = str(proxy)
    scheme, sep, rest = text.partition("://")
    if not sep:
        # No scheme (e.g. "host:3128"): the whole text is authority/path.
        scheme, rest = "", text
    # Only the authority (before the first "/") can carry userinfo.
    authority, slash, tail = rest.partition("/")
    # Split on the LAST "@" so a password containing "@" is fully masked.
    _userinfo, at, host = authority.rpartition("@")
    if not at:
        return text
    return f"{scheme}{sep}***@{host}{slash}{tail}"


def create_stealth_context(browser, headless=True, proxy=None):
    """
    Create a browser context with a randomized fingerprint.

    The viewport, user agent, locale and timezone are each drawn at random
    from the module-level VIEWPORTS, USER_AGENTS, LANGUAGES and TIMEZONES.

    Args:
        browser: Playwright browser instance
        headless: Accepted for interface compatibility; not used here
        proxy: Optional proxy URL, passed to Playwright as the proxy server

    Returns:
        The newly created Playwright context
    """
    logger = get_logger()

    # Base context options (draw order: viewport, user agent, locale, timezone)
    width, height = random.choice(VIEWPORTS)
    context_options = {
        "viewport": {"width": width, "height": height},
        "user_agent": random.choice(USER_AGENTS),
        "locale": random.choice(LANGUAGES),
        "timezone_id": random.choice(TIMEZONES),
    }

    # Add proxy if provided
    if proxy:
        context_options["proxy"] = {"server": proxy}
        # Log a masked form: proxy URLs may embed user:password credentials,
        # which must not be written to the log.
        logger.info(f"Using proxy: {_redact_proxy_credentials(proxy)}")

    # Create context
    context = browser.new_context(**context_options)

    # NOTE(review): apply_stealth only patches a page; called without one it
    # leaves the context untouched, so pages opened from this context still
    # need apply_stealth(context, page) — confirm callers do this.
    apply_stealth(context, randomize=False)

    return context
|
||
|
|
|
||
|
|
|
||
|
|
def human_delay(min_delay=1.0, max_delay=2.5, jitter=True):
    """
    Compute a randomized, human-looking pause length.

    The value is only computed and returned; this function does not sleep.

    Args:
        min_delay: Lower bound of the base delay, in seconds
        max_delay: Upper bound of the base delay, in seconds
        jitter: When true, the base delay is occasionally stretched

    Returns:
        Delay in seconds as a float
    """
    pause = random.uniform(min_delay, max_delay)

    if not jitter:
        return pause

    # Roughly one call in ten gets a longer pause (a "distracted" human),
    # stretching the base delay by a factor between 1.5 and 2.5.
    distracted = random.random() < 0.1
    if distracted:
        return pause * random.uniform(1.5, 2.5)

    return pause
|
||
|
|
|
||
|
|
|
||
|
|
def human_scroll_delay():
    """Return a pause length, in seconds, resembling human scrolling."""
    # About 70% of scrolls are quick flicks; the rest are slower,
    # as if the user stopped to read.
    quick = random.random() < 0.7
    low, high = (0.5, 1.2) if quick else (1.5, 3.0)
    return random.uniform(low, high)
|