""" Logging Module ============== Structured logging with rotation for production use. """ import logging import sys from pathlib import Path from logging.handlers import RotatingFileHandler from datetime import datetime def setup_logger(name='gmb_scraper', log_dir='/root/.hermes/logs/gmb', level=logging.INFO): """ Setup logger with console + file output and rotation. Args: name: Logger name log_dir: Directory for log files level: Logging level Returns: logging.Logger instance """ # Create log directory log_path = Path(log_dir) log_path.mkdir(parents=True, exist_ok=True) # Create logger logger = logging.getLogger(name) logger.setLevel(level) logger.handlers.clear() # Remove existing handlers # Console handler (human-readable) console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) console_formatter = logging.Formatter( '%(asctime)s | %(levelname)-7s | %(message)s', datefmt='%H:%M:%S' ) console_handler.setFormatter(console_formatter) logger.addHandler(console_handler) # File handler with rotation (structured) log_file = log_path / f'{name}.log' file_handler = RotatingFileHandler( log_file, maxBytes=10*1024*1024, # 10MB backupCount=5, encoding='utf-8' ) file_handler.setLevel(logging.DEBUG) file_formatter = logging.Formatter( '%(asctime)s | %(levelname)-7s | %(name)s | %(funcName)s:%(lineno)d | %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) file_handler.setFormatter(file_formatter) logger.addHandler(file_handler) # Error file handler (only errors) error_log_file = log_path / f'{name}_errors.log' error_handler = RotatingFileHandler( error_log_file, maxBytes=5*1024*1024, # 5MB backupCount=10, encoding='utf-8' ) error_handler.setLevel(logging.ERROR) error_handler.setFormatter(file_formatter) logger.addHandler(error_handler) return logger def get_logger(name='gmb_scraper'): """Get existing logger or create default.""" logger = logging.getLogger(name) if not logger.handlers: return setup_logger(name) return logger class ScraperStats: """Track scraper statistics.""" def __init__(self, logger): self.logger = logger self.stats = { 'start_time': datetime.now(), 'queries': 0, 'businesses_found': 0, 'businesses_scraped': 0, 'businesses_filtered': 0, 'retries': 0, 'errors': 0, 'warnings': 0, 'pain_signals_detected': 0, 'reviews_scraped': 0, 'websites_checked': 0, 'pitches_generated': 0, } def increment(self, key, value=1): """Increment a stat counter.""" if key in self.stats: self.stats[key] += value def log_summary(self): """Log final statistics.""" duration = (datetime.now() - self.stats['start_time']).total_seconds() self.logger.info("=" * 80) self.logger.info("SCRAPER STATISTICS") self.logger.info("=" * 80) self.logger.info(f"Duration: {duration:.1f}s") self.logger.info(f"Businesses found: {self.stats['businesses_found']}") self.logger.info(f"Businesses scraped: {self.stats['businesses_scraped']}") self.logger.info(f"Businesses filtered: {self.stats['businesses_filtered']}") self.logger.info(f"Reviews scraped: {self.stats['reviews_scraped']}") self.logger.info(f"Websites checked: {self.stats['websites_checked']}") self.logger.info(f"Pain signals detected: {self.stats['pain_signals_detected']}") self.logger.info(f"Pitches generated: {self.stats['pitches_generated']}") self.logger.info(f"Retries: {self.stats['retries']}") self.logger.info(f"Warnings: {self.stats['warnings']}") self.logger.info(f"Errors: {self.stats['errors']}") self.logger.info("=" * 80)