GMB-Scraper/lib/logger.py

127 lines
4.1 KiB
Python
Raw Permalink Normal View History

"""
Logging Module
==============
Structured logging with rotation for production use.
"""
import logging
import sys
from pathlib import Path
from logging.handlers import RotatingFileHandler
from datetime import datetime
def setup_logger(name='gmb_scraper', log_dir='/root/.hermes/logs/gmb', level=logging.INFO):
    """
    Set up a logger with console output and two rotating log files.

    Three handlers are attached to the named logger:

    * console (stdout): INFO and above, short ``HH:MM:SS`` timestamps
    * ``<log_dir>/<name>.log``: DEBUG and above, rotated at 10 MB, 5 backups
    * ``<log_dir>/<name>_errors.log``: ERROR and above, rotated at 5 MB,
      10 backups

    Calling this again for the same ``name`` replaces the handlers; the
    previously attached handlers are closed so their log files are released.

    Args:
        name: Logger name; also used as the log file base name.
        log_dir: Directory for log files (created if missing).
        level: Level set on the logger itself. Records below this level are
            dropped before any handler sees them, so the main log file only
            receives DEBUG records when ``level`` is DEBUG or lower.

    Returns:
        logging.Logger instance

    Raises:
        OSError: If the log directory cannot be created or a log file
            cannot be opened.
    """
    # Create log directory
    log_path = Path(log_dir)
    log_path.mkdir(parents=True, exist_ok=True)

    # Create logger
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Detach AND close existing handlers. Merely clearing the list would
    # leave the old rotating-file streams open (a file-descriptor leak on
    # every repeated call).
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    # Console handler (human-readable)
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_formatter = logging.Formatter(
        '%(asctime)s | %(levelname)-7s | %(message)s',
        datefmt='%H:%M:%S'
    )
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # File handler with rotation (structured)
    log_file = log_path / f'{name}.log'
    file_handler = RotatingFileHandler(
        log_file,
        maxBytes=10*1024*1024,  # 10MB
        backupCount=5,
        encoding='utf-8'
    )
    file_handler.setLevel(logging.DEBUG)
    file_formatter = logging.Formatter(
        '%(asctime)s | %(levelname)-7s | %(name)s | %(funcName)s:%(lineno)d | %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )
    file_handler.setFormatter(file_formatter)
    logger.addHandler(file_handler)

    # Error file handler (only errors); shares the structured file format
    error_log_file = log_path / f'{name}_errors.log'
    error_handler = RotatingFileHandler(
        error_log_file,
        maxBytes=5*1024*1024,  # 5MB
        backupCount=10,
        encoding='utf-8'
    )
    error_handler.setLevel(logging.ERROR)
    error_handler.setFormatter(file_formatter)
    logger.addHandler(error_handler)

    return logger
def get_logger(name='gmb_scraper'):
    """
    Return the logger called ``name``, configuring it on first use.

    A logger that already has at least one handler attached is returned
    unchanged; otherwise it is handed to ``setup_logger`` with that
    function's default directory and level.
    """
    existing = logging.getLogger(name)
    return existing if existing.handlers else setup_logger(name)
class ScraperStats:
    """
    Track scraper statistics.

    Counters live in the public ``stats`` dict alongside ``'start_time'``
    (the ``datetime`` at which this object was created). ``log_summary``
    writes all of them through the logger supplied at construction.
    """

    # (label, stats key) pairs in the order they appear in the summary.
    _SUMMARY_FIELDS = (
        ('Queries', 'queries'),
        ('Businesses found', 'businesses_found'),
        ('Businesses scraped', 'businesses_scraped'),
        ('Businesses filtered', 'businesses_filtered'),
        ('Reviews scraped', 'reviews_scraped'),
        ('Websites checked', 'websites_checked'),
        ('Pain signals detected', 'pain_signals_detected'),
        ('Pitches generated', 'pitches_generated'),
        ('Retries', 'retries'),
        ('Warnings', 'warnings'),
        ('Errors', 'errors'),
    )

    def __init__(self, logger):
        """
        Create zeroed counters and record the start time.

        Args:
            logger: Object with an ``info(msg)`` method used by
                ``log_summary`` (e.g. a ``logging.Logger``).
        """
        self.logger = logger
        self.stats = {
            'start_time': datetime.now(),
            'queries': 0,
            'businesses_found': 0,
            'businesses_scraped': 0,
            'businesses_filtered': 0,
            'retries': 0,
            'errors': 0,
            'warnings': 0,
            'pain_signals_detected': 0,
            'reviews_scraped': 0,
            'websites_checked': 0,
            'pitches_generated': 0,
        }

    def increment(self, key, value=1):
        """
        Increment a stat counter.

        Unknown keys are ignored. Non-numeric entries (``'start_time'``) are
        ignored as well, so a stray call cannot raise ``TypeError`` by adding
        a number to a datetime.

        Args:
            key: Name of the counter in ``stats``.
            value: Amount to add (default 1).
        """
        current = self.stats.get(key)
        if isinstance(current, (int, float)):
            self.stats[key] = current + value

    def log_summary(self):
        """
        Log final statistics.

        Emits a banner, the elapsed time since construction, then one line
        per counter (including ``queries``), all at INFO level.
        """
        duration = (datetime.now() - self.stats['start_time']).total_seconds()
        banner = "=" * 80

        self.logger.info(banner)
        self.logger.info("SCRAPER STATISTICS")
        self.logger.info(banner)
        self.logger.info(f"Duration: {duration:.1f}s")
        for label, key in self._SUMMARY_FIELDS:
            self.logger.info(f"{label}: {self.stats[key]}")
        self.logger.info(banner)