- Stealth mode: playwright-stealth, random fingerprints, human delays - Retry logic: exponential backoff (3 attempts) - Logging: rotating logs to /root/.hermes/logs/gmb/ - Validation: phone/website/rating validation + dedup - Pain detection: 12 signals, scoring, service matching - Review scraper: extract reviews + pain keyword detection - Website health: SSL, speed, mobile, contact form checks - Pitch generator: Apex pitches (SMS, email, call, Gumtree) - Docker containerization - .env for secrets (no hardcoded API keys) - Integration with Pipecat voice dialer (gmb_to_voice.py)
435 lines
13 KiB
Python
435 lines
13 KiB
Python
"""
|
|
Pain Detection Module
|
|
=====================
|
|
Detect business pain signals and score leads for outreach priority.
|
|
Focus: Lead Generation (highest margin service)
|
|
"""
|
|
|
|
import re
|
|
from datetime import datetime
|
|
from .logger import get_logger
|
|
|
|
|
|
# Review phrases that hint at customer pain, keyed by the service category
# they point to. Phrases are lowercase because detect_review_pain() matches
# them as substrings of lowercased review text.
PAIN_KEYWORDS = {
    "lead_gen": [
        "no answer",
        "nobody answered",
        "didn't answer",
        "never answer",
        "voicemail",
        "can't reach",
        "unreachable",
        "no response",
        "didn't call back",
        "no callback",
        "never called back",
        "phone disconnected",
        "wrong number",
        "busy signal",
    ],
    "reputation": [
        "rude",
        "unprofessional",
        "terrible",
        "awful",
        "worst",
        "scam",
        "rip off",
        "overpriced",
        "expensive",
        "hidden fees",
        "waste of time",
        "waste of money",
        "don't trust",
    ],
    "website": [
        "website down",
        "can't find website",
        "no website",
        "website doesn't work",
        "broken website",
        "outdated website",
        "can't book online",
        "no online booking",
    ],
    "service_quality": [
        "slow",
        "took forever",
        "waited hours",
        "long wait",
        "unreliable",
        "didn't show up",
        "no show",
        "late",
        "poor quality",
        "bad work",
        "shoddy",
        "amateur",
    ],
}
|
|
|
|
# Registry of pain signals. Each entry carries:
#   weight      - points the signal contributes to the pain score
#   service     - the service pitched when the signal is present
#   margin      - margin tier ('highest' / 'high' / 'medium' / 'low')
#   description - human-readable explanation used in summaries
PAIN_SIGNALS = {
    'no_website': dict(
        weight=25,
        service='Website Development',
        margin='high',
        description='No website detected',
    ),
    'broken_website': dict(
        weight=20,
        service='Website Maintenance',
        margin='medium',
        description='Website has issues (SSL expired, slow, not mobile-friendly)',
    ),
    'low_rating': dict(
        weight=15,
        service='Reputation Management',
        margin='high',
        description='Rating below 3.5 stars',
    ),
    'recent_1star': dict(
        weight=20,
        service='Review Response Service',
        margin='high',
        description='Recent 1-star reviews (last 30 days)',
    ),
    'missed_calls': dict(
        weight=30,
        service='Lead Generation + Call Tracking',
        margin='highest',
        description='Reviews mention missed calls / no answer',
    ),
    'unclaimed_gmb': dict(
        weight=12,
        service='GMB Optimization',
        margin='medium',
        description='Google Business profile appears unclaimed',
    ),
    'missing_phone': dict(
        weight=10,
        service='GMB Cleanup',
        margin='low',
        description='Phone number missing from GMB',
    ),
    'no_hours': dict(
        weight=5,
        service='GMB Optimization',
        margin='low',
        description='Business hours not listed',
    ),
    'few_reviews': dict(
        weight=8,
        service='Review Generation Campaign',
        margin='medium',
        description='Less than 10 reviews total',
    ),
    'no_contact_form': dict(
        weight=15,
        service='Lead Capture Optimization',
        margin='high',
        description='Website has no contact form',
    ),
    'slow_website': dict(
        weight=10,
        service='Website Performance',
        margin='medium',
        description='Website loads slowly (>3 seconds)',
    ),
    'not_mobile_friendly': dict(
        weight=12,
        service='Mobile Optimization',
        margin='medium',
        description='Website not mobile-friendly',
    ),
}
|
|
|
|
|
|
def detect_review_pain(reviews):
    """
    Analyze reviews for pain keywords.

    Review text is lowercased and checked for substring matches against
    PAIN_KEYWORDS. A review contributes at most once to a given signal, no
    matter how many keywords of that category it contains, so ``count`` is
    the number of matching reviews and ``examples`` never repeats a review.

    Category -> signal mapping:
        lead_gen   -> 'missed_calls'
        reputation -> 'recent_1star' when the review rating is <= 2,
                      otherwise 'low_rating'
        website    -> 'broken_website'
    Any other category (e.g. 'service_quality') is ignored.

    Args:
        reviews: List of review dictionaries with 'text', 'rating', 'date'.
            Entries that are not dictionaries are skipped. Missing or
            None text is treated as empty; a missing or non-numeric
            rating is treated as 5.

    Returns:
        Dictionary mapping signal key to a dict with 'count', 'examples'
        (at most 3, each with 'text' truncated to 200 chars, 'rating',
        'date') and 'signal_info' (the PAIN_SIGNALS entry, or {}).
    """
    detected = {}

    if not reviews:
        return detected

    for review in reviews:
        if not isinstance(review, dict):
            continue

        # `or ''` covers the key being present with a None value.
        text = str(review.get('text') or '').lower()
        if not text:
            continue

        rating = review.get('rating', 5)
        review_date = review.get('date', '')

        # Scraped ratings may be None or strings; fall back to the same
        # neutral default (5) the function uses when the key is absent.
        try:
            rating_value = float(rating)
        except (TypeError, ValueError):
            rating_value = 5.0

        for category, keywords in PAIN_KEYWORDS.items():
            if category == 'lead_gen':
                signal_key = 'missed_calls'
            elif category == 'reputation':
                # NOTE(review): the review date is not consulted here, so
                # "recent" in 'recent_1star' is not actually enforced.
                signal_key = 'recent_1star' if rating_value <= 2 else 'low_rating'
            elif category == 'website':
                signal_key = 'broken_website'
            else:
                # Category has no associated signal; skip without scanning.
                continue

            if not any(keyword in text for keyword in keywords):
                continue

            if signal_key not in detected:
                detected[signal_key] = {
                    'count': 0,
                    'examples': [],
                    'signal_info': PAIN_SIGNALS.get(signal_key, {}),
                }

            entry = detected[signal_key]
            entry['count'] += 1

            # Keep only the first three examples per signal.
            if len(entry['examples']) < 3:
                entry['examples'].append({
                    'text': text[:200],
                    'rating': rating,
                    'date': review_date,
                })

    return detected
|
|
|
|
|
|
def _coerce_number(value, default=0.0):
    """Return ``value`` as a float, or ``default`` if it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def detect_structural_pain(lead):
    """
    Detect pain signals from lead structure (missing data).

    Checks performed:
        - 'website', 'phone', 'hours' missing or empty
          -> 'no_website', 'missing_phone', 'no_hours'
        - 0 < rating < 3.5        -> 'low_rating'
        - 0 < review_count < 10   -> 'few_reviews'

    A rating or review count of 0 does not raise a signal. Values that
    are None or not numeric are treated as 0 instead of raising
    TypeError on comparison.

    Args:
        lead: Business data dictionary

    Returns:
        Dictionary mapping signal key to {'count': 1, 'signal_info': ...}
    """
    detected = {}

    # Signals raised simply because a field is absent or empty.
    missing_field_signals = (
        ('website', 'no_website'),
        ('phone', 'missing_phone'),
        ('hours', 'no_hours'),
    )
    for field, signal_key in missing_field_signals:
        if not lead.get(field):
            detected[signal_key] = {
                'count': 1,
                'signal_info': PAIN_SIGNALS[signal_key],
            }

    # Low rating
    rating = _coerce_number(lead.get('rating'))
    if 0 < rating < 3.5:
        detected['low_rating'] = {
            'count': 1,
            'signal_info': PAIN_SIGNALS['low_rating'],
        }

    # Few reviews
    review_count = _coerce_number(lead.get('review_count'))
    if 0 < review_count < 10:
        detected['few_reviews'] = {
            'count': 1,
            'signal_info': PAIN_SIGNALS['few_reviews'],
        }

    return detected
|
|
|
|
|
|
def detect_website_pain(health_check):
    """
    Detect pain signals from website health check.

    Signals raised:
        - 'broken_website' when 'reachable' or 'ssl_valid' is falsy/missing
        - 'slow_website' when 'load_time' is greater than 3.0
        - 'not_mobile_friendly' when 'mobile_friendly' is falsy/missing
        - 'no_contact_form' when 'has_contact_form' is falsy/missing

    A 'load_time' that is None or not numeric is treated as 0 (not slow)
    instead of raising TypeError on comparison.

    Args:
        health_check: Dictionary from check_website_health()

    Returns:
        Dictionary of detected website pain signals
    """
    detected = {}

    if not health_check:
        return detected

    # Broken website (SSL issues, unreachable)
    if not health_check.get('reachable') or not health_check.get('ssl_valid'):
        detected['broken_website'] = {
            'count': 1,
            'signal_info': PAIN_SIGNALS['broken_website'],
            'details': health_check,
        }

    # Slow website
    raw_load_time = health_check.get('load_time', 0)
    try:
        load_time = float(raw_load_time)
    except (TypeError, ValueError):
        # e.g. None when no timing was recorded -- assumption, confirm
        # against check_website_health().
        load_time = 0.0
    if load_time > 3.0:
        detected['slow_website'] = {
            'count': 1,
            'signal_info': PAIN_SIGNALS['slow_website'],
            # Report the value exactly as supplied by the health check.
            'details': {'load_time': raw_load_time},
        }

    # NOTE(review): a missing 'mobile_friendly' / 'has_contact_form' key is
    # treated the same as an explicit False; confirm that is intended for
    # sites that could not be reached at all.

    # Not mobile friendly
    if not health_check.get('mobile_friendly'):
        detected['not_mobile_friendly'] = {
            'count': 1,
            'signal_info': PAIN_SIGNALS['not_mobile_friendly'],
        }

    # No contact form
    if not health_check.get('has_contact_form'):
        detected['no_contact_form'] = {
            'count': 1,
            'signal_info': PAIN_SIGNALS['no_contact_form'],
        }

    return detected
|
|
|
|
|
|
def _merge_signal(all_signals, key, incoming):
    """Add ``incoming`` under ``key``, summing counts if the key already exists."""
    existing = all_signals.get(key)
    if existing is None:
        all_signals[key] = incoming
        return

    # Fields from the newer source (examples, details, ...) win, fields only
    # present on the older entry are kept, and the counts are added together.
    merged = dict(existing)
    merged.update(incoming)
    merged['count'] = existing.get('count', 1) + incoming.get('count', 1)
    all_signals[key] = merged


def detect_pain_signals(lead, reviews=None, health_check=None):
    """
    Detect all pain signals for a lead.

    Combines structural signals (from the lead record), review signals
    (from review text) and website signals (from the health check, only
    when the lead has a website). When the same signal is reported by more
    than one source, the counts are summed and evidence from both is kept;
    a website health result no longer overwrites review-derived evidence
    for the same signal (e.g. 'broken_website').

    Args:
        lead: Business data dictionary
        reviews: Optional list of reviews
        health_check: Optional website health check results

    Returns:
        Dictionary with keys:
            'signals'         - merged signal dictionary
            'pain_score'      - result of calculate_pain_score()
            'signal_count'    - number of distinct signals
            'primary_service' - result of get_primary_service()
            'confidence'      - 'high' (score >= 30), 'medium' (>= 15)
                                or 'low'
    """
    logger = get_logger()

    # Structural pain (from lead data)
    all_signals = dict(detect_structural_pain(lead))

    # Review pain (from review text)
    if reviews:
        for key, value in detect_review_pain(reviews).items():
            _merge_signal(all_signals, key, value)

    # Website pain (from health check); skipped when the lead has no website
    if health_check and lead.get('website'):
        for key, value in detect_website_pain(health_check).items():
            _merge_signal(all_signals, key, value)

    # Calculate total pain score
    pain_score = calculate_pain_score(all_signals)

    # Determine primary service to pitch (highest margin)
    primary_service = get_primary_service(all_signals)

    if pain_score >= 30:
        confidence = 'high'
    elif pain_score >= 15:
        confidence = 'medium'
    else:
        confidence = 'low'

    result = {
        'signals': all_signals,
        'pain_score': pain_score,
        'signal_count': len(all_signals),
        'primary_service': primary_service,
        'confidence': confidence,
    }

    if all_signals:
        logger.info(
            f"Pain detected for '{lead.get('name', 'Unknown')}': "
            f"score={pain_score}, signals={len(all_signals)}, "
            f"primary={primary_service}"
        )

    return result
|
|
|
|
|
|
def calculate_pain_score(signals):
    """
    Calculate total pain score from detected signals.

    Each signal contributes its weight times a multiplier based on how
    often it was seen:
        count <= 1  -> 1x
        count 2-3   -> 1.5x
        count > 3   -> 2x
    A zero or negative count is scored like a single occurrence, so it can
    never outscore a signal that was actually seen once.

    The weight comes from the signal's own 'signal_info'; when that is
    missing, None or empty, the PAIN_SIGNALS entry for the key is used,
    and a weight of 5 is assumed if neither provides one.

    Args:
        signals: Dictionary of detected signals

    Returns:
        Integer pain score (higher = more pain). The sum is truncated to
        an int and is not capped.
    """
    if not signals:
        return 0

    total = 0

    for signal_key, signal_data in signals.items():
        # `or` (rather than a .get default) also covers an empty/None
        # 'signal_info' and only consults the registry when needed.
        signal_info = signal_data.get('signal_info') or PAIN_SIGNALS.get(signal_key, {})
        weight = signal_info.get('weight', 5)
        count = signal_data.get('count', 1)

        # Diminishing returns: repeats add less than the first occurrence.
        if count <= 1:
            multiplier = 1
        elif count <= 3:
            multiplier = 1.5
        else:
            multiplier = 2

        total += weight * multiplier

    return int(total)
|
|
|
|
|
|
def get_primary_service(signals):
    """
    Determine the primary service to pitch based on highest margin.

    Signals are ranked by margin tier first (highest > high > medium >
    low; an unknown tier ranks as low) and by weight second. On a full
    tie the signal encountered first wins.

    Signal metadata comes from the signal's own 'signal_info'; when that
    is missing, None or empty, the PAIN_SIGNALS entry for the key is used.
    Missing fields default to service 'General Digital Services', margin
    'low' and weight 5.

    Args:
        signals: Dictionary of detected signals

    Returns:
        Primary service name, or None when there are no signals
    """
    if not signals:
        return None

    # Margin priority: highest > high > medium > low
    margin_priority = {'highest': 4, 'high': 3, 'medium': 2, 'low': 1}

    best_service = None
    best_rank = (0, 0)  # (margin score, weight)

    for signal_key, signal_data in signals.items():
        # `or` also covers an empty/None 'signal_info'.
        signal_info = signal_data.get('signal_info') or PAIN_SIGNALS.get(signal_key, {})
        service = signal_info.get('service', 'General Digital Services')
        margin = signal_info.get('margin', 'low')
        weight = signal_info.get('weight', 5)

        # Tuple comparison: higher margin wins, weight breaks ties. The
        # strict '>' keeps the earliest signal when both are equal.
        rank = (margin_priority.get(margin, 1), weight)
        if rank > best_rank:
            best_rank = rank
            best_service = service

    return best_service
|
|
|
|
|
|
def format_pain_summary(pain_data):
    """
    Format pain data as human-readable summary.

    The summary has a header (score, confidence, primary service, signal
    count) followed by one line per signal and, where available, the first
    review example for that signal. Example text is cut to 100 characters;
    the trailing "..." is added only when text was actually cut off, and
    an empty example is omitted.

    Args:
        pain_data: Dictionary from detect_pain_signals()

    Returns:
        Formatted string, or "No pain signals detected" when there are no
        signals
    """
    signals = pain_data.get('signals')
    if not signals:
        return "No pain signals detected"

    # NOTE(review): the score is printed as "/100", but calculate_pain_score()
    # does not cap its result, so values above 100 can appear here.
    lines = [
        f"Pain Score: {pain_data['pain_score']}/100 ({pain_data['confidence']} confidence)",
        f"Primary Service: {pain_data['primary_service'] or 'None'}",
        f"Signals Detected: {pain_data['signal_count']}",
        "",
        "Details:",
    ]

    for signal_key, signal_data in signals.items():
        signal_info = signal_data.get('signal_info') or {}
        description = signal_info.get('description', signal_key)
        count = signal_data.get('count', 1)

        lines.append(f" - {description} (x{count})")

        # Add example if available
        examples = signal_data.get('examples') or []
        if examples:
            full_text = examples[0].get('text', '') or ''
            if full_text:
                snippet = full_text[:100]
                ellipsis = '...' if len(full_text) > 100 else ''
                lines.append(f' Example: "{snippet}{ellipsis}"')

    return '\n'.join(lines)
|