#!/usr/bin/env python3
"""
Log Processor for Fraud Detection
Analyzes NGINX logs to detect unregistered usage and suspicious patterns
"""

import json
import logging
import os
import time
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional

import redis
import requests

# Configure logging once at import time. The level name comes from the
# LOG_LEVEL environment variable; it is normalized because logging only
# accepts upper-case level names ('INFO', not 'info'), and an empty value
# falls back to INFO instead of raising ValueError.
logging.basicConfig(
    level=(os.getenv('LOG_LEVEL') or 'INFO').strip().upper(),
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

class LogProcessor:
    """Processes NGINX logs for fraud detection.

    Reads JSON log lines from ``log_path``, groups the new entries by user
    and by device, and reports suspicious patterns. Each alert is stored in
    Redis under a ``fraud:alert:*`` key with a TTL and is also POSTed to the
    backend at ``{backend_url}/api/fraud/report``.
    """

    # Detection thresholds (an alert fires when the value is exceeded).
    MAX_REQUESTS_PER_HOUR = 100
    MAX_DEVICES_PER_10_MINUTES = 3
    MAX_MODELS_PER_5_MINUTES = 5

    # Scheduling and retention settings used by run() and alert storage.
    POLL_INTERVAL_SECONDS = 60
    COMPARE_INTERVAL_SECONDS = 300
    MAX_PROCESSED_IDS = 10000
    ALERT_TTL_SECONDS = 86400

    def __init__(self):
        """Read configuration from the environment and connect to Redis."""
        self.backend_url = os.getenv('BACKEND_URL', 'http://localhost:8000')
        self.service_token = os.getenv('SERVICE_TOKEN', '')
        self.redis_host = os.getenv('REDIS_HOST', 'redis')
        self.log_path = '/var/log/nginx/ultrawhisper.log'

        # Redis connection; decode_responses=True makes keys/values str.
        self.redis_client = redis.Redis(
            host=self.redis_host,
            port=6379,
            db=0,
            decode_responses=True
        )

        # Tracking
        # Dedupe keys of log entries that were already analyzed.
        self.processed_logs = set()
        self.fraud_patterns = defaultdict(list)

    def parse_log_line(self, line: str) -> Optional[Dict[str, Any]]:
        """Parse one JSON log line.

        Returns the decoded object when the line is a JSON object, and
        ``None`` when the line is not valid JSON or decodes to anything
        other than a dict (callers rely on ``.get``).
        """
        try:
            parsed = json.loads(line)
        except (json.JSONDecodeError, TypeError):
            return None
        return parsed if isinstance(parsed, dict) else None

    def analyze_logs(self):
        """Analyze not-yet-processed log entries for suspicious patterns.

        Entries are deduplicated via ``processed_logs``, so only entries
        that were not seen in an earlier call reach
        ``detect_fraud_patterns``. Any error is logged, not raised.
        """
        try:
            if not os.path.exists(self.log_path):
                logger.warning(f"Log file not found: {self.log_path}")
                return

            # Parse and group by user/device
            usage_by_user = defaultdict(list)
            usage_by_device = defaultdict(list)

            # Stream the file line by line instead of loading it whole;
            # undecodable bytes are replaced rather than aborting the run.
            with open(self.log_path, 'r', encoding='utf-8', errors='replace') as f:
                for line in f:
                    log_entry = self.parse_log_line(line)
                    if not log_entry:
                        continue

                    # Entries without a request_id are deduplicated on the
                    # raw line; otherwise the first id-less entry would
                    # cause every later id-less entry to be skipped.
                    dedupe_key = log_entry.get('request_id') or line.rstrip('\r\n')
                    if dedupe_key in self.processed_logs:
                        continue
                    self.processed_logs.add(dedupe_key)

                    # Group by user and device
                    user_id = log_entry.get('user_id')
                    device_id = log_entry.get('device_id')

                    if user_id:
                        usage_by_user[user_id].append(log_entry)
                    if device_id:
                        usage_by_device[device_id].append(log_entry)

            # Detect patterns
            self.detect_fraud_patterns(usage_by_user, usage_by_device)

        except Exception as e:
            logger.error(f"Error analyzing logs: {e}")

    def detect_fraud_patterns(self, by_user: Dict, by_device: Dict):
        """Detect suspicious usage patterns and report each one found.

        Args:
            by_user: Mapping of user id to that user's log entries.
            by_device: Mapping of device id to that device's log entries.
        """

        # Pattern 1: Excessive requests from single device
        for device_id, logs in by_device.items():
            recent_logs = self.get_recent_logs(logs, hours=1)

            if len(recent_logs) > self.MAX_REQUESTS_PER_HOUR:
                self.report_suspicious_activity({
                    'type': 'excessive_requests',
                    'device_id': device_id,
                    'count': len(recent_logs),
                    'period': '1_hour'
                })

        # Pattern 2: Multiple devices for same user in short time
        for user_id, logs in by_user.items():
            recent_logs = self.get_recent_logs(logs, minutes=10)
            # Entries without a device_id must not count as a device.
            recent_devices = {
                log.get('device_id') for log in recent_logs if log.get('device_id')
            }

            if len(recent_devices) > self.MAX_DEVICES_PER_10_MINUTES:
                self.report_suspicious_activity({
                    'type': 'multiple_devices',
                    'user_id': user_id,
                    'device_count': len(recent_devices),
                    'devices': list(recent_devices),
                    'period': '10_minutes'
                })

        # Pattern 3: Rapid model switching (potential abuse)
        for user_id, logs in by_user.items():
            recent_logs = self.get_recent_logs(logs, minutes=5)
            models_used = [log.get('model') for log in recent_logs if log.get('model')]
            distinct_models = set(models_used)

            if len(distinct_models) > self.MAX_MODELS_PER_5_MINUTES:
                self.report_suspicious_activity({
                    'type': 'model_switching',
                    'user_id': user_id,
                    'models': list(distinct_models),
                    'count': len(models_used),
                    'period': '5_minutes'
                })

    def get_recent_logs(self, logs: List[Dict], hours: int = 0, minutes: int = 0) -> List[Dict]:
        """Return the entries whose timestamp is newer than the cutoff.

        Args:
            logs: Log entries; each may carry an ISO-8601 ``timestamp``.
            hours: Window size in hours.
            minutes: Additional window size in minutes.

        Returns:
            Entries with a parseable timestamp later than ``now - window``,
            in their original order. Entries with a missing or unparseable
            timestamp are omitted.
        """
        # Use an aware UTC cutoff: comparing a naive cutoff with an
        # offset-aware timestamp raises TypeError, which previously caused
        # every timezone-qualified entry to be dropped.
        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours, minutes=minutes)
        recent = []

        for log in logs:
            timestamp_str = log.get('timestamp')
            if not timestamp_str:
                continue
            try:
                # fromisoformat() before Python 3.11 rejects a trailing 'Z'.
                timestamp = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
            except (AttributeError, TypeError, ValueError):
                continue
            if timestamp.tzinfo is None:
                # Naive timestamps are treated as UTC, as before.
                timestamp = timestamp.replace(tzinfo=timezone.utc)
            if timestamp > cutoff:
                recent.append(log)

        return recent

    def compare_with_backend(self):
        """Compare proxy usage counters in Redis with the backend's records.

        Collects ``usage:counters:*`` hashes from Redis, POSTs them to
        ``{backend_url}/api/usage/compare`` and reports every returned
        discrepancy as a ``usage_discrepancy`` alert. Errors are logged,
        not raised.
        """
        try:
            # Get usage counts from Redis
            proxy_counts = {}

            for key in self.redis_client.scan_iter("usage:counters:*"):
                user_id = key.split(':')[-1]
                data = self.redis_client.hgetall(key)
                try:
                    proxy_counts[user_id] = {
                        'total_requests': int(data.get('total_requests', 0)),
                        'total_credits': int(data.get('total_credits', 0))
                    }
                except (TypeError, ValueError) as e:
                    # One malformed counter must not abort the comparison.
                    logger.warning(f"Skipping malformed usage counter {key}: {e}")

            # Send to backend for comparison
            response = requests.post(
                f"{self.backend_url}/api/usage/compare",
                json={'proxy_usage': proxy_counts},
                headers={
                    'X-Service-Token': self.service_token,
                    'Content-Type': 'application/json'
                },
                timeout=10
            )

            if response.status_code != 200:
                logger.error(f"Usage comparison failed: HTTP {response.status_code}")
                return

            for discrepancy in response.json().get('discrepancies', []):
                try:
                    alert = {
                        'type': 'usage_discrepancy',
                        'user_id': discrepancy['user_id'],
                        'proxy_count': discrepancy['proxy_count'],
                        'backend_count': discrepancy['backend_count'],
                        'difference': discrepancy['difference']
                    }
                except (KeyError, TypeError) as e:
                    # Skip an incomplete record but keep the remaining ones.
                    logger.warning(f"Skipping malformed discrepancy record: {e}")
                    continue
                self.report_suspicious_activity(alert)

        except Exception as e:
            logger.error(f"Error comparing with backend: {e}")

    def report_suspicious_activity(self, activity: Dict[str, Any]):
        """Store an alert in Redis and report it to the backend.

        The two steps are attempted independently so that a Redis failure
        does not prevent the backend from being notified. Failures are
        logged, not raised.

        Args:
            activity: Alert payload; must contain a ``type`` key.
        """
        # Store in Redis for immediate action
        try:
            key = f"fraud:alert:{activity['type']}:{time.time()}"
            self.redis_client.setex(key, self.ALERT_TTL_SECONDS, json.dumps(activity))
        except Exception as e:
            logger.error(f"Failed to store fraud alert in Redis: {e}")

        # Send to backend
        try:
            response = requests.post(
                f"{self.backend_url}/api/fraud/report",
                json=activity,
                headers={
                    'X-Service-Token': self.service_token,
                    'Content-Type': 'application/json'
                },
                timeout=5
            )

            if response.status_code == 200:
                logger.warning(f"Fraud alert reported: {activity['type']} for {activity.get('user_id') or activity.get('device_id')}")
            else:
                logger.error(f"Unexpected response when reporting fraud alert: HTTP {response.status_code}")

        except Exception as e:
            logger.error(f"Failed to report suspicious activity: {e}")

    def run(self):
        """Main processing loop; runs until interrupted with Ctrl-C."""
        logger.info("Starting log processor for fraud detection")

        # Track elapsed time with a monotonic clock. Testing
        # ``int(time.time()) % 300 == 0`` only succeeds when the loop
        # happens to wake on an exact second, so it almost never fired.
        last_compare = time.monotonic()

        while True:
            try:
                # Analyze logs every minute
                self.analyze_logs()

                # Compare with backend every 5 minutes
                if time.monotonic() - last_compare >= self.COMPARE_INTERVAL_SECONDS:
                    self.compare_with_backend()
                    last_compare = time.monotonic()

                # Clean old processed logs periodically
                # NOTE(review): clearing the set means every entry still in
                # the log file is analyzed again on the next pass.
                if len(self.processed_logs) > self.MAX_PROCESSED_IDS:
                    self.processed_logs.clear()

                time.sleep(self.POLL_INTERVAL_SECONDS)  # Check every minute

            except KeyboardInterrupt:
                logger.info("Shutting down log processor...")
                break
            except Exception as e:
                logger.error(f"Processing error: {e}")
                time.sleep(self.POLL_INTERVAL_SECONDS)

if __name__ == "__main__":
    # Script entry point: build a processor and block in its polling loop.
    LogProcessor().run()