#!/usr/bin/env python3
"""
Link verification adapter using Link Checker Package
This script returns simple status codes (200/404/etc.) for testing
Based on web10-old/app.py
"""
import sys
import os
import argparse
import io
import contextlib

# Resolve the bundled link_checker_package relative to this script's location
script_dir = os.path.dirname(os.path.abspath(__file__))
link_checker_dir = os.path.join(script_dir, 'link_checker_package')

# Guard clause: without the package directory nothing below can work, so
# report the problem on stdout (status line "error" first) and exit non-zero.
if not os.path.isdir(link_checker_dir):
    print(f"error\nlink_checker_package directory not found at: {link_checker_dir}\nPlease clone the repository first.")
    sys.exit(1)

# Make the package importable: first its own directory, then the script
# directory in front of it (in case link_checker_package is a real package).
sys.path.insert(0, link_checker_dir)
sys.path.insert(0, script_dir)

try:
    from link_checker_package import LinkChecker
    from link_checker_package.exceptions import LinkCheckerError
except ImportError as e:
    # Same stdout reporting shape as above; the search path is included to
    # help diagnose why the import failed.
    print(f"error\nFailed to import link_checker_package: {e}\nPlease ensure link_checker_package is installed.\nPython path: {sys.path}")
    sys.exit(1)

# Link-checker result statuses that always map to a fixed HTTP status code,
# regardless of the HTTP code recorded in the analysis.
_STATUS_HTTP_CODES = {
    'maintenance': 503,
    'rate_limited': 429,
    'temporary_error': 500,
    'error': 500,
}


def _resolve_codes(result):
    """Return ``(http_code, status_code)`` for a ``check_link`` result dict.

    Precedence: a deleted link is 404; otherwise a known status from
    ``_STATUS_HTTP_CODES`` wins; otherwise the analysis' own ``http_code`` is
    used (the string form falls back to ``'200'`` when it is missing/falsy).
    A missing or ``None`` ``analysis`` entry is treated as empty.
    """
    analysis = result.get('analysis') or {}
    if analysis.get('is_deleted'):
        return 404, '404'

    fixed = _STATUS_HTTP_CODES.get(result.get('status'))
    if fixed is not None:
        return fixed, str(fixed)

    http_code = analysis.get('http_code', 200)
    return http_code, (str(http_code) if http_code else '200')


def _first_line(message):
    """Return the text of *message* up to (not including) its first newline."""
    return message.split('\n', 1)[0]


def _last_meaningful_line(message):
    """Return the last non-blank line of *message* that is not a ``File`` frame.

    Single-line messages are returned untouched. If every line is blank or a
    frame line, the first line is returned.
    """
    if '\n' not in message:
        return message
    lines = message.split('\n')
    for line in reversed(lines):
        text = line.strip()
        if text and not text.startswith('File'):
            return text
    return lines[0]


def _error_result(url, error_msg, reason_prefix):
    """Build the detailed-result dict used for every failed check (HTTP 500)."""
    return {
        'url': url,
        'status': 'error',
        'status_code': '500',
        'http_code': 500,
        'error': error_msg,
        'content_length': 0,
        'reason': f'{reason_prefix}: {error_msg}',
        'raw_content': ''
    }


class LinkVerificationService(LinkChecker):
    """
    Link verification service that returns simple status codes
    Based on web10-old/app.py

    NOTE(review): ``self.cur``, ``self.connect()`` and ``self.check_link()``
    are not defined in this file; presumably inherited from ``LinkChecker``.
    """

    def __init__(self):
        super().__init__(server_type='linkverification')

    def check_link_status(self, url, project_id=None):
        """
        Check a link and return a simple status code.

        Args:
            url (str): URL to check
            project_id (int): Optional project ID

        Returns:
            str: '404' for deleted links, '503'/'429'/'500' for the
            maintenance / rate_limited / (temporary_)error statuses,
            otherwise the analysis' HTTP code as a string ('200' when it is
            missing). Any exception, including a failed database
            connection, yields '500'.
        """
        try:
            # Ensure database connection
            if not self.cur:
                self.connect()

            result = self.check_link(url, project_id=project_id)
            return _resolve_codes(result)[1]
        except LinkCheckerError:
            return '500'
        except Exception:
            # Unexpected failure: keep the full traceback on stderr for debugging
            import traceback
            traceback.print_exc(file=sys.stderr)
            return '500'

    def check_link_detailed(self, url, project_id=None):
        """
        Check a link and return a detailed result dict for testing purposes.

        Anything the checker prints to stdout while running is discarded.

        Args:
            url (str): URL to check
            project_id (int): Optional project ID

        Returns:
            dict: On success the keys are 'url', 'display_link', 'status',
            'http_code', 'status_code', 'redirect_url', 'reason',
            'is_deleted', 'content_length' and 'raw_content'. On failure the
            dict comes from ``_error_result`` (status 'error', code 500,
            plus an 'error' message). This method does not raise for
            ordinary exceptions.
        """
        try:
            # redirect_stdout restores sys.stdout on every exit path (return,
            # exception, KeyboardInterrupt), unlike manual save/restore.
            with contextlib.redirect_stdout(io.StringIO()):
                # Ensure database connection
                if not self.cur:
                    try:
                        self.connect()
                    except Exception as db_error:
                        # Deliberate best effort: still try to check the link
                        # without a database, but leave a trace on stderr.
                        print(
                            f"Database connection failed, continuing without it: {db_error}",
                            file=sys.stderr,
                        )

                try:
                    result = self.check_link(url, project_id=project_id)
                except Exception as check_error:
                    # Only the first line of the message is reported
                    return _error_result(
                        url, _first_line(str(check_error)), 'Link check failed'
                    )

            # None-valued entries are treated like missing ones so they do
            # not turn a successful check into a spurious 500.
            analysis = result.get('analysis') or {}
            content = result.get('content') or ''
            http_code, status_code = _resolve_codes(result)

            return {
                'url': url,
                'display_link': result.get('display_link', url),
                'status': result.get('status', 'success'),
                'http_code': http_code,
                'status_code': status_code,
                'redirect_url': analysis.get('redirect_url'),
                'reason': analysis.get('reason', ''),
                'is_deleted': analysis.get('is_deleted', False),
                'content_length': len(content),
                'raw_content': content
            }

        except LinkCheckerError as e:
            return _error_result(url, _first_line(str(e)), 'Link check failed')
        except Exception as e:
            # Print full traceback to stderr for debugging; the returned
            # message carries only the last meaningful line.
            import traceback
            traceback.print_exc(file=sys.stderr)
            return _error_result(
                url, _last_meaningful_line(str(e)), 'Unexpected error'
            )

def _strip_traceback_lines(reason):
    """Remove traceback noise from *reason* and join what is left with ' | '.

    Dropped lines: blank lines, the ``Traceback`` header, ``File ...`` frame
    lines, anything indented by two or more spaces (frame and source lines in
    a printed traceback), and lines embedding a ``File "...", line N``
    reference. Returns a generic message when nothing survives.
    """
    kept = []
    for raw_line in reason.split('\n'):
        text = raw_line.strip()
        if not text:
            continue
        # Indentation must be tested on the raw line; the stripped text can
        # never start with spaces.
        if raw_line.startswith('  ') or text.startswith(('Traceback', 'File')):
            continue
        # Narrow frame check instead of a bare 'line' substring test, which
        # would also discard messages such as "deadline exceeded".
        if 'File "' in text and ', line ' in text:
            continue
        kept.append(text)
    return ' | '.join(kept) if kept else 'Error occurred during link check'


def main():
    """Command-line entry point: check one URL and print the result to stdout.

    Arguments (parsed from ``sys.argv``): ``--url`` (required),
    ``--project-id`` (optional int) and ``--detailed`` (flag).

    Output on stdout:
        * without ``--detailed``: one line holding the status code;
        * with ``--detailed``: status, reason and raw content, one field per
          line in that order (the raw content is last and may itself span
          several lines). The reason is always reduced to a single line.

    Everything the service prints to stdout/stderr while checking is
    captured and kept out of the final output. If anything raises, the
    output is the line ``error``, the first line of the exception message
    and an empty line; details go to stderr and the process exits with
    status 1.
    """
    parser = argparse.ArgumentParser(description='Check a link using link_checker_package (based on web10-old/app.py)')
    parser.add_argument('--url', required=True, help='URL to check')
    parser.add_argument('--project-id', type=int, help='Optional project ID')
    parser.add_argument('--detailed', action='store_true', help='Return detailed result instead of just status code')

    args = parser.parse_args()

    # Buffers for everything the service prints while it runs
    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()

    try:
        # Capture all output to prevent debug messages from appearing
        with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
            service = LinkVerificationService()
            if args.detailed:
                result = service.check_link_detailed(args.url, args.project_id)
            else:
                status_code = service.check_link_status(args.url, args.project_id)

        if args.detailed:
            captured_stderr = stderr_capture.getvalue()

            # Output in the format expected by the Laravel controller
            # Format: status\nreason\nraw_content
            status = result.get('status_code', result.get('status', 'error'))
            reason = str(result.get('reason') or '')
            raw_content = result.get('raw_content', '') or ''

            # A reason mentioning website settings is a debug message, not
            # the actual reason; fall back to the analysis' reason if the
            # result carries one (the detailed results built in this file
            # do not, so this normally clears the reason).
            if 'Website settings' in reason or 'has_settings' in reason:
                analysis = result.get('analysis') or {}
                reason = analysis.get('reason', '') or ''

            if 'Traceback' in reason or 'File "' in reason:
                # Keep just the error message, not the traceback
                reason = _strip_traceback_lines(reason)

            # `status` holds the numeric status code whenever the result has
            # a 'status_code' key, so the failure test must also look at the
            # result's own 'status' field or it can never match.
            failed = 'error' in (status, result.get('status'))
            if captured_stderr and not reason and failed:
                # Try to extract the error from stderr, newest line first
                for line in reversed(captured_stderr.split('\n')):
                    text = line.strip()
                    if text and not text.startswith('File') and 'Error' in line:
                        reason = text
                        break
                else:
                    reason = 'Error occurred during link check (see logs for details)'

            # The reason is the middle field of a line-based format: a
            # newline inside it would shift the raw content, so collapse it.
            reason = ' | '.join(
                part.strip() for part in reason.splitlines() if part.strip()
            )

            # Only print the three fields to stdout (no debug messages)
            sys.stdout.write(f"{status}\n{reason}\n{raw_content}\n")
        else:
            # Return simple status code (like web10-old/app.py)
            sys.stdout.write(f"{status_code}\n")
        sys.stdout.flush()

    except Exception as e:
        # If anything fails, output error in expected format; only the
        # first line of the message keeps the output line-based.
        error_msg = str(e).split('\n', 1)[0]

        captured_stderr = stderr_capture.getvalue()
        captured_stdout = stdout_capture.getvalue()

        # Write error details to stderr for logging (but not to stdout)
        sys.stderr.write(f"Error in check_link_cli: {error_msg}\n")
        if captured_stderr:
            sys.stderr.write(f"Stderr output: {captured_stderr}\n")
        if captured_stdout:
            sys.stderr.write(f"Stdout output: {captured_stdout}\n")

        # Output clean error to stdout
        sys.stdout.write(f"error\n{error_msg}\n\n")
        sys.stdout.flush()
        sys.exit(1)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

