"""
Utility functions for Link Checker package
"""

import os
import time
import random
import subprocess
import requests
from urllib.parse import urlparse
from .exceptions import ProxyError, ContentFetchError

def get_proxy(config):
    """
    Get proxy from API or file with retry
    
    Args:
        config (dict): Configuration containing proxy settings
    
    Returns:
        str: Proxy string (ip:port)
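
    Example:
        Sketch only; the token value is made up and the config shape mirrors
        the key access in the function body.

        config = {'api': {'proxy_api_token': 'YOUR_WEBSHARE_TOKEN'}}
        proxy = get_proxy(config)  # e.g. '203.0.113.7:8080'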
    """
    # Try API with 3 retries
    for attempt in range(3):
        try:
            response = requests.get(
                "https://proxy.webshare.io/api/proxy/list?page=1",
                headers={"Authorization": f"Token {config['api']['proxy_api_token']}"}, 
                timeout=10
            )
            response.raise_for_status()
            results = response.json()
            proxy_details = random.choice(results['results'])
            proxy_address = proxy_details['proxy_address']
            proxy_port = str(proxy_details['ports']['http'])
            proxy_address = proxy_address.strip()
            if not proxy_address or proxy_address == "unknown":
                raise ProxyError("Received empty proxy address from API")
            proxy = proxy_address + ':' + proxy_port
            return proxy
        except Exception as e:
            if attempt < 2:
                time.sleep(1)  # brief pause before the next attempt
                continue
            print(f"API proxy fetch failed after 3 attempts: {e}")

    # Fallback to file
    try:
        with open('/opt/aparser/files/proxy/proxy.txt') as f:
            lines = f.read().splitlines()
        proxy = random.choice(lines)
        return proxy.strip()
    except Exception as e:
        raise ProxyError(f"Failed to get proxy from both API and file: {e}")

def get_content(command, link_id, timeout=30):
    """
    Get content using scraper command with timeout
    
    Args:
        command (str): Scraper command
        link_id (str): Link ID for file
        timeout (int): Timeout in seconds (default: 30)
    
    Returns:
        str: Content string, or "TIMEOUT" if timeout occurred
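
    Example:
        Sketch only; the curl command below is illustrative - the real
        scraper command is assembled by the caller and must write its
        output to a file named after link_id.

        content = get_content('curl -si "https://example.com" -o 12345', '12345')
        if content == "TIMEOUT":
            pass  # scraper exceeded the timeout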
    """
    try:
        # Use timeout parameter for subprocess
        subprocess.check_output(command, shell=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        print(f"Scraper timed out after {timeout} seconds")
        # Clean up the file if it exists
        try:
            if os.path.exists(link_id):
                os.remove(link_id)
        except OSError:
            pass
        return "TIMEOUT"
    except Exception as e:
        print(f"Scraper command failed: {e}")
        return ""
    
    try:
        # Discard anything larger than ~10 MB
        if os.stat(link_id).st_size > 10000000:
            os.remove(link_id)
            return ""

        with open(link_id) as f:
            content = f.read()
    except Exception:
        # Retry with a permissive single-byte encoding for non-UTF-8 pages
        try:
            with open(link_id, encoding="ISO-8859-1") as f:
                content = f.read()
        except Exception:
            return ""

    # Remove the temporary output file once its contents are in memory
    try:
        os.remove(link_id)
    except OSError:
        pass
    
    return content

def test_content(content):
    """
    Test content and extract HTTP codes and redirects
    
    Args:
        content (str): Raw content from scraper
    
    Returns:
        list: [status_code, redirect_url, [all_codes]]
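
    Example:
        Illustrative only; the header dump below is what a headers-included
        scraper response typically starts with.

        test_content("HTTP/1.1 200 OK")  # -> ['200', 'null', ['200']]

        A dump whose status line carries 301/302 and whose "Location:"
        header points at a non-root path yields ['301', <redirect target>,
        codes]; site-specific targets such as redirect.se map to dedicated
        pseudo-codes.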
    """
    try:
        # Only the leading few KB can contain the response headers
        sub_contents = content[0:3000].split("\n")
    except Exception:
        # Unparsable content is treated as a plain 200 with no redirect
        return ['200', 'null', ['200']]
    
    codes = []
    res_url = 'null'
    code = ""
    
    for line in sub_contents:
        if line:
            if 'HTTP' in line:
                # Status line, e.g. "HTTP/1.1 301 Moved Permanently"
                try:
                    code_ = line.split()[1].strip()
                    if len(code_) < 4:
                        code = code_
                        codes.append(code)
                except IndexError:
                    pass
            if 'Location' in line or 'location:' in line:
                try:
                    res_url = line.split()[1].strip()

                    # A redirect to the site root counts as a homepage redirect
                    o = urlparse(res_url)
                    if o.path == '/' or len(o.path) < 2:
                        return ['300', res_url, codes]

                    # Site-specific redirect targets map to dedicated pseudo-codes
                    if 'rapidgator' in res_url:
                        return ['301', res_url, codes]
                    if 'redirect.se' in res_url:
                        return ['205', res_url, codes]
                    # if '.onion' in res_url:
                    #     return ['206', res_url, codes]
                except Exception:
                    pass
    
    if len(codes) == 0:
        # No status line found; assume a plain 200 response
        return ['200', 'null', codes]

    # Treat 301 and 302 alike: report a permanent redirect and its target
    if code in ('301', '302'):
        return ['301', res_url, codes]

    # Return the detected code - let the analysis logic handle it based on settings
    return [code, 'null', codes]

def merge_settings(website_settings, content_type_settings, global_settings):
    """
    Merge settings in hierarchical order: website > content_type > global
    
    Args:
        website_settings (dict): Website-specific settings
        content_type_settings (dict): Content type settings
        global_settings (dict): Global settings
    
    Returns:
        dict: Merged settings
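
    Example:
        Sketch only; the keys follow the lookups performed below, the
        values are made up.

        merged = merge_settings(
            website_settings={'check_http_code': True, 'http_codes': '404,410'},
            content_type_settings=None,
            global_settings={'stopword': ['page not found']},
        )
        # merged['http_codes'] == '404,410' (website wins)
        # merged['stop_words'] == 'page not found' (inherited from global)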
    """
    merged = {
        'check_http_code': False,
        'http_codes': None,
        'check_stop_words': False,
        'stop_words': None,
        'homepage_redirect': False,
        'check_stop_words_on_page': False,
        'use_selenium': False,
        'use_playwright': False
    }
    
    # Apply global settings first (lowest priority)
    if global_settings:
        if global_settings.get('http_code'):
            merged['check_http_code'] = True
            merged['http_codes'] = ','.join(global_settings['http_code'])
        if global_settings.get('stopword'):
            merged['check_stop_words'] = True
            merged['stop_words'] = ','.join(global_settings['stopword'])
        if global_settings.get('redirect'):
            merged['homepage_redirect'] = True
        # Note: skip_code is handled separately in analysis, not merged into settings
    
    # Apply content type settings (medium priority)
    if content_type_settings:
        if content_type_settings.get('check_http_code') is not None:
            merged['check_http_code'] = content_type_settings['check_http_code']
        if content_type_settings.get('http_codes'):
            merged['http_codes'] = content_type_settings['http_codes']
        if content_type_settings.get('check_stop_words') is not None:
            merged['check_stop_words'] = content_type_settings['check_stop_words']
        if content_type_settings.get('stop_words'):
            merged['stop_words'] = content_type_settings['stop_words']
        if content_type_settings.get('homepage_redirect') is not None:
            merged['homepage_redirect'] = content_type_settings['homepage_redirect']
        if content_type_settings.get('check_stop_words_on_page') is not None:
            merged['check_stop_words_on_page'] = content_type_settings['check_stop_words_on_page']
        if content_type_settings.get('use_selenium') is not None:
            merged['use_selenium'] = content_type_settings['use_selenium']
        if content_type_settings.get('use_playwright') is not None:
            merged['use_playwright'] = content_type_settings['use_playwright']
    
    # Apply website settings last (highest priority)
    if website_settings:
        if website_settings.get('check_http_code') is not None:
            merged['check_http_code'] = website_settings['check_http_code']
        if website_settings.get('http_codes'):
            merged['http_codes'] = website_settings['http_codes']
        if website_settings.get('check_stop_words') is not None:
            merged['check_stop_words'] = website_settings['check_stop_words']
        if website_settings.get('stopwords'):
            merged['stop_words'] = website_settings['stopwords']
        
        # homepage_redirect: only an explicit truthy website value overrides the
        # lower-priority setting; falsy or missing values leave it untouched
        if website_settings.get('homepage_redirect') in [1, '1', True, 'true']:
            merged['homepage_redirect'] = True
        
        if website_settings.get('check_stop_words_on_page') is not None:
            merged['check_stop_words_on_page'] = website_settings['check_stop_words_on_page']
        if website_settings.get('use_selenium') is not None:
            merged['use_selenium'] = website_settings['use_selenium']
        if website_settings.get('use_playwright') is not None:
            merged['use_playwright'] = website_settings['use_playwright']
    
    return merged

def parse_url(url):
    """
    Parse URL and extract display link
    
    Args:
        url (str): URL to parse
    
    Returns:
        str: Display link
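
    Example:
        Illustrative URLs only.

        parse_url('https://www.example.com/path')        # -> 'example.com'
        parse_url('https://news.example.co.uk/article')  # -> 'news.example.co.uk'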
    """
    o = urlparse(url)
    display_links = o.netloc.split('.')
    
    if len(display_links) > 2:
        # Drop any "www" label and rejoin the remaining parts
        display_link = '.'.join(part for part in display_links if part != 'www')
    else:
        if o.netloc.startswith('www.'):
            # Remove only the leading "www." prefix; str.strip('www.') would
            # also strip matching characters from the rest of the host name
            display_link = o.netloc[len('www.'):]
        else:
            display_link = o.netloc.strip()
    
    return display_link
