"""
Content fetching methods for the Link Checker package
"""

import time
import subprocess
import random
import json
import requests
from urllib.parse import urlparse
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from playwright.sync_api import sync_playwright
from curl_cffi import requests as curl_requests
from .exceptions import ContentFetchError, SocialMediaError
from .utils import get_proxy

# Import colorama for colored output
try:
    from colorama import Fore, Style
except ImportError:
    # Fallback if colorama is not available
    class Fore:
        YELLOW = ""
        GREEN = ""
        RED = ""
    class Style:
        RESET_ALL = ""
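
# Fetchers below return [status, content, current_url]: status is an HTTP-style
# string, '300' marks a redirect to the site root, and '600' marks a page whose
# body came back (near) empty.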

def selenium_content(useragent, url, proxy, cloudflare, config):
    """
    Fetch content using Selenium
    
    Args:
        useragent (str): User agent string
        url (str): URL to fetch
        proxy (str): Proxy string
        cloudflare (bool): Whether to handle Cloudflare
        config (dict): Configuration
    
    Returns:
        list: [status, content, current_url]
    """
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-gpu')
        options.add_argument('--headless')
        options.add_argument('--window-size=1280,800')
        if proxy and proxy != "unknown":
            options.add_argument('--proxy-server=http://' + proxy)
        options.add_argument('--user-agent=' + useragent)
        # Chrome flags must be passed one per add_argument() call
        options.add_argument('--no-first-run')
        options.add_argument('--no-service-autorun')
        options.add_argument('--password-store=basic')
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)
        options.add_argument('--disable-blink-features=AutomationControlled')
        
        s = Service(config['paths']['chromedriver_path'])
        driver = webdriver.Chrome(service=s, options=options)
        # Install the webdriver-masking script via CDP so it runs on every
        # document the driver loads, not just the current blank page.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"}
        )
        
        driver.set_page_load_timeout(60)
        driver.get(url)
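        # Fixed wait so dynamically rendered content (and any interstitial
        # challenge page) has time to settle before the source is read.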
        time.sleep(20)
        
        title = driver.title
        content = driver.page_source
        current_url = driver.current_url
        driver.quit()
        
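        # Cloudflare challenge and block pages are recognized by their titles;
        # 'Just a moment' pages get one more chance through Aparser.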
        if cloudflare:
            if 'Access denied' in title or 'Cloudflare' in title or 'Just a moment' in title:
                # Try Aparser for "Just a moment..." cases
                if 'Just a moment' in title:
                    print(Fore.YELLOW + "Just a moment... detected, trying Aparser" + Style.RESET_ALL)
                    aparser_content = AparserLinkExtractor(url)
                    if aparser_content and len(aparser_content) > 50:
                        print(Fore.GREEN + "Aparser successfully extracted content" + Style.RESET_ALL)
                        return ['200', aparser_content, current_url]
                    else:
                        print(Fore.RED + "Aparser failed, returning 403" + Style.RESET_ALL)
                        return ["403", '', '']
                else:
                    return ["403", '', '']
        
        if 'not found' in title or 'Erreur' in title or 'Error' in title:
            return ['404', '', '']
        if '404' in title:
            return ['404', '', '']
        if '429' in title:
            return ['429', '', '']
        
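        # A (near) empty body means nothing useful rendered; '600' is the
        # module's sentinel for that case.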
        if len(content) < 50:
            return ['600', '', '']
        
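        # Landing on the site root after navigation is treated as a home-page
        # redirect and reported as '300'.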
        o = urlparse(current_url)
        if o.path == '/' or len(o.path) < 2:
            return ['300', content, current_url]
        
        return ['200', content, current_url]
        
    except Exception as e:
        raise ContentFetchError(f"Selenium content fetch failed: {e}")

def playwright_content(useragent, url, cur, con, proxy, cloudflare, config):
    """
    Fetch content using Playwright
    
    Args:
        useragent (str): User agent string
        url (str): URL to fetch
        cur: Database cursor
        con: Database connection
        proxy (str): Proxy string
        cloudflare (bool): Whether to handle Cloudflare
        config (dict): Configuration
    
    Returns:
        list: [status, content, current_url]
    """
    try:
        # Only build a proxy config when a usable proxy was supplied;
        # concatenating with None or 'unknown' would fail or be meaningless.
        proxy_to_use = None
        if proxy and proxy != "unknown":
            proxy_to_use = {
                'server': 'http://' + proxy,
            }
        
        # Get location data
        location = get_location(cur, con, proxy)
        lat = location['latitude']
        lng = location['longitude']
        timezone = location['timezone']
        locale = location['locale']
        
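        # Launch Firefox through the proxy and, when the proxy's location is
        # known, align timezone, geolocation and Accept-Language with it.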
        with sync_playwright() as p:
            if proxy == "unknown" or not proxy:
                browser = p.firefox.launch()
            else:
                browser = p.firefox.launch(proxy=proxy_to_use)
            
            if lat is None or lng is None:
                context = browser.new_context(
                    user_agent=useragent,
                    locale='en-US',
                    ignore_https_errors=True
                    # extra_http_headers={
                    #     "Accept-Language": locale
                    # }
                )
            else:
                context = browser.new_context(
                    user_agent=useragent,
                    timezone_id=timezone,
                    locale='en-US',
                    geolocation={"longitude": float(lng), "latitude": float(lat)},
                    permissions=["geolocation"],
                    ignore_https_errors=True,
                    extra_http_headers={
                        "Accept-Language": locale
                    }
                )
            
            page = context.new_page()
            page.set_default_timeout(60000)
            
            try:
                page.goto(url, wait_until="load")
                # time.sleep(20)
            except Exception as e:
                if 'SSL_ERROR_UNKNOWN' in str(e):
                    page.close()
                    browser.close()
                    # No status is known; keep the documented 3-element shape
                    return ['', '', '']
                elif 'NS_ERROR_PROXY_FORBIDDEN' in str(e):
                    page.close()
                    browser.close()
                    return ['403', '', '']
                elif 'Timeout' in str(e):
                    page.close()
                    browser.close()
                    return selenium_content(useragent, url, proxy, cloudflare, config)
                else:
                    page.close()
                    browser.close()
                    return ['200', '', '']
            
            title = page.title()
            current_url = page.url
            content = page.content()
            page.close()
            browser.close()
            
            if cloudflare:
                if 'Access denied' in title or 'Cloudflare' in title or '403' in title or 'Just a moment' in title:
                    # Try Aparser for "Just a moment..." cases
                    if 'Just a moment' in title:
                        print(Fore.YELLOW + "Just a moment... detected, trying Aparser" + Style.RESET_ALL)
                        aparser_content = AparserLinkExtractor(url)
                        if aparser_content and len(aparser_content) > 50:
                            print(Fore.GREEN + "Aparser successfully extracted content" + Style.RESET_ALL)
                            return ['200', aparser_content, current_url]
                        else:
                            print(Fore.RED + "Aparser failed, returning 403" + Style.RESET_ALL)
                            return ["403", '', '']
                    else:
                        return ["403", '', '']
            
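            # A redirect into an on-site search results page yields no usable
            # page content, so only the final URL is returned.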
            if '/search?q=' in current_url:
                return ['200', '', current_url]
            
            o = urlparse(current_url)
            if o.path == '/' or len(o.path) < 2:
                return ['300', content, current_url]
            
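            # Suspiciously short content suggests the page did not fully
            # render under Playwright; retry the URL with Selenium.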
            if len(content) < 200:
                return selenium_content(useragent, url, proxy, cloudflare, config)
            
            return ['200', content, current_url]
            
    except Exception as e:
        raise ContentFetchError(f"Playwright content fetch failed: {e}")

def process_instagram(useragent, url, insta_user, config):
    """
    Process Instagram URLs
    
    Args:
        useragent (str): User agent string
        url (str): URL to fetch
        insta_user (dict): Instagram user data
        config (dict): Configuration
    
    Returns:
        str: Content string
    """
    try:
        proxy = get_proxy(config)
        proxy_to_use = {
            'server': 'http://' + proxy,
        }
        
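        # Wrap the stored cookie list in Playwright's storage_state format
        # and persist it to a file the browser context can load.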
        cookies = insta_user['cookies']
        new_cookies = '{"cookies": ' + cookies + '}'
        
        cookies_file = 'insta_cookies.json'
        with open(cookies_file, 'w') as fh:
            fh.write(new_cookies)
        
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, proxy=proxy_to_use)
            context = browser.new_context(
                user_agent=useragent,
                storage_state=cookies_file,
                locale='en-US',
                timezone_id="Europe/Tallinn"
            )
            context.tracing.start(screenshots=True, snapshots=True, sources=True)
            page = context.new_page()
            page.set_default_timeout(120000)
            
            try:
                page.goto(url)
                time.sleep(10)
            except Exception as e:
                if 'Timeout' in str(e):
                    page.close()
                    browser.close()
                    return ""
            
            title = page.title()
            current_url = page.url
            content = page.content()
            page.close()
            browser.close()
            
            return content
            
    except Exception as e:
        raise SocialMediaError(f"Instagram processing failed: {e}")

def process_facebook(useragent, cur, url, fb_user, config):
    """
    Process Facebook URLs
    
    Args:
        useragent (str): User agent string
        cur: Database cursor
        url (str): URL to fetch
        fb_user (dict): Facebook user data
        config (dict): Configuration
    
    Returns:
        str: Content string
    """
    try:
        proxy = get_proxy(config)
        proxy_to_use = {
            'server': 'http://' + proxy,
        }
        
        cookies = fb_user['cookies']
        new_cookies = '{"cookies": ' + cookies + '}'
        
        cookies_file = 'facebook_cookies.json'
        with open(cookies_file, 'w') as fh:
            fh.write(new_cookies)
        
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, proxy=proxy_to_use)
            context = browser.new_context(
                user_agent=useragent,
                storage_state=cookies_file,
                locale='en-US',
                timezone_id="Europe/Tallinn"
            )
            context.tracing.start(screenshots=True, snapshots=True, sources=True)
            page = context.new_page()
            page.set_default_timeout(120000)
            
            try:
                page.goto(url)
                time.sleep(20)
            except Exception as e:
                page.close()
                browser.close()
                return ""
            
            title = page.title()
            current_url = page.url
            content = page.content()
            
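            # A login wall, or a page without the logout link, means the
            # stored session is not authenticated.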
            if 'Log into Facebook' in content or "logout.php" not in content:
                page.close()
                browser.close()
                return ''
            
            page.close()
            browser.close()
            
            return content
            
    except Exception as e:
        raise SocialMediaError(f"Facebook processing failed: {e}")

def process_tiktok(useragent, url, tiktok_user, config):
    """
    Process TikTok URLs
    
    Args:
        useragent (str): User agent string
        url (str): URL to fetch
        tiktok_user (dict): TikTok user data
        config (dict): Configuration
    
    Returns:
        str: Content string
    """
    try:
        proxy = get_proxy(config)
        proxy_to_use = {
            'server': 'http://' + proxy,
        }
        
        cookies = tiktok_user['cookies']
        new_cookies = '{"cookies": ' + cookies + '}'
        
        cookies_file = 'tiktok_cookies.json'
        with open(cookies_file, 'w') as fh:
            fh.write(new_cookies)
        
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, proxy=proxy_to_use)
            context = browser.new_context(
                user_agent=useragent,
                storage_state=cookies_file,
                locale='en-US',
                timezone_id="Europe/Tallinn"
            )
            context.tracing.start(screenshots=True, snapshots=True, sources=True)
            page = context.new_page()
            page.set_default_timeout(120000)
            
            try:
                page.goto(url)
                time.sleep(10)
            except Exception as e:
                page.close()
                browser.close()
                return ""
            
            title = page.title()
            current_url = page.url
            content = page.content()
            page.close()
            browser.close()
            
            return content
            
    except Exception as e:
        raise SocialMediaError(f"TikTok processing failed: {e}")

def process_reddit(useragent, url, reddit_user, config):
    """
    Process Reddit URLs
    
    Args:
        useragent (str): User agent string
        url (str): URL to fetch
        reddit_user (dict): Reddit user data
        config (dict): Configuration
    
    Returns:
        str: Content string
    """
    try:
        proxy = get_proxy(config)
        proxy_to_use = {
            'server': 'http://' + proxy,
        }
        
        cookies = reddit_user['cookies']
        new_cookies = '{"cookies": ' + cookies + '}'
        
        cookies_file = 'reddit_cookies.json'
        with open(cookies_file, 'w') as fh:
            fh.write(new_cookies)
        
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, proxy=proxy_to_use)
            context = browser.new_context(
                user_agent=useragent,
                storage_state=cookies_file,
                locale='en-US',
                timezone_id="Europe/Tallinn"
            )
            context.tracing.start(screenshots=True, snapshots=True, sources=True)
            page = context.new_page()
            page.set_default_timeout(120000)
            
            try:
                page.goto(url)
                time.sleep(10)
            except Exception as e:
                page.close()
                browser.close()
                return ""
            
            title = page.title()
            current_url = page.url
            content = page.content()
            page.close()
            browser.close()
            
            return content
            
    except Exception as e:
        raise SocialMediaError(f"Reddit processing failed: {e}")

def get_location(cur, con, proxy):
    """
    Get location data for proxy
    
    Args:
        cur: Database cursor
        con: Database connection
        proxy (str): Proxy string
    
    Returns:
        dict: Location data
    """
    try:
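        # Rows are accessed by column name below, so this assumes a
        # dict-style cursor (e.g. pymysql's DictCursor).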
        sql = "SELECT * FROM proxy_locations WHERE proxy = %s"
        cur.execute(sql, [proxy])
        if cur.rowcount:
            proxy_data = cur.fetchall()
            response = proxy_data[0]
            loca = response['locale'].split(',')
            location_data = {
                "city": response['city'],
                "region": response['region'],
                "country": response['country'],
                "latitude": response['lat'],
                "longitude": response['lng'],
                "timezone": response['timezone'],
                "locale": loca[0].strip(),
                "proxy": proxy
            }
        else:
            location_data = {
                "city": None,
                "region": None,
                "country": None,
                "latitude": None,
                "longitude": None,
                "timezone": None,
                "locale": None
            }
        return location_data
    except Exception:
        return {
            "city": None,
            "region": None,
            "country": None,
            "latitude": None,
            "longitude": None,
            "timezone": None,
            "locale": None
        }

def fetch_proxy_from_file():
    """
    Fetch a random proxy from proxy file
    
    Returns:
        str: Proxy string (ip:port)
    """
    try:
        with open('/root/flask/proxies/proxy.txt') as fh:
            lines = fh.read().splitlines()
        proxy = random.choice(lines)
        return proxy.strip()
    except Exception as e:
        print(f"Error fetching proxy from file: {e}")
        # Fallback to default proxy paths
        try:
            with open('/opt/aparser/files/proxy/proxy.txt') as fh:
                lines = fh.read().splitlines()
            proxy = random.choice(lines)
            return proxy.strip()
        except Exception as e2:
            print(f"Error fetching proxy from fallback file: {e2}")
            return "127.0.0.1:8080"

def get_user_agent_from_script():
    """
    Get user agent from Node.js script
    
    Returns:
        str: User agent string
    """
    command = "node /root/user_agents.js"
    try:
        useragent = subprocess.check_output(command, shell=True)
        useragent = useragent.decode("utf-8").strip()
        return useragent
    except Exception as e:
        print(f"Error getting user agent: {e}")
        return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

def curl_cffi_content(url, useragent=None, proxy=None, max_retries=3):
    """
    Fetch content using curl_cffi with proxy support and retry logic
    
    Args:
        url (str): URL to fetch
        useragent (str): User agent string (optional)
        proxy (str): Proxy string (optional)
        max_retries (int): Maximum number of retries for 500 errors
    
    Returns:
        list: [status_code, content, final_url, redirect_detected]
    """
    print(Fore.YELLOW + f"Using curl_cffi for {url}" + Style.RESET_ALL)
    
    # Get proxy and user agent
    if not proxy:
        proxy = fetch_proxy_from_file()
    if not useragent:
        useragent = get_user_agent_from_script()
    
    proxy_url = f"http://{proxy}"
    proxies = {"http": proxy_url, "https": proxy_url}
    
    print(f"Using proxy: {proxy}")
    print(f"Using User-Agent: {useragent[:50]}...")
    
    # Headers for better compatibility
    headers = {
        "User-Agent": useragent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Referer": "https://www.google.com/",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Cache-Control": "max-age=0"
    }
    
    for attempt in range(max_retries):
        # Use a fresh session per attempt: the finally block below closes the
        # session after every attempt, so reusing one across retries would
        # issue requests on an already-closed session.
        session = curl_requests.Session()
        try:
            start_time = time.time()
            print(f"Fetching URL (attempt {attempt + 1}/{max_retries})...")
            
            response = session.get(
                url,
                proxies=proxies,
                headers=headers,
                verify=False,
                timeout=(60, 120),  # (connect timeout, read timeout)
                allow_redirects=True,
                stream=False,
                impersonate="chrome110"
            )
            
            elapsed_time = time.time() - start_time
            print(f"Request completed in {elapsed_time:.2f} seconds")
            print(f"Status: {response.status_code}")
            print(f"Final URL: {response.url}")
            
            # Check for home redirect
            redirect_detected = False
            original_parsed = urlparse(url)
            final_parsed = urlparse(str(response.url))
            
            # Detect home redirect: if original URL has a path (not homepage) and final URL is homepage
            # This works for same-domain and cross-domain redirects
            original_has_path = len(original_parsed.path) > 2
            final_is_homepage = (final_parsed.path == '/' or len(final_parsed.path) < 2)
            
            if original_has_path and final_is_homepage:
                redirect_detected = True
                if original_parsed.netloc != final_parsed.netloc:
                    print(f"Cross-domain home redirect detected: {url} -> {response.url}")
                else:
                    print(f"Home redirect detected: {url} -> {response.url}")
            
            # Handle different status codes
            if response.status_code == 500 and attempt < max_retries - 1:
                print(f"Got 500 status code, retrying... (attempt {attempt + 1}/{max_retries})")
                time.sleep(2)  # Wait before retry
                continue
            
            # Get content length info
            content_length = len(response.content)
            content_length_header = response.headers.get('Content-Length')
            
            print(f"Content length: {content_length} bytes")
            if content_length_header:
                expected_length = int(content_length_header)
                print(f"Expected length: {expected_length} bytes")
                if content_length < expected_length:
                    print(f"WARNING: Content may be incomplete! Got {content_length} bytes, expected {expected_length}")
            
            # Return result based on status code
            if response.status_code in [200, 201, 202]:
                return ['200', response.text, str(response.url), redirect_detected]
            elif response.status_code in [300, 301, 302, 303, 307, 308]:
                return ['300' if redirect_detected else str(response.status_code), response.text, str(response.url), redirect_detected]
            else:
                return [str(response.status_code), response.text, str(response.url), redirect_detected]
                
        except curl_requests.RequestsError as e:
            print(f"Request error (attempt {attempt + 1}): {e}")
            if attempt == max_retries - 1:
                return ['500', '', url, False]
            time.sleep(2)  # Wait before retry
            
        except Exception as e:
            print(f"Error (attempt {attempt + 1}): {e}")
            if attempt == max_retries - 1:
                import traceback
                traceback.print_exc()
                return ['500', '', url, False]
            time.sleep(2)  # Wait before retry
        
        finally:
            # Close this attempt's session; a retry creates a new one above.
            try:
                session.close()
            except Exception:
                pass
    
    return ['500', '', url, False]

def AparserLinkExtractor(url):
    """
    Extract content using Aparser when Cloudflare protection is detected
    
    Args:
        url (str): URL to extract content from
    
    Returns:
        str: Extracted content or empty string if failed
    """
    print(Fore.YELLOW + "USING APARSER " + url + Style.RESET_ALL)

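    # A single 'oneRequest' call to the A-Parser HTTP API using its Net::HTTP
    # parser with the 'secondpack' preset; the response carries the raw page
    # in data.resultString.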
    parser_url = 'http://pma6.remov.ee:9096/API'
    payload = {
        "action": "oneRequest",
        "data": {
            "parser": "Net::HTTP",
            "preset": "secondpack",
            "configPreset": "default",
            "query": url
        },
        "password": "tWCj3yvB4v4US2wjfrSC"
    }

    # The body is JSON, so label it as such; the timeout (120 s, an arbitrary
    # but generous choice) keeps a stalled A-Parser instance from hanging the run.
    headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
    try:
        res = requests.post(parser_url, data=json.dumps(payload), headers=headers, timeout=120)
        response = res.json()
        # print(response)
        if response.get('success') == 1:
            print(Fore.GREEN + "AParser response received" + Style.RESET_ALL)
            return response['data']['resultString']
        else:
            print(Fore.RED + "AParser failed to fetch content" + Style.RESET_ALL)
            print(response)
            return ""
    except Exception as e:
        print(e)
        return ""
