#!/usr/bin/python3

import json
import requests
import sqlite3
import sys
from difflib import SequenceMatcher
from fuzzywuzzy import fuzz, process
from wwwimgpull import *
import argparse

# Board short names that main() walks through when no single board is given.
bods = [
    'a', 'c', 'w', 'm', 'cgl', 'cm', 'f', 'n', 'jp', 'vp',
    'v', 'vg', 'vr', 'co', 'g', 'tv', 'k', 'o', 'an', 'tg',
    'sp', 'asp', 'sci', 'int', 'out', 'toy', 'biz', 'i', 'po', 'p',
    'ck', 'ic', 'wg', 'mu', 'fa', '3', 'gd', 'diy', 'wsg', 's',
    'trv', 'fit', 'x', 'lit', 'adv', 'lgbt', 'mlp', 'b', 'r', 'r9k',
    'pol', 'soc', 's4s',
]
# A second list of board short names; not referenced by the code visible here.
# NOTE(review): presumably the adult-content boards -- confirm before using.
abods = [
    'hc', 'hm', 'h', 'e', 'u',
    'd', 'y', 't', 'hr', 'gif',
]

class FuzzySearchConfig:
    """Tunable settings read by fuzzy_match().

    Every setting can be overridden with a keyword argument; calling the
    constructor without arguments yields the default configuration.

    Attributes:
        min_ratio: score the best fuzzy result must reach (>=) to count as a
            match; fuzzy scores are on a 0-100 scale.
        partial_ratio_weight: multiplier applied to the partial-ratio score.
        token_sort_weight: multiplier applied to the token-sort score.
        enable_partial: whether the partial-ratio score is computed at all.
        enable_token_sort: whether the token-sort score is computed at all.
    """

    def __init__(self, *, min_ratio=60, partial_ratio_weight=0.7,
                 token_sort_weight=0.3, enable_partial=True,
                 enable_token_sort=True):
        self.min_ratio = min_ratio  # Minimum similarity ratio (0-100)
        self.partial_ratio_weight = partial_ratio_weight
        self.token_sort_weight = token_sort_weight
        self.enable_partial = enable_partial
        self.enable_token_sort = enable_token_sort

    def __repr__(self):
        return (
            f'{type(self).__name__}('
            f'min_ratio={self.min_ratio!r}, '
            f'partial_ratio_weight={self.partial_ratio_weight!r}, '
            f'token_sort_weight={self.token_sort_weight!r}, '
            f'enable_partial={self.enable_partial!r}, '
            f'enable_token_sort={self.enable_token_sort!r})'
        )

def fuzzy_match(search_term, text, config):
    """Decide whether *text* matches *search_term*.

    A case-insensitive substring hit always counts as a match. Otherwise
    several fuzzywuzzy scores are computed and the highest one is compared
    with ``config.min_ratio``.

    Args:
        search_term: the word or phrase being looked for; ``"*"`` matches
            any non-empty text.
        text: the text to search in.
        config: object providing ``min_ratio``, ``enable_partial``,
            ``partial_ratio_weight``, ``enable_token_sort`` and
            ``token_sort_weight``. It is only consulted when neither the
            wildcard nor the substring check decides the result.

    Returns:
        True if the text matches, False otherwise. An empty (or None)
        search term or text never matches.
    """
    # Empty input never matches; this also covers "" as a search term, so no
    # separate empty-string wildcard check is needed further down.
    if not text or not search_term:
        return False

    # Wildcard: any non-empty text matches.
    if search_term == "*":
        return True

    needle = search_term.lower()
    haystack = text.lower()

    # Plain substring containment wins before any fuzzy scoring is done.
    if needle in haystack:
        return True

    # The basic and token-set ratios are always used, unweighted.
    scores = [
        fuzz.ratio(needle, haystack),
        fuzz.token_set_ratio(needle, haystack),
    ]

    # Optional scores are scaled by their configured weight before competing.
    if config.enable_partial:
        scores.append(
            fuzz.partial_ratio(needle, haystack) * config.partial_ratio_weight
        )
    if config.enable_token_sort:
        scores.append(
            fuzz.token_sort_ratio(needle, haystack) * config.token_sort_weight
        )

    # The best single score decides the outcome.
    return max(scores) >= config.min_ratio

def processCatalog(catalog, b, search_config):
    """Scan one board's catalog and record every post matching the search word.

    The search word is read from the module-level ``wod``. Matches are
    appended to the module-level lists ``results_url``, ``results_content``
    and ``results_img``; those names must be bound at module level before a
    match is found.

    A matching OP is recorded in ``results_url`` as a
    ``(url, last_modified)`` tuple and its images are fetched with
    ``pull4chImgs``; a matching reply is recorded as a plain URL string with
    a ``#p<post number>`` anchor.

    Args:
        catalog: parsed catalog JSON -- a list of pages, each a dict with a
            'threads' list.
        b: board short name, used to build the thread URLs.
        search_config: configuration forwarded to fuzzy_match().
    """
    for page in catalog:  # each page of the board
        for thread in page['threads']:  # each OP on the page
            url = "https://boards.4channel.org/" + b + "/thread/" + str(thread['no'])

            # An OP without text cannot match itself, but its replies are
            # still scanned below instead of skipping the whole thread.
            if 'com' in thread and fuzzy_match(wod, thread['com'], search_config):
                results_url.append((url, thread['last_modified']))
                results_content.append(thread['com'])
                results_img.extend(pull4chImgs(url))

            for reply in thread.get('last_replies', ()):  # each comment on the OP
                if 'com' not in reply:
                    continue
                if fuzzy_match(wod, reply['com'], search_config):
                    results_url.append(url + "#p" + str(reply['no']))
                    results_content.append(reply['com'])
                    # images are only collected for matching OPs, not replies

# TODO: processThread(thread) is not implemented yet; only this placeholder exists.
    

def repliesSort(catalog, board=None):
    """Return (thread URL, reply count) pairs for a catalog, fewest replies first.

    Args:
        catalog: parsed catalog JSON -- a list of pages, each a dict with a
            'threads' list whose entries carry 'no' and 'replies'.
        board: board short name used in the URLs. When omitted, the
            module-level ``bod`` is used, which keeps existing one-argument
            calls working.

    Returns:
        A list of ``(url, replies)`` tuples sorted ascending by ``replies``;
        threads with equal counts keep their catalog order.
    """
    threads = [thread for page in catalog for thread in page['threads']]
    if not threads:
        return []

    # Resolve the fallback only when a URL is actually needed, so an empty
    # catalog never touches the global.
    if board is None:
        board = bod

    prefix = "https://boards.4channel.org/" + board + "/thread/"
    pairs = ((prefix + str(thread['no']), thread['replies']) for thread in threads)
    return sorted(pairs, key=lambda pair: pair[1])

def print_usage():
    """Print a short usage summary for the command-line interface to stdout.

    The options and examples follow the arguments registered in parseargs():
    the board is chosen with -b/--board (it is not positional) and the ratio
    with -z/--fuzzy-ratio.
    """
    lines = (
        'This program will give you the URLs of all 4chan posts that contain the given search word, either on the entire site or on a select board.',
        'It supports fuzzy search for typos and partial matches.',
        'Usage: ./query.py <searchword> [-b BOARD] [options]',
        'Options:',
        '  -b, --board <board>          Limit the search to one board (default: all boards)',
        '  -z, --fuzzy-ratio <0-100>    Set minimum fuzzy match ratio (default: 60)',
        '  --exact-only                 Disable fuzzy search, use exact matching only',
        '  --strict                     Use stricter fuzzy matching (ratio: 80)',
        '  --loose                      Use looser fuzzy matching (ratio: 40)',
        '  -v, --verbose                Verbose output',
        'Examples:',
        '  ./query.py "programming" -b g         # Search for "programming" on /g/',
        '  ./query.py "programing" -b g -z 50    # Typo-tolerant search on /g/',
        '  ./query.py "linux" --strict           # Strict matching across all boards',
    )
    print('\n'.join(lines))

def parseargs(argv=None):
    """Parse the command line into ``(query, board, config)``.

    Args:
        argv: list of argument strings to parse. None (the default) makes
            argparse read the process's command-line arguments.

    Returns:
        A tuple ``(query, board, config)``: the search word, the board short
        name ('' when no board was given) and a FuzzySearchConfig whose
        ``min_ratio`` reflects the ratio options.

    Side effects:
        Binds the module-level names ``v`` (verbose flag), ``wod`` (search
        word) and ``bod`` (board).

    Ratio precedence, lowest to highest: the default, ``--loose`` (40),
    ``--strict`` (80), an explicit ``--fuzzy-ratio`` and ``--exact-only``.
    An invalid ratio makes argparse exit with a usage error.
    """
    global v
    global wod
    global bod

    def ratio(value):
        # argparse "type" callable: accept only whole numbers from 0 to 100 so
        # later comparisons work on an int instead of the raw string.
        try:
            number = int(value)
        except ValueError:
            raise argparse.ArgumentTypeError(f'{value!r} is not an integer') from None
        if not 0 <= number <= 100:
            raise argparse.ArgumentTypeError(f'{number} is not in the range 0-100')
        return number

    parser = argparse.ArgumentParser(description='search for current 4chan posts that contain some string')
    parser.add_argument('-v', '--verbose', action='store_true', help='verbose')
    parser.add_argument('-z', '--fuzzy-ratio', action='store', type=ratio, help='(0-100) minimum fuzzy match ratio (default: 60)')
    parser.add_argument('--strict', action='store_true', default=False, help='use stricter fuzzy matching (ratio: 80)')
    parser.add_argument('--loose', action='store_true', default=False, help='Use looser fuzzy matching (ratio: 40)')
    parser.add_argument('--exact-only', action='store_true', default=False, help='Disable fuzzy search, use exact matching only')
    parser.add_argument('query', help='the search word')
    parser.add_argument('--board', '-b', default='', help="Choose a board to limit your query to")
    args = parser.parse_args(argv)

    config = FuzzySearchConfig()

    # Presets first, so an explicit ratio can override them.
    if args.strict:
        config.min_ratio = 80
    elif args.loose:
        config.min_ratio = 40

    # "is not None" so that an explicit ratio of 0 is honoured.
    if args.fuzzy_ratio is not None:
        config.min_ratio = args.fuzzy_ratio

    if args.exact_only:
        # Fuzzy scores top out at 100, so a threshold of 101 can only be
        # satisfied by fuzzy_match()'s plain substring check.
        config.min_ratio = 101

    v = args.verbose
    wod = args.query
    bod = args.board

    return args.query, args.board, config


#####################################################################

def _fetch_catalog(board):
    """Download and parse the catalog of *board*.

    Returns the decoded JSON, or None when the HTTP response is not OK.
    Network and JSON decoding errors propagate to the caller.
    """
    # A timeout keeps one unresponsive request from hanging the whole run.
    res = requests.get("https://a.4cdn.org/" + board + "/catalog.json", timeout=30)
    if not res.ok:
        return None
    return json.loads(res.text)


def main():
    """Run a search from the command line and print the matches.

    Parses the arguments, downloads the catalog of the chosen board (or of
    every board in ``bods`` when none was given), prints the threads ordered
    by reply count, collects matching posts through processCatalog() and
    finally prints the matching URLs plus up to five sample matches with
    their partial-ratio score.

    A failure on one board is reported and does not stop the run.
    """
    # processCatalog() and repliesSort() read these names as module globals,
    # so they have to be bound at module level rather than as locals here.
    global wod, bod, results_url, results_content, results_img

    # Parse command line arguments
    wod, bod, search_config = parseargs()

    results_url = []  # URLs of threads containing the keyword
    results_content = []  # text of all posts and comments containing the keyword
    results_img = []  # URLs of all images containing the keyword

    print(f'Fuzzy searching for "{wod}" on  "{bod}" (min ratio: {search_config.min_ratio})')

    if bod == '':
        print('searching all boards')
        repl_res = []
        for b in bods:
            print(f'Processing board /{b}/')
            try:
                catalog = _fetch_catalog(b)
                if catalog is None:
                    print(f'Error getting response from API, board /{b}/.')
                    continue
                # TODO: per-thread processing, see
                # https://github.com/seanpm2001/4Chan_4Chan-API/blob/master/pages/Endpoints_and_domains.md

                # repliesSort() builds its URLs from the module-level `bod`,
                # so point it at the board currently being processed.
                bod = b
                # extend (not append) keeps repl_res a flat list of
                # (url, replies) pairs that can be sorted by reply count.
                repl_res.extend(repliesSort(catalog))
                processCatalog(catalog, b, search_config)
            except Exception as e:
                print(f'Error processing board /{b}/: {e}')
        bod = ''  # back to the "all boards" selection made on the command line
        repl_res.sort(key=lambda v: v[1])
        print(repl_res)
    else:
        print('searching board ' + bod)
        try:
            catalog = _fetch_catalog(bod)
            if catalog is None:
                print(f'Error getting response from API, board /{bod}/.')
            else:
                print(repliesSort(catalog))
                processCatalog(catalog, bod, search_config)
        except Exception as e:
            print(f'Error processing board /{bod}/: {e}')

    print(f'\nFound {len(results_url)} matches:')
    for url in results_url:
        print(url)

    # Optional: Show some sample matches with their similarity scores
    if results_content and search_config.min_ratio < 100:
        print(f'\nSample fuzzy matches for "{wod}":')
        for content in results_content[:5]:  # Show first 5 matches
            # Undo the most common HTML escapes and limit length for display
            clean_content = content.replace('<br>', ' ').replace('&gt;', '>').replace('&lt;', '<')
            if len(clean_content) > 100:
                clean_content = clean_content[:100] + "..."

            score = fuzz.partial_ratio(wod.lower(), content.lower())
            print(f'[{score}%] {clean_content}')

# Run the search only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()