import hashlib
import json
import logging
import os
import sqlite3
from contextlib import closing
from datetime import datetime
from typing import Dict, List, Optional, Tuple

# Module-level logger named after this module; KnowledgeBase reports cache
# hits and saves through it.
logger = logging.getLogger(__name__)

class KnowledgeBase:
    """SQLite-backed store of question/answer pairs with fuzzy lookup.

    Questions are keyed by an MD5 hash of their stripped, lower-cased text.
    Lookups first try that exact hash and then fall back to a word-overlap
    (Jaccard) comparison against stored questions. Every hit bumps the usage
    statistics of the matched row.
    """

    # Words ignored when picking the keyword used to pre-filter candidates.
    _STOP_WORDS = frozenset({
        'как', 'что', 'где', 'когда', 'почему', 'зачем', 'или',
        'и', 'в', 'на', 'с', 'по', 'у', 'о',
    })

    def __init__(self, db_path: Optional[str] = None):
        """Open (creating if needed) the knowledge database.

        Args:
            db_path: Path to the SQLite file. Defaults to
                ``~/.jarvis_knowledge.db`` when omitted.
        """
        if db_path is None:
            db_path = os.path.expanduser('~/.jarvis_knowledge.db')
        self.db_path = db_path
        self._init_database()

    def _connect(self):
        """Return a context manager yielding a connection that is always closed.

        ``sqlite3.Connection`` used directly in ``with`` only manages the
        transaction; wrapping it in ``closing`` guarantees ``close()`` too,
        even when a statement raises or the caller returns early.
        """
        return closing(sqlite3.connect(self.db_path))

    def _init_database(self):
        """Create the tables and indexes if they do not exist yet."""
        # Second context manager (`conn`) commits on success / rolls back on
        # error; the outer `closing` then closes the connection.
        with self._connect() as conn, conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS knowledge (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    question_hash TEXT UNIQUE,
                    original_question TEXT,
                    cleaned_question TEXT,
                    answer TEXT,
                    usage_count INTEGER DEFAULT 1,
                    success_rate REAL DEFAULT 1.0,
                    last_used TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    tags TEXT,
                    category TEXT
                )
            ''')

            conn.execute('''
                CREATE TABLE IF NOT EXISTS synonyms (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    question_hash TEXT,
                    similar_question TEXT,
                    similarity_score REAL,
                    FOREIGN KEY (question_hash) REFERENCES knowledge (question_hash)
                )
            ''')

            conn.execute('CREATE INDEX IF NOT EXISTS idx_question_hash ON knowledge(question_hash)')
            conn.execute('CREATE INDEX IF NOT EXISTS idx_last_used ON knowledge(last_used)')
            conn.execute('CREATE INDEX IF NOT EXISTS idx_usage_count ON knowledge(usage_count)')

    def _hash_question(self, question: str) -> str:
        """Return the MD5 hex digest of the stripped, lower-cased question.

        The digest is only a lookup key, not a security measure.
        """
        return hashlib.md5(question.strip().lower().encode('utf-8')).hexdigest()

    def find_similar(self, query: str, threshold: float = 0.7) -> Optional[str]:
        """Look up an answer for ``query``.

        An exact hash match is tried first; otherwise the most similar stored
        question with similarity >= ``threshold`` is used, and ``query`` is
        recorded as a synonym of it.

        Args:
            query: The user's question.
            threshold: Minimum Jaccard similarity (0..1) for a fuzzy match.

        Returns:
            The stored answer, or ``None`` when nothing matches.
        """
        query_hash = self._hash_question(query)

        # Read first and release the connection before any helper opens its
        # own connection for writing.
        with self._connect() as conn:
            exact = conn.execute(
                'SELECT answer FROM knowledge WHERE question_hash = ?',
                (query_hash,),
            ).fetchone()

        if exact:
            self._update_usage_stats(query_hash)
            logger.info("🎯 Точное совпадение найдено в базе знаний")
            return exact[0]

        similar = self._find_similar_questions(query, threshold)
        if not similar:
            return None

        question_hash, answer, similarity = similar
        self._update_usage_stats(question_hash)
        self._add_synonym(question_hash, query, similarity)
        logger.info(f"🔍 Найдено похожий вопрос (сходство: {similarity:.2f})")
        return answer

    def _find_similar_questions(
        self, query: str, threshold: float
    ) -> Optional[Tuple[str, str, float]]:
        """Find a stored question similar to ``query``.

        Two passes are made:

        1. Up to five rows whose ``cleaned_question`` contains the first
           keyword of ``query`` (most used first); the first one reaching
           ``threshold`` wins.
        2. The fifty most used rows; the one with the highest similarity that
           reaches ``threshold`` wins.

        Returns:
            ``(question_hash, answer, similarity)`` or ``None``.
        """
        keywords = self._extract_keywords(query)

        with self._connect() as conn:
            if keywords:
                candidates = conn.execute('''
                    SELECT question_hash, answer, cleaned_question 
                    FROM knowledge 
                    WHERE cleaned_question LIKE '%' || ? || '%'
                    ORDER BY usage_count DESC, success_rate DESC
                    LIMIT 5
                ''', (keywords[0],)).fetchall()

                for question_hash, answer, stored_question in candidates:
                    similarity = self._calculate_similarity(query, stored_question)
                    if similarity >= threshold:
                        # Early return is safe: `closing` still closes conn.
                        return (question_hash, answer, similarity)

            rows = conn.execute('''
                SELECT question_hash, answer, cleaned_question 
                FROM knowledge 
                ORDER BY usage_count DESC, success_rate DESC
                LIMIT 50
            ''').fetchall()

        best_match = None
        best_similarity = 0
        for question_hash, answer, stored_question in rows:
            similarity = self._calculate_similarity(query, stored_question)
            if similarity > best_similarity and similarity >= threshold:
                best_similarity = similarity
                best_match = (question_hash, answer, similarity)

        return best_match

    def _calculate_similarity(self, query1: str, query2: str) -> float:
        """Return the Jaccard similarity of the two questions' word sets.

        Comparison is case-insensitive and whitespace-tokenised. Empty or
        missing (``None``) input yields ``0.0``.
        """
        # A NULL cleaned_question coming from the database must not crash.
        if not query1 or not query2:
            return 0.0

        words1 = set(query1.lower().split())
        words2 = set(query2.lower().split())
        if not words1 or not words2:
            return 0.0

        return len(words1 & words2) / len(words1 | words2)

    def _extract_keywords(self, query: str) -> List[str]:
        """Return up to five lower-cased keywords from ``query``.

        A keyword is a whitespace-separated word longer than two characters
        that is not in the stop-word list. Original order is preserved.
        """
        keywords = [
            word
            for word in query.lower().split()
            if len(word) > 2 and word not in self._STOP_WORDS
        ]
        return keywords[:5]

    def save_qa(self, cleaned_question: str, answer: str,
                original_question: Optional[str] = None,
                tags: Optional[List[str]] = None,
                category: Optional[str] = None):
        """Store a question/answer pair.

        A new question is inserted. If the question already exists, its
        answer and metadata are replaced by the supplied values while its
        ``id``, ``created_at`` and accumulated ``usage_count`` are kept;
        ``usage_count`` is incremented and ``success_rate`` is raised by 0.1,
        capped at 1.0.

        Args:
            cleaned_question: Normalised question text; used for hashing.
            answer: Answer text to store.
            original_question: Raw question; defaults to ``cleaned_question``.
            tags: Optional list of tags, stored as a JSON array.
            category: Optional category label.
        """
        question_hash = self._hash_question(cleaned_question)
        original = original_question or cleaned_question
        tags_json = json.dumps(tags) if tags else None

        with self._connect() as conn, conn:
            try:
                # Plain INSERT (not INSERT OR REPLACE): REPLACE would delete
                # the existing row and silently reset its statistics.
                conn.execute('''
                    INSERT INTO knowledge 
                    (question_hash, original_question, cleaned_question, answer, tags, category, last_used)
                    VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
                ''', (question_hash, original, cleaned_question, answer, tags_json, category))
            except sqlite3.IntegrityError:
                # UNIQUE(question_hash) hit: refresh the existing row in place.
                conn.execute('''
                    UPDATE knowledge 
                    SET original_question = ?,
                        cleaned_question = ?,
                        answer = ?,
                        tags = ?,
                        category = ?,
                        usage_count = usage_count + 1, 
                        last_used = CURRENT_TIMESTAMP,
                        success_rate = MIN(1.0, success_rate + 0.1)
                    WHERE question_hash = ?
                ''', (original, cleaned_question, answer, tags_json, category, question_hash))
                created = False
            else:
                created = True

        if created:
            logger.info("💾 Сохранен новый вопрос-ответ в базу знаний")
        else:
            logger.info("📊 Обновлена статистика использования для существующего вопроса")

    def _update_usage_stats(self, question_hash: str):
        """Increment ``usage_count`` and refresh ``last_used`` for a question."""
        with self._connect() as conn, conn:
            conn.execute('''
                UPDATE knowledge 
                SET usage_count = usage_count + 1, 
                    last_used = CURRENT_TIMESTAMP
                WHERE question_hash = ?
            ''', (question_hash,))

    def _add_synonym(self, question_hash: str, similar_question: str, similarity: float):
        """Record ``similar_question`` as a synonym of the given question.

        The pair is inserted only once. The table has no unique constraint,
        so the duplicate check is done in the statement itself.
        """
        with self._connect() as conn, conn:
            conn.execute('''
                INSERT INTO synonyms (question_hash, similar_question, similarity_score)
                SELECT ?, ?, ?
                WHERE NOT EXISTS (
                    SELECT 1 FROM synonyms
                    WHERE question_hash = ? AND similar_question = ?
                )
            ''', (question_hash, similar_question, similarity,
                  question_hash, similar_question))

    def get_statistics(self) -> Dict:
        """Return aggregate statistics about the knowledge base.

        Returns:
            A dict with ``total_questions``, ``total_usage`` (sum of all
            usage counts), ``recent_usage_7d`` (rows used in the last seven
            days), ``top_questions`` (up to five ``(question, usage_count)``
            tuples) and ``tokens_saved_estimate`` (``total_usage * 50``).
        """
        with self._connect() as conn:
            total_questions = conn.execute(
                'SELECT COUNT(*) FROM knowledge'
            ).fetchone()[0]

            # SUM over an empty table yields NULL, hence the `or 0`.
            total_usage = conn.execute(
                'SELECT SUM(usage_count) FROM knowledge'
            ).fetchone()[0] or 0

            recent_usage = conn.execute('''
                SELECT COUNT(*) FROM knowledge 
                WHERE last_used > datetime('now', '-7 days')
            ''').fetchone()[0]

            top_questions = conn.execute('''
                SELECT cleaned_question, usage_count 
                FROM knowledge 
                ORDER BY usage_count DESC 
                LIMIT 5
            ''').fetchall()

        return {
            "total_questions": total_questions,
            "total_usage": total_usage,
            "recent_usage_7d": recent_usage,
            "top_questions": top_questions,
            "tokens_saved_estimate": total_usage * 50,
        }