# -*- coding:utf-8 -*-

import re
import redis
import datetime
import urllib
import urlparse
import time
import random
import socket
import json

try:
    from django.conf import settings
except ImportError:
    import config as settings

from bs4 import BeautifulSoup

from .utils import get_normal_url
from .mixin import openYandexMixin


class TimeoutError(Exception):
    """Raised by getData() when a complete reply does not arrive in time."""


def getData(s):
    """Read a reply from socket *s* and parse it with BeautifulSoup.

    The reply is read in 1024-byte chunks until the literal marker
    ``###end###`` appears in the accumulated data; every occurrence of the
    marker is then removed and the remaining text is parsed.

    :param s: a connected socket-like object exposing ``recv(size)``.
    :returns: ``BeautifulSoup`` built from the reply text.
    :raises TimeoutError: if the marker has not been seen 60 seconds after
        reading started, or if the peer closes the connection before
        sending the marker.
    """
    data = ''
    started = datetime.datetime.now()
    limit = datetime.timedelta(seconds=60)

    while True:
        chunk = s.recv(1024)
        if not chunk:
            # recv() returns an empty string once the peer has closed the
            # connection.  Without this check the loop would spin at full
            # CPU until the 60 second limit below finally fires.
            raise TimeoutError('connection closed before ###end### was received')
        data += chunk

        if datetime.datetime.now() - started > limit:
            raise TimeoutError('no ###end### marker received within 60 seconds')

        # Search the whole buffer, not just the last chunk: the marker may be
        # split across two recv() calls.
        if '###end###' in data:
            break

    return BeautifulSoup(data.replace('###end###', ''))


class openYandex(openYandexMixin):
    """Fetch search-result pages through remote TCP endpoints and parse them.

    For every request one ``(server, port)`` pair is picked at random from
    ``settings.ANTI_SERVERS`` / ``settings.ANTI_PORTS`` and reserved through
    a Redis key named ``"<server>:<port>"``.  The URL and ``key`` are sent to
    that endpoint, the HTML reply is read with ``getData()`` and, optionally,
    converted to result tuples that are stored in Redis under
    ``"page:<url>"``.
    """

    # Text looked for on a Yandex page without result blocks; when it is
    # absent the page is fetched again (see _format_yandex).
    _NOTHING_FOUND = u'По вашему запросу ничего не нашлось'

    # Upper bound on nested re-fetches started from _format_yandex().
    _MAX_REFETCHES = 5

    def __init__(self, key):
        """Store *key* (sent to the endpoint with every URL) and open Redis."""
        self.key = key
        self.rds = redis.Redis(**settings.ANTI_REDIS_CONF)
        self.redis_key = None
        self.address = None

    def setUp(self):
        """Reserve a free endpoint, blocking until one is available.

        Sets ``self.server``, ``self.port`` and ``self.redis_key``.

        :returns: ``True`` once an endpoint has been reserved.
        """
        self.port = None
        while True:
            time.sleep(random.uniform(0, 1))
            port = random.choice(settings.ANTI_PORTS)
            server = random.choice(settings.ANTI_SERVERS)
            redis_key = '%s:%d' % (server, port)
            # SETNX is atomic: a separate GET followed by SET lets two
            # workers see the key as free and both take the same endpoint.
            if self.rds.setnx(redis_key, 1):
                self.port = port
                self.redis_key = redis_key
                self.server = server
                return True
            time.sleep(random.uniform(1, 2))

    def _fetch(self, url):
        """Reserve an endpoint, request *url* from it and return the soup.

        The socket is always closed and the Redis reservation always
        released, whether the exchange succeeds or raises.
        """
        self.setUp()
        # Remember the key locally so exactly the reservation made above is
        # released, even if self.redis_key is reassigned later.
        reserved_key = self.redis_key
        try:
            self.url = url
            parse_url = urlparse.urlparse(url)
            self.hostname = parse_url.hostname
            self.query = parse_url.query

            if isinstance(self.query, unicode):
                self.query = self.query.encode('utf8')

            request = '###split###'.join((url, self.key))
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.settimeout(120)
                print(self.server + ':' + str(self.port))
                s.connect((self.server, self.port))
                # sendall() keeps writing until the whole request is sent;
                # send() may transmit only part of it.
                s.sendall(request)
                return getData(s)
            finally:
                s.close()
        finally:
            self.rds.delete(reserved_key)

    def get_soup(self, url, save=True, counter=0, normalize=False):
        """Fetch *url* and return the parsed page or its extracted data.

        :param url: page to request.
        :param save: when true, run ``saveData()`` on the fetched page.
        :param counter: number of timeouts already spent; the request is
            retried until this exceeds 5.
        :param normalize: when true (and *save* is true), return the list
            produced by ``saveData()`` instead of the soup.
        :returns: the list from ``saveData()`` if *save* and *normalize* are
            both true, otherwise the ``BeautifulSoup`` page.  After more than
            five timeouts a ``BeautifulSoup`` holding ``<p>error text</p>``
            is returned instead.
        :raises Exception: any error other than ``TimeoutError`` /
            ``socket.timeout`` propagates unchanged.
        """
        while True:
            try:
                soup = self._fetch(url)
            except (TimeoutError, socket.timeout) as exc:
                print(exc)
                counter += 1
                if counter > 5:
                    return BeautifulSoup('<p>%s</p>' % str(exc))
                # Retry with the caller's own save/normalize settings.
                continue
            break

        if save:
            data = self.saveData(soup)
            if normalize:
                return data
        return soup

    def _refetch(self):
        """Fetch ``self.url`` again and return its extracted data.

        :returns: the list of result tuples from the new page, or ``None``
            when the re-fetch limit is reached or the fetch ended with the
            timeout error page.
        """
        depth = getattr(self, '_refetch_depth', 0)
        if depth >= self._MAX_REFETCHES:
            return None
        self._refetch_depth = depth + 1
        try:
            result = self.get_soup(self.url, save=True, normalize=True)
        finally:
            self._refetch_depth = depth
        # get_soup() returns a soup rather than a list when it gave up
        # after repeated timeouts.
        return result if isinstance(result, list) else None

    def _format_yandex(self, soup):
        """Extract result tuples from a yandex.ru results page."""
        blocks = []
        for block in soup.find_all(class_='serp-block'):
            classes = block.attrs.get('class') or []
            if len(classes) < 3 or 'serp-block_type_site' in classes:
                blocks.append(block)

        if not blocks:
            if self._NOTHING_FOUND not in unicode(soup):
                # No results and no "nothing found" text: fetch the page
                # again and use that data, so the caller does not store an
                # empty list over it.
                retried = self._refetch()
                if retried is not None:
                    return retried
            return []

        params = urlparse.parse_qs(self.query)
        key = params['text'][0]

        # Position of the last result on the previous page: page number
        # times results per page ("numdoc", 10 when absent).
        page = int(params['p'][0]) if 'p' in params else 0
        per_page = int(params['numdoc'][0]) if 'numdoc' in params else 10
        pos = page * per_page
        lr = int(params['lr'][0]) if 'lr' in params else 0

        data = []
        link_attrs = {'class': 'b-link serp-url__link'}
        for block in blocks:
            # Direct child tags only: text nodes between the children have
            # no tag-style find() and would raise TypeError.
            for item in block.find_all(True, recursive=False):
                links = item.find_all('a', link_attrs)
                if not links:
                    continue
                pos += 1
                host = get_normal_url(links[0]['href'])
                if 'yandex.ru' not in host and 'infected?' not in host:
                    url = links[-1].get('href')
                    data.append((pos, key.decode('utf8'), host, url, 0, lr))
        return data

    def _format_google(self, soup):
        """Extract result tuples from a www.google.ru results page."""
        params = urlparse.parse_qs(self.query)
        key = params['q'][0]
        pos = int(params['start'][0]) if 'start' in params else 0

        data = []
        for item in soup.find_all('li', {'class': 'g'}):
            pos += 1
            cite = item.find('cite')
            if cite is None:
                continue
            res = re.search(r'(https://)?(www.)?([^/\ ]+)', cite.text)
            if res:
                host = get_normal_url(res.group())
                data.append((pos, key.decode('utf8'), host, None, 1, None))
        return data

    def formatData(self, soup):
        """Build the list of result tuples for the page in *soup*.

        Uses ``self.hostname`` and ``self.query`` as set by ``get_soup()``.

        :returns: a list of ``(position, query, host, url, flag, lr)``
            tuples.  Yandex rows carry the result URL, flag ``0`` and the
            ``lr`` query parameter; Google rows carry ``None`` for the URL
            and ``lr`` and flag ``1`` (presumably a search-engine id --
            confirm against the consumer).  Any other hostname gives an
            empty list.
        :raises KeyError: if the query string lacks the search-text
            parameter (``text`` for Yandex, ``q`` for Google).
        """
        if 'yandex.ru' == self.hostname:
            return self._format_yandex(soup)
        if 'www.google.ru' == self.hostname:
            return self._format_google(soup)
        return []

    def saveData(self, soup):
        """Extract the results from *soup* and store them in Redis.

        The list is JSON-encoded and written under the key
        ``"page:<self.url>"``.

        :returns: the list produced by ``formatData()``.
        """
        data = self.formatData(soup)
        self.rds.set('page:' + self.url, json.dumps(data))
        return data


# Alias: the same class is used for Google pages; formatData() selects the
# parser from the hostname of the requested URL.
openGoogle = openYandex
