import os
import numpy as np
import pandas as pd

class pump:
    """Draw random samples of rows from a comma-separated text file.

    The file is opened once (encoding ``iso8859-1``) and kept open in
    ``self.reader``.  Each call to :meth:`go` jumps to random byte offsets,
    discards the (probably partial) line found there and takes the line that
    follows, so the whole file never has to be loaded.  Because offsets are
    drawn uniformly, a row is more likely to be picked when the line before
    it is long.  Fields are split on plain commas; quoting is not interpreted.

    Attributes:
        reader:  open text file handle, or ``None`` if construction failed.
        keep:    column names to return (defaults to every header name).
        drop:    column names to exclude from ``keep``.
        qlist:   column names to convert to numeric values.
        subsize: number of rows to sample per call to :meth:`go`.

    Errors are reported with ``print`` and a bare ``return`` rather than by
    raising, so a failed :meth:`go` returns ``None``.
    """

    # Number of consecutive unusable draws (end of file or wrong field count)
    # tolerated by go() before it gives up instead of looping forever.
    max_misses = 10000

    @staticmethod
    def demo():
        """Print a short usage example."""
        demostr = '''
import clubear as cb
#clubear.csv is generated by cb.manager.demo()
pm=cb.pump('clubear.csv') #start a new pump
pm.keep #check the head lists
cb.check(pm).stats() #check the for stats
pm.qlist=['age','height','weight','price','logsales']
cb.check(pm).stats() #check stats again
cb.check(pm).table() #check table
pm.subsize=10000 #check the subsize
df=pm.go() #start to pump data
'''
        print(demostr)

    def __init__(self, pathfile):
        """Open ``pathfile`` and read its header line.

        Args:
            pathfile: path of the comma-separated text file to sample from.

        On any failed check a message is printed and the object is left with
        ``reader`` set to ``None`` and empty column lists, so that a later
        :meth:`go` reports the problem instead of raising ``AttributeError``.
        """
        # Defaults first: every attribute exists even when a check fails.
        self.reader = None
        self.keep = []
        self.drop = []
        self.qlist = []
        self.subsize = 1000

        if not isinstance(pathfile, str): print('pump: The pathfile must be a str.'); return
        if not os.path.exists(pathfile): print('pump: This file dose not exists!'); return
        if not os.path.isfile(pathfile): print('pump: This is not a file!'); return

        reader = open(pathfile, encoding='iso8859-1')
        header = reader.readline()
        # An empty string (not even a newline) means the file has no content.
        if header == '':
            reader.close()
            print('pump: Nothing found in the file!'); return

        self.reader = reader
        self.keep = header.replace('\n', '').split(',')
        # Independent copy so editing one list in place cannot alter the other.
        self.qlist = list(self.keep)

    def close(self):
        """Close the underlying file; safe to call more than once."""
        if self.reader is not None and not self.reader.closed:
            self.reader.close()

    def __enter__(self):
        """Allow ``with pump(path) as pm:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file when leaving a ``with`` block."""
        self.close()

    def go(self):
        """Sample ``self.subsize`` random rows and return them as a DataFrame.

        ``self.keep`` is first reduced to names that exist in the header and
        are not in ``self.drop``, then normalised to header order so that the
        column labels always match the extracted fields.  ``self.qlist`` is
        reduced to the names present in the result, and those columns are
        converted with ``pd.to_numeric(errors='coerce')``; all other columns
        stay as strings (object dtype).

        Returns:
            A ``pandas.DataFrame`` with ``subsize`` rows (no rows when
            ``subsize`` is zero or negative), or ``None`` after printing a
            message when a precondition fails or no usable data line is found
            within ``max_misses`` consecutive draws.
        """
        subsize = self.subsize

        # Check all inputs and pre-conditions.
        if self.reader is None or self.reader.closed:
            print('pump.go: There is no open file to read from.'); return
        if not isinstance(subsize, (int, np.integer)): print('pump.go: The subsize must be a int.'); return
        if not isinstance(self.keep, list): print('pump.go: The heads list must be a list.'); return
        if not isinstance(self.drop, list): print('pump.go: The drop list must be a list.'); return
        if not isinstance(self.qlist, list): print('pump.go: The qlist must be list.'); return

        reader = self.reader
        reader.seek(0, 0)
        heads = reader.readline().replace('\n', '').split(',')

        requested = [name for name in self.keep if name in heads and name not in self.drop]
        if len(requested) == 0:
            self.keep = requested
            print('pump.go: The heads list contains no valid heads.'); return

        # Field positions are taken in header order and the labels are derived
        # from those same positions, so a reordered or duplicated ``keep``
        # can no longer mislabel the columns.
        wanted = set(requested)
        select = [idx for idx, name in enumerate(heads) if name in wanted]
        self.keep = [heads[idx] for idx in select]

        # Find the file size.
        reader.seek(0, 2)
        file_size = reader.tell()

        # Generate the data.
        ncolumns = len(heads)
        max_misses = self.max_misses
        rows = []
        misses = 0
        while len(rows) < subsize:
            if misses >= max_misses:
                print('pump.go: No valid data line found after %d consecutive tries.' % misses)
                return
            pos = int(np.random.uniform() * file_size)
            reader.seek(pos, 0)
            reader.readline()  # discard the line we landed in (likely partial)
            real_line = reader.readline()
            fields = real_line.replace('\n', '').split(',')
            # '' means we ran off the end of the file; a wrong field count
            # means a malformed row.  Either way, draw again.
            if real_line == '' or len(fields) != ncolumns:
                misses += 1
                continue
            misses = 0
            rows.append([fields[idx] for idx in select])

        # Passing columns to the constructor also works when ``rows`` is empty.
        df = pd.DataFrame(rows, columns=self.keep)

        # Create numerical values.
        df = df.astype('object')
        self.qlist = [name for name in self.qlist if name in df.columns]
        for name in self.qlist:
            df[name] = pd.to_numeric(df[name], errors='coerce')
        return df