import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import unittest
from pucktrick.outliers import *
from pucktrick.utils import *
import pandas as pd
import numpy as np
from pandas.testing import assert_frame_equal

class Test_outlies(unittest.TestCase):
    """Tests for ``outlier``: the share of perturbed cells must match the request."""

    def test_outliers(self):
        """Perturb a fake table in ``new`` mode, extend it, and check the final ratio.

        The test calls ``outlier`` twice on column ``f4``:

        1. ``mode='new'`` with ``percentage=0.5`` on a freshly created table;
        2. ``mode='extended'`` with ``percentage=0.7`` on the result of step 1,
           passing the untouched table as third argument.

        It then measures the fraction of selected cells that differ from the
        original table and requires it to be within 0.15 of the last requested
        percentage (0.7).
        """
        num_rows = 20
        column = ['f4']
        # Switch to "all" (or None) to exercise the whole-table branch below.
        selection_criteria = "f4=='apple'"

        fake_df = create_fake_table(num_rows)
        print("original")
        print(fake_df)

        # --- First pass: fresh perturbation of the original table. ---
        percentage = 0.5
        strategy = {
            'affected_features': column,
            'selection_criteria': selection_criteria,
            'percentage': percentage,
            'mode': 'new',
            'perturbate_data': {
                'distribution': 'random',
                'param': None,
                'value': None,
                'condition_logic': None,
            },
        }
        error, noise_df = outlier(fake_df, strategy)
        # Fail here with a clear message rather than later on an unusable frame.
        # NOTE(review): 0 is treated as the success code, as in the second call.
        self.assertEqual(error, 0, "outlier() reported an error in 'new' mode")

        print("prima modifica")
        print(noise_df)

        # --- Second pass: extend the existing perturbation up to 70%. ---
        percentage = 0.7
        strategy = {
            'affected_features': column,
            'selection_criteria': selection_criteria,
            'percentage': percentage,
            'mode': 'extended',
            'perturbate_data': {
                'distribution': 'random',
                'param': None,
                'value': None,
                'condition_logic': None,
            },
        }
        error, noise1_df = outlier(noise_df, strategy, fake_df)
        print("error "+str(error))
        # Previously an error silently kept the first-pass frame, which then
        # failed the 0.7 tolerance check below with a misleading message.
        self.assertEqual(error, 0, "outlier() reported an error in 'extended' mode")
        noise_df = noise1_df

        print("seconda modifica")
        print(noise_df)

        # --- Measure the fraction of selected cells that were changed. ---
        if selection_criteria == "all" or selection_criteria is None:
            reference, perturbed = fake_df, noise_df
            population = num_rows
        else:
            selection_criteria, isAffected = affectedSelected(column, selection_criteria)
            if isAffected == 1:
                # Presumably isAffected == 1 means the criteria filter on the
                # perturbed column itself -- TODO confirm against
                # affectedSelected. Rows are therefore selected through an
                # untouched, "puck_"-prefixed copy of the original column, so
                # both frames are filtered by the same original values.
                selection_criteria = "puck_f4=='apple'"
                df_pref2 = fake_df[column].add_prefix("puck_")
                fake_df = pd.concat([fake_df, df_pref2], axis=1)
                noise_df = pd.concat([noise_df, df_pref2], axis=1)
            reference = fake_df.query(selection_criteria)
            perturbed = noise_df.query(selection_criteria)
            population = len(reference)

        # An empty selection would turn the ratio into a division by zero.
        self.assertGreater(population, 0, "selection criteria matched no rows")

        dif = reference[column] != perturbed[column]
        # Average the changed-cell count over the affected columns.
        diff_number = dif.sum().sum() / len(column)
        diff_p = round(diff_number / population, 1)

        print("diff_p", diff_p)
        print("percentage", percentage)
        self.assertLessEqual(abs(diff_p-percentage), 0.15)
  
    


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
