# (c) cavaliba.com - data - pipeline.py


from datetime import datetime
import re
import yaml
import uuid

from app_data.data import Instance 
from app_data.data import get_instances






# ---------------------------------------------------------------------
# Pipelines
# ---------------------------------------------------------------------

def list_pipelines(is_enabled=None):
    """Return the instances of class ``_pipeline``.

    ``is_enabled`` is forwarded unchanged to ``get_instances``.
    """
    return get_instances(classname="_pipeline", is_enabled=is_enabled)



def get_pipeline(pipeline):
    """Load the ``_pipeline`` instance named ``pipeline`` and parse its content.

    The first value of the instance's ``content`` field is parsed as YAML.

    Returns the parsed YAML document, or ``None`` when the instance is not
    found, its content cannot be read, is empty, or is not valid YAML.
    Read and parse failures are reported with ``print``.
    """
    instance = Instance.load_from_names(classname="_pipeline", keyname=pipeline)
    if not instance:
        return None

    # The YAML text is the first value of the "content" field.
    try:
        raw_content = instance.fields["content"].value[0]
    except Exception as e:
        print(f"ERR - can't access pipeline : {e}")
        return None

    # Nothing to parse: no pipeline data.
    if not raw_content:
        return None

    try:
        return yaml.safe_load(raw_content)
    except Exception as e:
        print(f"ERR - invalid pipeline content ({pipeline}): {e}")
        return None


# ---------------------------------------------------------------------
# Pipeline tasks
# ---------------------------------------------------------------------


def task_discard(datadict, taskopt):
    """Task: flag the current item for removal.

    Both arguments are ignored; the marker string ``"discard"`` is returned.
    """
    verdict = "discard"
    return verdict


def task_field_noop(datadict, taskopt):
    """Task: do nothing; ``datadict`` is left untouched and ``None`` is returned."""
    return None


def task_field_toint(datadict, taskopt):
    """Task: convert each listed field to ``int``, in place.

    taskopt: iterable of field names.

    A field that is missing, or whose value cannot be converted, is set to
    ``None`` (so a missing field gets created). Field names that cannot be
    used as dictionary keys are skipped.
    """
    for name in taskopt:
        try:
            converted = int(datadict[name])
        except (KeyError, TypeError, ValueError, OverflowError):
            # missing field, None/list value, non-numeric text, infinity
            converted = None
        try:
            datadict[name] = converted
        except TypeError:
            # Unhashable field name (e.g. a nested list in the options):
            # nothing can be stored under it, so skip it.
            continue


def task_field_tofloat(datadict, taskopt):
    """Task: convert each listed field to ``float``, in place.

    taskopt: iterable of field names.

    A field that is missing, or whose value cannot be converted, is set to
    ``None`` (so a missing field gets created). Field names that cannot be
    used as dictionary keys are skipped.
    """
    for name in taskopt:
        try:
            converted = float(datadict[name])
        except (KeyError, TypeError, ValueError, OverflowError):
            # missing field, None/list value, non-numeric text, huge integer
            converted = None
        try:
            datadict[name] = converted
        except TypeError:
            # Unhashable field name (e.g. a nested list in the options):
            # nothing can be stored under it, so skip it.
            continue

def task_field_tostring(datadict, taskopt):
    """Task: convert each listed field to ``str``, in place.

    taskopt: iterable of field names.

    Existing values are passed through ``str()`` (so ``None`` becomes the
    text ``"None"``). A missing field is created with the value ``None``.
    Field names that cannot be used as dictionary keys are skipped.
    """
    for name in taskopt:
        try:
            converted = str(datadict[name])
        except (KeyError, TypeError):
            # missing field, or unhashable field name
            converted = None
        try:
            datadict[name] = converted
        except TypeError:
            # Unhashable field name (e.g. a nested list in the options):
            # nothing can be stored under it, so skip it.
            continue


def task_field_nospace(datadict, taskopt):
    """Task: remove all whitespace from each listed field, in place.

    taskopt: iterable of field names.

    Fields that are missing or whose value is not text are left untouched.
    """
    for name in taskopt:
        try:
            # split() with no argument cuts on any run of whitespace,
            # so joining the pieces drops spaces, tabs and newlines alike.
            datadict[name] = ''.join(datadict[name].split())
        except (KeyError, TypeError, AttributeError):
            # missing field, unusable field name, or non-text value
            continue


def task_field_set(datadict, taskopt):
    """Task: set a field to a fixed value, adding or overwriting it.

    taskopt: ``[fieldname, value]``.

    Nothing happens when ``taskopt`` does not hold exactly two items or
    when the field name cannot be used as a dictionary key.
    """
    try:
        name, value = taskopt
        datadict[name] = value
    except (TypeError, ValueError):
        # ValueError: wrong number of options; TypeError: unusable options/key
        pass


def task_field_copy(datadict, taskopt):
    """Task: copy the value of one field into another.

    taskopt: ``[source, target]``; ``target`` is added or overwritten and
    ``source`` is kept. The value itself is not duplicated: both fields
    refer to the same object.

    Nothing happens when ``taskopt`` does not hold exactly two items, when
    ``source`` is missing, or when a name cannot be used as a dictionary key.
    """
    try:
        source, target = taskopt
        datadict[target] = datadict[source]
    except (KeyError, TypeError, ValueError):
        # KeyError: no source field; ValueError: wrong number of options;
        # TypeError: unusable options/key
        pass


def task_field_rename(datadict, taskopt):
    """Task: rename a field.

    taskopt: ``[old_name, new_name]``; the value moves to ``new_name``
    (overwriting any existing field of that name) and ``old_name`` is removed.

    Nothing happens when ``taskopt`` does not hold exactly two items, when
    ``old_name`` is missing, when both names are the same, or when a name
    cannot be used as a dictionary key.
    """
    try:
        old_name, new_name = taskopt
        if old_name == new_name:
            # Renaming a field to itself must be a no-op; assigning and then
            # removing the same key would delete the field.
            return
        # Assign before removing so a failure leaves the original field intact.
        datadict[new_name] = datadict[old_name]
        del datadict[old_name]
    except (KeyError, TypeError, ValueError):
        # KeyError: no such field; ValueError: wrong number of options;
        # TypeError: unusable options/key
        pass

def task_field_delete(datadict, taskopt):
    """Task: remove each listed field, in place.

    taskopt: iterable of field names. Names that are absent, or that cannot
    be used as dictionary keys, are ignored.
    """
    for name in taskopt:
        try:
            datadict.pop(name, None)
        except TypeError:
            # unhashable field name (e.g. a nested list in the options)
            continue


def task_field_keep(datadict, taskopt):
    """Task: drop every field that is not listed, in place.

    taskopt: collection of field names to keep. The ``classname`` and
    ``keyname`` fields are always kept.

    Nothing is removed when ``taskopt`` does not support membership tests
    (e.g. ``None``).
    """
    always_kept = ("classname", "keyname")
    try:
        # Collect first, remove afterwards: a dict must not change size
        # while it is being iterated.
        unwanted = [
            name for name in datadict
            if name not in always_kept and name not in taskopt
        ]
        for name in unwanted:
            datadict.pop(name)
    except TypeError:
        pass
    

def task_field_lower(datadict, taskopt):
    """Task: lowercase each listed field, in place.

    taskopt: iterable of field names. Fields that are missing, or whose
    value has no ``lower()`` method, are left untouched.
    """
    for name in taskopt:
        try:
            datadict[name] = datadict[name].lower()
        except (KeyError, TypeError, AttributeError):
            # missing field, unusable field name, or non-text value
            continue


def task_field_upper(datadict, taskopt):
    """Task: uppercase each listed field, in place.

    taskopt: iterable of field names. Fields that are missing, or whose
    value has no ``upper()`` method, are left untouched.
    """
    for name in taskopt:
        try:
            datadict[name] = datadict[name].upper()
        except (KeyError, TypeError, AttributeError):
            # missing field, unusable field name, or non-text value
            continue


def task_field_date_now(datadict, taskopt):
    """Task: set each listed field to the current local date (``YYYY-MM-DD``).

    taskopt: iterable of field names; fields are added or overwritten.
    Field names that cannot be used as dictionary keys are skipped.
    """
    # Read the clock once so every field of the item gets the same value.
    stamp = datetime.today().strftime('%Y-%m-%d')
    for name in taskopt:
        try:
            datadict[name] = stamp
        except TypeError:
            # unhashable field name (e.g. a nested list in the options)
            continue


def task_field_time_now(datadict, taskopt):
    """Task: set each listed field to the current local time (``HH:MM:SS``).

    taskopt: iterable of field names; fields are added or overwritten.
    Field names that cannot be used as dictionary keys are skipped.
    """
    # Read the clock once so every field of the item gets the same value.
    stamp = datetime.today().strftime('%H:%M:%S')
    for name in taskopt:
        try:
            datadict[name] = stamp
        except TypeError:
            # unhashable field name (e.g. a nested list in the options)
            continue


def task_field_datetime_now(datadict, taskopt):
    """Task: set each listed field to the current local date and time.

    The value is formatted as ``YYYY-MM-DD HH:MM:SS``.

    taskopt: iterable of field names; fields are added or overwritten.
    Field names that cannot be used as dictionary keys are skipped.
    """
    # Read the clock once so every field of the item gets the same value.
    stamp = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    for name in taskopt:
        try:
            datadict[name] = stamp
        except TypeError:
            # unhashable field name (e.g. a nested list in the options)
            continue

def task_field_regexp_sub(datadict, taskopt):
    """Task: apply a regular-expression substitution to one field, in place.

    taskopt: ``[fieldname, pattern, replacement]``; every match of
    ``pattern`` in the field is replaced (``re.sub`` semantics).

    The field is left untouched when ``taskopt`` does not hold exactly
    three items, the field is missing or not text, or the pattern or
    replacement is invalid.
    """
    try:
        field, pattern, replacement = taskopt
        datadict[field] = re.sub(pattern, replacement, datadict[field])
    except Exception:
        # Best effort: a bad pattern/replacement can fail in several ways
        # (re.error, TypeError, OverflowError, ...). KeyboardInterrupt and
        # SystemExit are deliberately no longer swallowed.
        pass
        
def task_field_uuid(datadict, taskopt):
    """Task: set each listed field to a freshly generated random UUID string.

    taskopt: iterable of field names; fields are added or overwritten and
    each one receives its own UUID (``uuid.uuid4``).
    Field names that cannot be used as dictionary keys are skipped.
    """
    for name in taskopt:
        try:
            datadict[name] = str(uuid.uuid4())
        except TypeError:
            # unhashable field name (e.g. a nested list in the options)
            continue

def task_field_merge(datadict, taskopt):
    """Task: combine two fields into a third one.

    taskopt: ``[field1, field2, target]``; ``target`` is set to
    ``field1 + field2`` (text is concatenated, numbers are added).
    The two source fields are kept.

    Nothing happens when ``taskopt`` does not hold exactly three items,
    a source field is missing, or the two values cannot be added.
    """
    try:
        first, second, target = taskopt
        datadict[target] = datadict[first] + datadict[second]
    except (KeyError, TypeError, ValueError):
        # KeyError: missing source; TypeError: values cannot be added or
        # unusable key; ValueError: wrong number of options
        pass

def task_field_append(datadict, taskopt):
    """Task: append a fixed value to a field, as text.

    taskopt: ``[fieldname, value]``; the field becomes the text of its
    current value followed by the text of ``value``.

    Nothing happens when ``taskopt`` does not hold exactly two items or
    the field is missing.
    """
    try:
        name, suffix = taskopt
        datadict[name] = f"{datadict[name]}{suffix}"
    except (KeyError, TypeError, ValueError):
        # KeyError: no such field; ValueError: wrong number of options;
        # TypeError: unusable options/key
        pass
def task_field_prepend(datadict, taskopt):
    """Task: prepend a fixed value to a field, as text.

    taskopt: ``[fieldname, value]``; the field becomes the text of
    ``value`` followed by the text of its current value.

    Nothing happens when ``taskopt`` does not hold exactly two items or
    the field is missing.
    """
    try:
        name, prefix = taskopt
        datadict[name] = f"{prefix}{datadict[name]}"
    except (KeyError, TypeError, ValueError):
        # KeyError: no such field; ValueError: wrong number of options;
        # TypeError: unusable options/key
        pass


## Conditions



def condition_field_match(datadict, taskopt):
    """Condition: tell whether a field matches a regular expression.

    taskopt: ``[fieldname, pattern]``; the pattern may match anywhere in
    the field (``re.search`` semantics).

    Returns ``True`` on a match, ``False`` otherwise -- including when
    ``taskopt`` does not hold exactly two items, the field is missing or
    not text, or the pattern is invalid.
    """
    try:
        field, pattern = taskopt
        return re.search(pattern, datadict[field]) is not None
    except Exception:
        # Best effort: an unusable pattern or value simply means "no match".
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return False

# ---------------------------------------------------------------------
# Pipeline MAP
# ---------------------------------------------------------------------


# Dispatch table: maps the name used in a pipeline definition to the function
# implementing it. Every function is called as func(datadict, taskopt).
# apply_pipeline() looks task names up here, and check_condition() looks
# condition operators up in this same table.
PIPELINE_TASK = {

    # returns the "discard" marker; apply_pipeline() then drops the item
    "discard": task_discard,

    # field tasks: modify datadict in place
    "field_noop": task_field_noop,
    "field_toint": task_field_toint,
    "field_tofloat": task_field_tofloat,
    "field_nospace": task_field_nospace,
    "field_set": task_field_set,
    "field_copy": task_field_copy,
    "field_rename": task_field_rename,
    "field_delete": task_field_delete,
    "field_keep": task_field_keep,
    "field_lower": task_field_lower,
    "field_upper": task_field_upper,
    "field_date_now": task_field_date_now,
    "field_datetime_now": task_field_datetime_now,
    "field_time_now": task_field_time_now,
    "field_regexp_sub": task_field_regexp_sub,
    "field_uuid": task_field_uuid,
    "field_merge": task_field_merge,
    "field_tostring": task_field_tostring,
    "field_append": task_field_append,
    "field_prepend": task_field_prepend,

    # condition checkers: return True/False instead of modifying datadict
    "field_match": condition_field_match,
    }




def check_condition(datadict, opts):
    """Evaluate a condition against one item.

    opts: sequence ``[operator, opt1, opt2, ...]``. ``operator`` is looked
    up in ``PIPELINE_TASK`` and the matching function is called with
    ``(datadict, [opt1, opt2, ...])``.

    Returns whatever that function returns (callers rely on its
    truthiness), or ``False`` when ``opts`` is empty or not indexable, or
    when the operator is unknown.
    """
    try:
        operator = opts[0]
        taskopt = opts[1:]
    except (IndexError, KeyError, TypeError):
        return False

    # Registered names are all strings; rejecting anything else up front
    # also avoids a TypeError when the operator is unhashable (e.g. a list).
    if not isinstance(operator, str):
        return False

    cfunc = PIPELINE_TASK.get(operator)
    if not cfunc:
        return False

    return cfunc(datadict, taskopt)



def _condition_allows(condition, true_conditions):
    """Tell whether a task guarded by ``condition`` may run."""
    if not isinstance(condition, str):
        # Non-text names (numbers, null, ...) cannot carry the '!' prefix:
        # they only pass if set_condition recorded that exact value.
        return condition in true_conditions
    if condition in ("", "!"):
        # no condition given (a lone '!' names nothing to negate)
        return True
    if condition[0] == "!":
        return condition[1:] not in true_conditions
    return condition in true_conditions


def _run_tasks(datadict, tasks):
    """Apply every task to one item; return True if the item must be discarded."""
    discard = False
    true_conditions = []    # names recorded by set_condition for this item

    for task in tasks:
        # each task must be a mapping {TASKNAME: options}
        if not isinstance(task, dict):
            continue

        for taskname, opts in task.items():
            # Legacy form "TASKNAME: some_string": wrapped into a one-element
            # list, so the string is read as the condition name below and
            # the task receives no options.
            # NOTE(review): confirm that this is the intended legacy meaning.
            if isinstance(opts, str):
                opts = [opts]

            try:
                condition = opts[0]
                taskopt = opts[1:]
            except (IndexError, KeyError, TypeError):
                # empty or non-sequence options: ignore the task
                continue

            # special task: evaluate a condition and remember it when true
            if taskname == "set_condition":
                if check_condition(datadict, taskopt):
                    true_conditions.append(condition)
                continue

            if not _condition_allows(condition, true_conditions):
                continue

            tfunc = PIPELINE_TASK.get(taskname)
            if not tfunc:
                continue
            # Remaining tasks still run after a discard, as they always did.
            if tfunc(datadict, taskopt) == "discard":
                discard = True

    return discard


def apply_pipeline(pipeline=None, datalist=None):
    """Run a pipeline over a list of items.

    pipeline: keyname of the ``_pipeline`` instance to apply.
    datalist: list of dicts ``[ {}, {}, ... ]``; the dicts are modified
        in place by the tasks.

    The pipeline content must be a mapping with a ``tasks`` list. Each task
    is a one-entry mapping, either of:

    - ``TASKNAME: [CONDITION_NAME, opt1, opt2, ...]``
    - ``set_condition: [CONDITION_NAME, operator, opt1, opt2, ...]``

    A task runs when its condition name is empty, when that name was
    recorded as true by an earlier ``set_condition`` for the same item, or,
    for a name prefixed with ``!``, when it was not. Unknown task names and
    malformed tasks are ignored.

    Returns:
    - ``datalist`` unchanged when no pipeline is given, or when the pipeline
      cannot be loaded, is not a mapping, or has no usable ``tasks`` list;
    - ``None`` when ``datalist`` is empty or not a list;
    - otherwise a new list holding the items that were not discarded.
    """
    if not pipeline:
        return datalist

    if not datalist or not isinstance(datalist, list):
        return None

    pipeline_data = get_pipeline(pipeline)

    if not pipeline_data:
        print(f"No pipeline data for {pipeline}")
        return datalist

    # The YAML content may parse to something other than a mapping
    # (a bare string, a list, a number): nothing can be applied then.
    if not isinstance(pipeline_data, dict):
        print(f"ERR - invalid pipeline content ({pipeline}): not a mapping")
        return datalist

    if "tasks" not in pipeline_data:
        return datalist

    tasks = pipeline_data["tasks"]
    # e.g. an empty "tasks:" key parses to None
    if not isinstance(tasks, (list, tuple)):
        print(f"ERR - invalid pipeline content ({pipeline}): tasks is not a list")
        return datalist

    return [datadict for datadict in datalist if not _run_tasks(datadict, tasks)]

