from sklearn import preprocessing
import numpy as np


# import features
def import_train_set(train_file_name="AllResults.txt"):
    featurelist = []
    with open(train_file_name, "r") as infile:
        for line in infile:
            featurelist.append(line.strip())
    # featurelist[0] now holds the header row in the form
    # 'Area, MajorAxisLength, ... Class'
    FeatureNames = [x.strip() for x in featurelist[0].split(",")]
    # FeatureNames has the form ['Area', 'MajorAxisLength', ..., 'Class'],
    # which is what I wanted.
    AllData = [
        [float(x.strip()) for x in featurelist[i].split(",")]
        for i in range(1, len(featurelist))
    ]
    # AllData is in the form [[1,2,3,...,0.0],[3,3,1,...,0.0],...,[5,3,1,...,0.0]];
    # the last entry of each row is the class.
    classes = [int(i[-1]) for i in AllData]
    # classes contains the class label each sample came from.
    # Drop the target column from AllData.
    X = [i[0:-1] for i in AllData]
    # X has a form similar to AllData, so the reshaped output should be
    # X = array([[0,1,2,...],
    #            [1,2,3,...]])
    Data = np.asarray(X, order="F")
    # This has the right form; it uses Fortran column-major memory layout rather
    # than row-major C-style. The notation is scientific, where the iris data set
    # looks like a float. CHECKED: both are type numpy.float64 and have the same
    # indexing calls, so this matches the iris data set layout.
    Target = np.asarray(classes)
    return (Data, Target)
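# Usage sketch (illustrative only): import_train_set expects a comma-separated text
# file whose first row is the header ('Area, MajorAxisLength, ..., Class') and whose
# last column is the integer class label. "AllResults.txt" is simply the default
# file name assumed above; substitute your own results file.
#
#   Data, Target = import_train_set("AllResults.txt")
#   print(Data.shape)          # (N samples, M measures)
#   print(np.unique(Target))   # class labels present in the file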
########################################################################
# for training purposes, the number of samples in data must be divisible by 256
def Trim_Train_Data(Data, Target, max_length=None, balance=False):
    ####
    # Inputs: Data is a numpy array with N samples (rows) and M measures (cols).
    #         Target is a length-N vector with the ground truth.
    #         max_length defines the maximum length of the training data and
    #                    should be divisible by 256 (enforced below).
    #         balance is True if you wish to have the same number of samples in each class.
    print("Class lengths are = ", [sum(Target == i) for i in set(Target)])
    if not balance:
        if np.shape(Data)[0] / 256 != np.round(np.shape(Data)[0] / 256) or (
            max_length is not None and max_length < np.shape(Data)[0]
        ):
            print("Trimming data for training purposes...")
            if not max_length:
                max_length = int(256 * np.floor(np.shape(Data)[0] / 256))
            else:
                if max_length / 256 != np.round(max_length / 256):
                    # must make it divisible by 256
                    max_length = int(np.floor(max_length / 256) * 256)
                    print(
                        "Your given max_length was not divisible by 256. New max length is = %d"
                        % max_length
                    )
            # determine the proportion of each class.
            cs = np.unique(Target)
            ps = np.zeros(len(cs))
            rows_to_take = np.array([])
            for i in range(len(cs)):
                ps[i] = np.sum(Target == cs[i]) / len(Target)
                goodrows = np.where(Target == cs[i])[0]
                rows_to_take = np.append(
                    rows_to_take, goodrows[0 : int(np.floor(ps[i] * max_length))]
                )
            ad_row = 0
            class_ind = 0
            while len(rows_to_take) != max_length:
                # need to supplement with extra rows, cycling through the classes.
                goodrows = np.where(Target == cs[class_ind])[0]
                rows_to_take = np.append(
                    rows_to_take,
                    goodrows[int(np.floor(ps[class_ind] * max_length)) + 1 + ad_row],
                )
                class_ind = class_ind + 1
                if class_ind >= len(cs):
                    class_ind = 0
                    ad_row = ad_row + 1
            rows_to_take = rows_to_take.astype(int)
            X_train_scaled = Data[rows_to_take, :]
            Y_train = Target[rows_to_take]
            print("Complete")
        else:
            X_train_scaled = Data
            Y_train = Target
        print("Final training length = %d" % X_train_scaled.shape[0])
        print(
            "Class lengths after trimming are = ",
            [sum(Y_train == i) for i in set(Y_train)],
        )
        return (X_train_scaled, Y_train)
    else:
        # determine which class has the minimum number of cases.
        cs = np.unique(Target)
        lens = np.zeros(len(cs))
        for i in range(len(cs)):
            lens[i] = sum(Target == cs[i])
        # randomly sample that number of samples from each class.
        min_len = int(min(lens))
        rows_to_take = np.array([])
        for i in range(len(cs)):
            possiblerows = np.where(Target == cs[i])[0]
            # sample without replacement.
            rows_to_take = np.append(
                rows_to_take, np.random.choice(possiblerows, min_len, replace=False)
            )
        if len(rows_to_take) / 256 != np.round(len(rows_to_take) / 256) or (
            max_length is not None and max_length < len(rows_to_take)
        ):
            # trim until the length is correct.
            if not max_length:
                max_length = int(256 * np.floor(len(rows_to_take) / 256))
            else:
                if max_length / 256 != np.round(max_length / 256):
                    # must make it divisible by 256
                    max_length = int(np.floor(max_length / 256) * 256)
                    print(
                        "Your given max_length was not divisible by 256. New max length is = %d"
                        % max_length
                    )
            # use min_len now to delete entries, starting from the last class.
            timearound = 0
            pheno = len(cs)  # start at the end
            while len(rows_to_take) > max_length:
                # the entry to delete is ((min_len - timearound) * pheno) - 1
                rows_to_take = np.delete(
                    rows_to_take, ((min_len - timearound) * pheno) - 1
                )
                pheno = pheno - 1
                if pheno < 1:
                    pheno = len(cs)
                    timearound = timearound + 1
        rows_to_take = rows_to_take.astype(int)
        X_train_scaled = Data[rows_to_take, :]
        Y_train = Target[rows_to_take]
        print("Final training length = %d" % X_train_scaled.shape[0])
        print(
            "Class lengths after trimming are = ",
            [sum(Y_train == i) for i in set(Y_train)],
        )
        return (X_train_scaled, Y_train)


#############################REMOVE OUTLIER DATA########################
# How? Do this after scaling the data, then compute a z-score. We'll check the data after that.
def Remove_Outliers(Data, Target):
    # For each class, detect outliers via z-scoring. This assumes the data is
    # described by a Gaussian, which is why it is vital to do this AFTER scaling
    # the data. The raw data is absolutely not Gaussian (I plotted it), and the
    # DBSCAN machine learning algorithm was not helpful either. However, the data
    # IS roughly Gaussian after embedding, so we can clean the signal afterwards
    # by sending in the embedded data in 1, 2, or 3 dimensions and removing points
    # beyond the threshold.
    # Data is TSNE embedded.
    zscores = np.zeros(np.shape(Data))
    for pheno in np.unique(Target):
        # find the rows belonging to this phenotype.
        prows = np.where(Target == pheno)[0]
        for dim in range(np.shape(Data)[1]):
            # per-class mean and standard deviation of this dimension.
            m = np.mean(Data[prows, dim])
            s = np.std(Data[prows, dim])
            for example in range(len(prows)):
                zscores[prows[example], dim] = (Data[prows[example], dim] - m) / s
    # Now apply a threshold to the z-scores (both tails).
    # Good rule-of-thumb thresholds are 2.5, 3, 3.5, or more.
    zthresh = 2.5
    outliers = np.abs(zscores) > zthresh
    badrows = [i for i in range(np.shape(outliers)[0]) if outliers[i].any()]
    Data = np.delete(Data, badrows, axis=0)
    Target = np.delete(Target, badrows, axis=0)
    return (Data, Target)
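# Usage sketch (illustrative, continuing from the import sketch above): trim a
# training set to a multiple of 256 samples with balanced classes, then drop
# per-class outliers. The max_length of 1024 is an assumed example budget, and
# Remove_Outliers is intended for scaled or embedded data as noted above.
#
#   X_train, y_train = Trim_Train_Data(Data, Target, max_length=1024, balance=True)
#   X_train, y_train = Remove_Outliers(X_train, y_train)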
##############################POST AUGMENTATION#########################
def _perturb_size(rows, size_vary):
    # Helper for Augment_Size: randomly grow or shrink each sampled row by its
    # size_vary factor (one coin flip per row decides grow vs. shrink). It
    # consolidates the repeated augmentation blocks and mirrors their per-column
    # treatment: the area-like columns 0 (Area) and 4 (ConA) use the squared
    # factor, columns 1, 2, 5, 7, 8, 9, 10, 11 use the factor itself, and the
    # remaining columns (Ecc, Sol, bleb_len) are left unchanged.
    area_cols = [0, 4]
    linear_cols = [1, 2, 5, 7, 8, 9, 10, 11]
    for v in range(rows.shape[0]):
        sign = 1.0 if np.random.rand() < 0.5 else -1.0
        for c in area_cols:
            rows[v, c] = rows[v, c] + sign * rows[v, c] * size_vary[v] * size_vary[v]
        for c in linear_cols:
            rows[v, c] = rows[v, c] + sign * rows[v, c] * size_vary[v]
    return rows


def Augment_Size(Data, Target, max_copies=0, s=0.2, balance=False, augment_class=None):
    max_copies = int(max_copies)
    # Augment only the copies made by scaling the unit-based measures.
    # Measures should go: Area, MjrAxis, MnrAxis, Ecc, ConA, EqD, Sol, Ext, Per,
    # conPer, fiber_length, InscribeR, bleb_len
    # first, determine if we desire class balance.
    if balance:
        # determine which class has the maximum number of samples.
        cs = np.unique(Target)
        vals = [sum(Target == cs[i]) for i in range(len(cs))]
        print(
            "Class %d has max number of samples, increasing other classes via size augmentation"
            % cs[np.argmax(vals)]
        )
        for i in range(len(cs)):
            if i != np.argmax(vals):
                # determine how many samples need to be made.
                to_make = int(vals[np.argmax(vals)] - vals[i])
                # randomly sample rows from Data with the correct phenotype cs[i]
                possible_rows = np.where(Target == cs[i])[0]
                # sample to_make row indices from possible_rows (with replacement).
                sampled_rows = np.random.choice(possible_rows, to_make, replace=True)
                newrows = Data[sampled_rows, :]
                size_vary = s * np.random.rand(to_make)
                # vary the size of the sampled rows.
                newrows = _perturb_size(newrows, size_vary)
                Data = np.concatenate((Data, newrows), axis=0)
                yadd = np.ones(to_make) * cs[i]
                Target = np.concatenate((Target, yadd.astype(int)), axis=0)
        order = np.argsort(Target)
        Data = Data[order, :]
        Target = Target[order]
    if augment_class is None:
        if max_copies > 0:
            print(
                "Augmenting each class with additional %d samples via size augmentation"
                % max_copies
            )
            cs = np.unique(Target)
            for i in range(len(cs)):
                # generate n = max_copies new samples for this class.
                possible_rows = np.where(Target == cs[i])[0]
                sampled_rows = np.random.choice(possible_rows, max_copies, replace=True)
                newrows = Data[sampled_rows, :]
                size_vary = s * np.random.rand(max_copies)
                # vary the size of the sampled rows.
                newrows = _perturb_size(newrows, size_vary)
                Data = np.concatenate((Data, newrows), axis=0)
                yadd = np.ones(max_copies) * cs[i]
                Target = np.concatenate((Target, yadd.astype(int)), axis=0)
            order = np.argsort(Target)
            Data = Data[order, :]
            Target = Target[order]
    else:
        augment_class = int(augment_class)
        if max_copies > 0:
            print(
                "Augmenting Class = %d with additional %d samples via size augmentation"
                % (augment_class, max_copies)
            )
            # generate n = max_copies new samples for the requested class.
            possible_rows = np.where(Target == augment_class)[0]
            sampled_rows = np.random.choice(possible_rows, max_copies, replace=True)
            newrows = Data[sampled_rows, :]
            size_vary = s * np.random.rand(max_copies)
            # vary the size of the sampled rows.
            newrows = _perturb_size(newrows, size_vary)
            Data = np.concatenate((Data, newrows), axis=0)
            yadd = np.ones(max_copies) * augment_class
            Target = np.concatenate((Target, yadd.astype(int)), axis=0)
            order = np.argsort(Target)
            Data = Data[order, :]
            Target = Target[order]
    return (Data, Target)
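# Usage sketch (illustrative, continuing the sketches above): balance the classes
# via size augmentation, then add extra perturbed copies of a single class. The
# class index 2 and the 100 extra copies are assumed example values; s is the
# maximum relative size perturbation.
#
#   Data, Target = Augment_Size(Data, Target, balance=True, s=0.2)
#   Data, Target = Augment_Size(Data, Target, max_copies=100, augment_class=2, s=0.2)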
########################################################################
########################################################################
#######                 IMPORT THE DEV SET                         #####
########################################################################
########################################################################
def import_dev_set(dev_file_name="DevResults.txt"):
    print("Importing the dev set...")
    # import features
    featurelist = []
    with open(dev_file_name, "r") as infile:
        for line in infile:
            featurelist.append(line.strip())
    # featurelist[0] now holds the header row in the form 'Area, MajorAxisLength, ... Class'
    FeatureNames = [x.strip() for x in featurelist[0].split(",")]
    # FeatureNames has the form ['Area', 'MajorAxisLength', ..., 'Class'], which is what I wanted.
    DevData = [
        [float(x.strip()) for x in featurelist[i].split(",")]
        for i in range(1, len(featurelist))
    ]
    # DevData is in the form [[1,2,3,...,0.0],[3,3,1,...,0.0],...,[5,3,1,...,0.0]];
    # the last entry of each row is the class.
    Devclasses = [int(i[-1]) for i in DevData]
    # Devclasses contains the class label each sample came from.
    # Drop the target column from the data.
    DevX = [i[0:-1] for i in DevData]
    X_dev = np.asarray(DevX, order="F")
    # add aspect ratio as the last column of the data
    AR = []
    for i in range(len(X_dev)):
        AR.append(X_dev[i, 1] / X_dev[i, 2])
    AR = np.asarray(AR)
    AR = AR.reshape((len(AR), 1))
    X_dev = np.append(X_dev, AR, 1)  # concatenates the arrays appropriately.
    # add form factor (P^2 / Area) as the last column of the data
    FF = []
    for i in range(len(X_dev)):
        FF.append(X_dev[i, 8] * X_dev[i, 8] / X_dev[i, 0])
    FF = np.asarray(FF)
    FF = FF.reshape((len(FF), 1))
    X_dev = np.append(X_dev, FF, 1)
    # This has the right form; it uses Fortran column-major memory layout rather
    # than row-major C-style. The notation is scientific, where the iris data set
    # looks like a float. CHECKED: both are type numpy.float64 and have the same
    # indexing calls, so this matches the iris data set layout.
    y_dev = np.asarray(Devclasses)
    return (X_dev, y_dev, FeatureNames)


########################################################################
#########DATA IS IN THE SAME FORM AS IS FOUND IN IRIS DATASET###########
########################################################################
# Target = Target classes (0-4) for training and validation (numpy.int64 array)
# Data = Data for training and validation to be split (numpy.float64 array)
# FeatureNames = Feature names for each column of data (python list of str)
########################################################################
# print("Data is now in the same form as that found in Iris Dataset")
# print("Splitting the training dataset into train/val")


def apply_normalization(X_train, max_norm=False, l1_norm=False, l2_norm=False):
    ########################################################
    # Column-wise normalization of the feature matrix.
    if max_norm:
        print("Normalizing data using max norm")
        X_train = X_train / np.max(np.abs(X_train), 0)[None, :]
    if l1_norm:
        print("Normalizing data using l1 norm")
        X_train = X_train / np.sum(X_train, 0)[None, :]
    if l2_norm:
        print("Normalizing data using l2 norm")
        X_train = X_train / np.sqrt(np.sum(X_train * X_train, 0))[None, :]
    return X_train
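# Usage sketch (illustrative): import the dev set and apply a column-wise
# normalization. Enabling only the l2 norm here is an assumed example, not a
# recommendation from the original pipeline.
#
#   X_dev, y_dev, FeatureNames = import_dev_set("DevResults.txt")
#   X_dev = apply_normalization(X_dev, l2_norm=True)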
########################################################################
def preprocess_train_data(X_train, d=2):
    ############### SPLITTING THE DATASET ##################
    # First split the dataset so it is as if we only had a training set and an eval set.
    # X_train, X_test, y_train, y_test = train_test_split(Data, Target, test_size=.3)  # .25)  # , random_state=
    # The default has shuffle=True; test_size sets the proportion of the data set to include in the test split.
    ########################################################
    if d > 1:
        print("Increasing dimensionality of dataset using cross terms")
        #################INCREASING FEATURES####################
        poly = preprocessing.PolynomialFeatures(degree=d, interaction_only=True)
        # IN SOME MODELS with degree-2 polynomial features we get exactly 90%. Some degree-3
        # models reach 90.83%, which is exactly even with the deep learning models.
        X_train = poly.fit_transform(X_train)
        # target_feature_names = ['x'.join(['{}^{}'.format(pair[0], pair[1]) for pair in tup if pair[1] != 0])
        #                         for tup in [zip(FeatureNames, p) for p in poly.powers_]]
    ########################################################
    print("Scaling the data")
    ################# SCALE THE DATA #######################
    # Scale the data. Each attribute in the dataset must be independently scaled.
    # StandardScaler would return z-scores, Z = (x - mu) / sigma; RobustScaler
    # instead centers on the median and scales by the IQR, which is less sensitive
    # to outliers. Another option: QuantileTransformer(output_distribution='normal').
    scaler = preprocessing.RobustScaler().fit(X_train)
    # scaler = preprocessing.StandardScaler().fit(X_train)
    # IMPORTANT NOTE: we are scaling based only on the training data!
    X_train_scaled = scaler.transform(X_train)  # scaler is already fitted above
    # X_test_scaled = scaler.transform(X_test)  # will be used later to evaluate the performance.
    # X_dev_scaled = scaler.transform(X_dev)
    ##########################################################
    return (X_train_scaled, scaler)  # , target_feature_names)


def preprocess_test_data(X_dev, scaler, d=2):
    ########################################################
    # Apply the same polynomial expansion used in training, then scale with the
    # scaler that was fitted on the training data only.
    if d > 1:
        print("Increasing dimensionality of dataset using cross terms")
        #################INCREASING FEATURES####################
        poly = preprocessing.PolynomialFeatures(degree=d, interaction_only=True)
        X_dev = poly.fit_transform(X_dev)
    ########################################################
    print("Scaling the data")
    ################# SCALE THE DATA #######################
    X_dev_scaled = scaler.transform(X_dev)
    ##########################################################
    return X_dev_scaled
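# Usage sketch (illustrative): fit the polynomial expansion and scaler on the
# training matrix, then apply the fitted scaler to the dev matrix with the same
# degree d. The two matrices must have the same number of columns; note that
# import_dev_set already appends AR and FF (as P^2/Area), while Add_Measures
# below uses Area/P^2 for FF, so keep the added columns consistent between the two.
#
#   X_train_scaled, scaler = preprocess_train_data(X_train, d=2)
#   X_dev_scaled = preprocess_test_data(X_dev, scaler, d=2)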
def Add_Measures(
    Data,
    FeatureNames=None,
    add_AR=True,
    add_FF=True,
    add_convexity=True,
    add_curl_old=True,
    add_curl=True,
    add_sphericity=True,
    add_InscribedArea=True,
    add_BlebRel=True,
):
    ############### EXPANDING THE DATASET ##################
    # Add measures of Aspect Ratio, Form Factor, Convexity, Curl, and Sphericity.
    # Input: Data must be an np array with N (row) examples x M (col) measures.
    # Measures should go: Area, MjrAxis, MnrAxis, Ecc, ConA, EqD, Sol, Ext, Per,
    # conPer, fiber_length, InscribeR, bleb_len
    ########################################################
    if add_AR:
        AR = []
        for i in range(len(Data)):
            AR.append(Data[i, 1] / Data[i, 2])
        AR = np.asarray(AR)
        AR = AR.reshape((len(AR), 1))
        Data = np.append(Data, AR, 1)  # concatenates the arrays appropriately.
        if FeatureNames is not None:
            FeatureNames.extend(["AR"])
    if add_FF:
        # This measure is really compactness if you multiply each by 4*pi;
        # note this is different from roundness, which would use the convex perimeter.
        FF = []
        for i in range(len(Data)):
            FF.append(Data[i, 0] / (Data[i, 8] * Data[i, 8]))
            # FF.append(Data[i, 8] * Data[i, 8] / Data[i, 0])
        FF = np.asarray(FF)
        FF = FF.reshape((len(FF), 1))
        Data = np.append(Data, FF, 1)
        if FeatureNames is not None:
            FeatureNames.extend(["FF"])
    if add_convexity:
        CC = []
        for i in range(len(Data)):
            CC.append(Data[i, 8] / Data[i, 9])
        CC = np.asarray(CC)
        CC = CC.reshape((len(CC), 1))
        Data = np.append(Data, CC, 1)
        if FeatureNames is not None:
            FeatureNames.extend(["Convexity"])
    if add_curl_old:
        # Tells how curled the object is; might help for lamellipodia.
        # Curl is length / fiber length (length here is taken as major axis length).
        # The fiber length definition is (perimeter - sqrt(perimeter^2 - 16*Area)) / 4.
        # That definition does not work for a circle (the result would be imaginary),
        # so the 16 was changed to 4*pi and the divisor to pi. This should be fine.
        cc = []
        for i in range(len(Data)):
            if (4 * np.pi * Data[i, 0]) <= (Data[i, 8] * Data[i, 8]):
                fiber_length = (
                    Data[i, 8]
                    - np.sqrt((Data[i, 8] * Data[i, 8]) - (4 * np.pi * Data[i, 0]))
                ) / np.pi  # originally / 4
                cc.append(Data[i, 1] / fiber_length)
            else:
                fiber_length = Data[i, 8] / np.pi  # originally / 4
                cc.append(Data[i, 1] / fiber_length)
        cc = np.asarray(cc)
        cc = cc.reshape((len(cc), 1))
        Data = np.append(Data, cc, 1)
        if FeatureNames is not None:
            FeatureNames.extend(["Curl_old"])
    if add_curl:
        cc = []
        for i in range(len(Data)):
            cc.append(Data[i, 1] / Data[i, 10])
        cc = np.asarray(cc)
        cc = cc.reshape((len(cc), 1))
        Data = np.append(Data, cc, 1)
        # Would be bound between 0 and 1 if major axis length were replaced by feret diameter.
        if FeatureNames is not None:
            FeatureNames.extend(["Curl"])
    if add_sphericity:
        ss = []
        for i in range(len(Data)):
            ss.append(Data[i, 11] * 2 / Data[i, 1])
        ss = np.asarray(ss)
        ss = ss.reshape((len(ss), 1))
        Data = np.append(Data, ss, 1)
        # Bound between 0 (not at all spherical) and 1 (a perfect circle).
        # Would be better if we had feret diameter instead of major axis.
        if FeatureNames is not None:
            FeatureNames.extend(["Sphericity"])
    if add_InscribedArea:
        aa = []
        for i in range(len(Data)):
            aa.append(Data[i, 1] * Data[i, 1] * np.pi / Data[i, 11])
        aa = np.asarray(aa)
        aa = aa.reshape((len(aa), 1))
        Data = np.append(Data, aa, 1)
        if FeatureNames is not None:
            FeatureNames.extend(["InArea"])
    if add_BlebRel:
        bb = []
        for i in range(len(Data)):
            bb.append(Data[i, 12] / Data[i, 11])
        bb = np.asarray(bb)
        bb = bb.reshape((len(bb), 1))
        Data = np.append(Data, bb, 1)
        if FeatureNames is not None:
            FeatureNames.extend(["Bleb_Rel"])
    if FeatureNames is not None:
        return (Data, FeatureNames)
    else:
        return Data
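# Usage sketch (illustrative): append the derived shape measures and keep the
# feature-name list in sync. The short names below are an assumed example list
# matching the column order documented above, not the literal header of the
# results file.
#
#   FeatureNames = ["Area", "MjrAxis", "MnrAxis", "Ecc", "ConA", "EqD", "Sol",
#                   "Ext", "Per", "conPer", "FL", "InR", "bleb_len"]
#   Data, FeatureNames = Add_Measures(Data, FeatureNames)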
def Exclude_Measures(
    Data,
    FeatureNames=None,
    ex_Area=False,
    ex_MjrAxis=False,
    ex_MnrAxis=False,
    ex_Ecc=False,
    ex_ConA=False,
    ex_EqD=False,
    ex_Sol=False,
    ex_Ext=False,
    ex_Per=False,
    ex_conPer=False,
    ex_FL=False,
    ex_InR=False,
    ex_bleb=False,
):
    # Columns: Area, MjrAxis, MnrAxis, Ecc, ConA, EqD, Sol, Ext, Per, conPer, FL, InR, bleb
    del_cols = []
    if ex_Area:
        del_cols.append(0)
    if ex_MjrAxis:
        del_cols.append(1)
    if ex_MnrAxis:
        del_cols.append(2)
    if ex_Ecc:
        del_cols.append(3)
    if ex_ConA:
        del_cols.append(4)
    if ex_EqD:
        del_cols.append(5)
    if ex_Sol:
        del_cols.append(6)
    if ex_Ext:
        del_cols.append(7)
    if ex_Per:
        del_cols.append(8)
    if ex_conPer:
        del_cols.append(9)
    if ex_FL:
        del_cols.append(10)
    if ex_InR:
        del_cols.append(11)
    if ex_bleb:
        del_cols.append(12)
    Data = np.delete(Data, del_cols, 1)
    if FeatureNames is not None:
        FeatureNames = [i for j, i in enumerate(FeatureNames) if j not in del_cols]
        return (Data, FeatureNames)
    else:
        return Data


def open_and_save_test_data(fpath, csvfilename, txtfilename, ratio):
    # fpath = '/volumes/chris stuff/chemsensing/chemsensing/Y27632_120518/Results/'
    # /Rho_Act_120118/Results_after/'
    # filename = 'FinalResults_after'
    # Option to delete certain measures if done so in training.
    # Column order in the csv goes:
    # %frame number %correctedNum %area %centroidx %centroidy %major %minor %eccentricity
    # %orientation %convex area %filledarea %equivDiameter %solidity %extent %perimeter
    # %perimeter old %convex perimeter %fiber length %max in radii %bleb length %centersx %centersy
    data = np.genfromtxt(
        fpath + csvfilename + ".csv",
        delimiter=",",
        usecols=[2, 5, 6, 7, 9, 11, 12, 13, 14, 16, 17, 18, 19],
        skip_header=1,
    )  # was cols 3,6,7,8,10,12,13,14,15
    frames_cell = np.genfromtxt(
        fpath + csvfilename + ".csv", delimiter=",", usecols=[0, 1], skip_header=1
    )
    # Convert pixel units to physical units using the supplied ratio.
    data[:, 0] = data[:, 0] * ratio * ratio  # Area
    data[:, 1] = data[:, 1] * ratio  # MjrAxis
    data[:, 2] = data[:, 2] * ratio  # MnrAxis
    # Ecc is unitless
    data[:, 4] = data[:, 4] * ratio * ratio  # ConvexArea
    data[:, 5] = data[:, 5] * ratio  # EquivDiameter
    # Solidity and Extent are unitless
    data[:, 8] = data[:, 8] * ratio  # Perimeter
    data[:, 9] = data[:, 9] * ratio  # conPerim
    data[:, 10] = data[:, 10] * ratio  # FibLen
    data[:, 11] = data[:, 11] * ratio  # max inscribed radius
    data[:, 12] = data[:, 12] * ratio  # bleb length
    preds = np.genfromtxt(
        fpath + "/" + txtfilename + ".txt",
        delimiter=" ",
        usecols=[4, 5, 6, 7],
        skip_header=1,
    )
    # Assign the predicted class when the top probability exceeds 0.7; otherwise use class 4.
    y_target = np.where(np.max(preds, 1) > 0.7, np.argmax(preds, 1), 4)
    # y_target = np.reshape(y_target, (len(y_target), 1))
    return (data, y_target, frames_cell)
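# Usage sketch (illustrative; the path, prediction file name, pixel ratio, and
# excluded columns below are placeholders rather than values from the original
# experiments):
#
#   data, y_target, frames_cell = open_and_save_test_data(
#       "/path/to/Results/", "FinalResults_after", "predictions", ratio=0.5
#   )
#   data = Exclude_Measures(data, ex_Ecc=True, ex_Sol=True)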