import pandas as pd 
import numpy as np 
from pandas import ExcelWriter

def eval(x, length):
    """Map rank(s) ``x`` to a weight in (0, 1) using a logistic curve.

    Computes ``1 / (1 + e * exp(-(length / x)))``.

    Args:
        x: A rank or a NumPy array of ranks; used as the divisor of ``length``.
        length: The numerator of the ratio (callers in this file pass the
            number of genes).

    Returns:
        The weight for each element of ``x`` (scalar in, scalar out; array in,
        array out).
    """
    ratio = length / x
    damped = np.exp(-ratio)
    return 1 / (1 + np.e * damped)
    # Earlier weighting variants, kept for reference:
    # return 1/(1+10*np.power(10, -length/x))
    # return np.exp(-1*(x/length))

def avg_weighted_ensemble(df):
    """Combine the per-method ranks of ``df`` into one weighted score.

    For every method name in the module-level ``methods`` dict, the column
    holding that method's rank is multiplied by its ``Weight_<method>`` column.
    The products are added up and the total is divided by the
    ``'Average Rank'`` column.

    Args:
        df: Table containing, for each active method, a ``<method>`` column
            and a ``Weight_<method>`` column, plus ``'Average Rank'``.

    Returns:
        The quotient converted with ``np.longdouble``.
    """
    weighted_total = 0
    for name in methods.values():
        weighted_total = weighted_total + df[name] * df[f'Weight_{name}']
    return np.longdouble(weighted_total / df['Average Rank'])

# Targets to process, one ranking table per entry. Each value is interpolated
# into the ranking file name ("../Results/ranking/<label>_<method>.csv") and is
# also used as the Excel sheet name for that target. In the code visible here
# the integer keys are only used to look the value up again while iterating.
# NOTE: the 'Common' sheet built at the end of the script hard-codes five
# score columns, so the number of active labels has to stay at five unless
# that column list is updated as well.
labels = {
    1: "age",
    2: "stage",
    3: "tumor",
    4: "histological",
    5: "status",
    # 6: ""
}

# Feature-ranking methods to combine. Each value is the second part of the
# ranking file name and becomes both a rank column ("<method>") and a weight
# column ("Weight_<method>") in the per-label table. Commented-out entries
# are disabled methods.
methods = {
    # 1: "jmi",
    # 2: "mim",
    # 3: "mifs",
    # 4: "mrmr",
    5: "cmim",
    6: "disr",
    7: "cife",
    8: "icap",
    # 9: "condred"
}
# Output workbook: receives one sheet per label plus a final 'Common' sheet.
# The path is relative, so it is resolved against the current working directory.
final_file = "../Results/ranks.xlsx"
# Single writer shared by every to_excel call below; finalised at the end of the script.
writer = ExcelWriter(final_file)

# ---------------------------------------------------------------------------
# Build one rank table per label.
#
# For every label: read the gene names from the expression matrix, read each
# method's ranking file, turn the ranked feature order into a per-gene rank,
# derive the average rank, the per-method weights and the weighted ensemble
# score, write the sorted table to its own sheet and keep it in ``dfs``.
# ``all_genes`` collects the gene-name array seen for each label.
# ---------------------------------------------------------------------------
all_genes = []
dfs = []
for label in labels:
    print('{}'.format(labels[label]))
    # NOTE(review): this path always points at the "tumor" directory, whatever
    # the current label is. If each label has its own expression matrix this
    # should probably be derived from labels[label] -- confirm before changing.
    expression_file = "../data/R3/Level- 3/categorization_miRNA(RPMlog2)/tumor/data.csv"
    expression_df = pd.read_csv(expression_file, header=0)
    # Drop the first column and the 'label' column so only gene columns
    # remain. ``axis`` must be passed by keyword: pandas 2.0 no longer accepts
    # it positionally.
    expression_df = expression_df.drop(expression_df.columns[0], axis=1)
    expression_df = expression_df.drop('label', axis=1)
    print(expression_df.shape)
    all_columns = expression_df.columns.values
    all_genes.append(all_columns)
    num_genes = len(all_columns)
    print('No. of genes: {}'.format(num_genes))

    method_names = list(methods.values())
    columns = ['Genes'] + method_names
    method_df = pd.DataFrame(columns=columns)
    method_df['Genes'] = all_columns
    for method_name in method_names:
        current_file = "../Results/ranking/{}_{}.csv".format(labels[label], method_name)
        current_df = pd.read_csv(current_file, header=None, index_col=None)
        print(current_file, current_df.shape)
        # Column 0 lists 1-based feature indices, best-ranked first; shift
        # them to 0-based positions into the gene list.
        current_columns = [x - 1 for x in current_df[0].values.tolist()]
        # Invert "position -> feature" into "feature -> 1-based rank". Slots
        # that the file never mentions keep their own 1-based position.
        ranks = list(range(1, len(current_columns) + 1))
        print(f'Curr len: {len(current_columns)}, Ranks: {len(ranks)}')
        for position, feature in enumerate(current_columns, start=1):
            ranks[feature] = position
        method_df[method_name] = ranks
    print('No. of ranks: {}'.format(len(ranks)))

    # Average only the rank columns. Taking the mean over the whole frame
    # would include the string 'Genes' column, which raises TypeError since
    # pandas 2.0 stopped silently dropping non-numeric columns.
    method_df['Average Rank'] = method_df[method_names].mean(axis=1)
    for method_name in method_names:
        method_df['Weight_{}'.format(method_name)] = eval(np.array(method_df[method_name]), num_genes)
    method_df['Average Weighted Ensemble'] = avg_weighted_ensemble(method_df)
    method_df = method_df.sort_values(['Average Rank', 'Average Weighted Ensemble'], ascending=[True, True])
    # sheet_name is passed by keyword; positional use is deprecated in pandas.
    method_df.to_excel(writer, sheet_name='{}'.format(labels[label]), index=False)
    dfs.append(method_df)

# ---------------------------------------------------------------------------
# Combine the per-label tables into the 'Common' sheet.
#
# Only genes present for every label are kept. For each of them the
# 'Average Weighted Ensemble' score of every label becomes one column, and
# 'Final Score' is the row-wise mean of those columns.
# ---------------------------------------------------------------------------

# Index every per-label table by its first column (the gene name) so that
# scores can be looked up by gene.
dfs = [df.set_index(df.columns[0]) for df in dfs]

# Genes that occur in the gene list of every label.
results = set(all_genes[0])
for s in all_genes[1:]:
    results.intersection_update(s)
print(len(results))

# Walk the common genes in the column order of the first label instead of in
# set order: set iteration order for strings changes between interpreter runs,
# which made the order of tied rows in the output non-reproducible.
common_genes = [gene for gene in dict.fromkeys(all_genes[0]) if gene in results]

# One row per gene, one entry per label (same order as ``dfs``).
weights = [
    [df.loc[gene, 'Average Weighted Ensemble'] for df in dfs]
    for gene in common_genes
]

common_df = pd.DataFrame({'Genes': common_genes})
# NOTE: these five names assume exactly five labels were processed, in the
# order age, stage, tumor, histological, status; pandas raises ValueError if
# the row length does not match.
new_df = pd.DataFrame(columns=['Score_age', 'Score_clinical', 'Score_tumor', 'Score_hist', 'Score_status'], data=weights)
new_df['Final Score'] = new_df.mean(axis=1)
df_concat = pd.concat([common_df, new_df], axis=1)
df_concat = df_concat.sort_values('Final Score', ascending=False)
df_concat.to_excel(writer, sheet_name='Common', index=False)
# ExcelWriter.save() was removed in pandas 2.0; close() writes the workbook
# to disk and releases the file handle on old and new versions alike.
writer.close()