import pandas as pd 
import numpy as np 
from pandas import ExcelWriter

# Label names keyed by their 1-based position. The values are used further
# down in this script as the worksheet names to read from the ranks workbook.
labels = dict(
    enumerate(("age", "stage", "tumor", "histological", "status"), start=1)
)

# Method identifiers keyed by their 1-based position.
# NOTE(review): not referenced in the visible part of this script - confirm
# whether another part of the project still relies on it.
methods = dict(enumerate(("cmim", "disr", "cife", "icap"), start=1))
# Aggregate the per-label gene rankings into one summary sheet.
#
# For every gene that occurs in at least one label sheet of `current_file`,
# the summary records:
#   * Count  - number of label sheets that contain the gene,
#   * Labels - names of those sheets, each followed by a space,
#   * Scores - the gene's 'Average Weighted Ensemble' value(s) from those
#              sheets, each followed by a comma.
# The result is written to the 'Results' sheet of `final_file`.
final_file = "../Results/all_stars.xlsx"
current_file = "../Results/ranks.xlsx"
score_column = 'Average Weighted Ensemble'

# Read every label sheet exactly once; the frames are reused for both the
# gene inventory and the per-sheet score lookup below.
dfs = [
    pd.read_excel(current_file, sheet_name=sheet, header=0)
    for sheet in labels.values()
]

# Unique genes across all sheets. dict.fromkeys() de-duplicates like set()
# but keeps first-seen order, so the output row order is the same on every
# run instead of depending on string hash randomisation.
all_genes = list(dict.fromkeys(
    gene for df in dfs for gene in df['Genes'].tolist()
))

count = [0] * len(all_genes)
name_of_subtypes = [''] * len(all_genes)
scores = [''] * len(all_genes)

for sheet, df in zip(labels.values(), dfs):
    # gene -> list of score strings found in this sheet. A list is kept so
    # that a gene duplicated within one sheet contributes every score rather
    # than the multi-line repr of a pandas Series.
    sheet_scores = {}
    # `.values` keeps the cell values as numpy scalars, so str() renders
    # them the same way as a scalar `.loc` lookup would.
    for gene, score in zip(df['Genes'].tolist(), df[score_column].values):
        # Blank gene cells (NaN) never match any gene, so they are left out
        # of the lookup and end up with Count 0 and empty Labels/Scores.
        if pd.notna(gene):
            sheet_scores.setdefault(gene, []).append(str(score))

    for i, gene in enumerate(all_genes):
        gene_scores = sheet_scores.get(gene)
        if gene_scores is None:
            continue
        count[i] += 1
        # Trailing separators are kept on purpose so the written cells keep
        # the same format ("age stage ", "0.5,0.7,").
        name_of_subtypes[i] += sheet + ' '
        scores[i] += ','.join(gene_scores) + ','

print(count)
print(name_of_subtypes)

res_df = pd.DataFrame({
    'Genes': all_genes,
    'Count': count,
    'Labels': name_of_subtypes,
    'Scores': scores,
})

# The writer is opened only once the result is ready, and the context manager
# saves and closes the workbook on exit (ExcelWriter.save() no longer exists
# in pandas >= 2.0).
with ExcelWriter(final_file) as writer:
    res_df.to_excel(writer, sheet_name='Results', index=False)