import pandas as pd 
import numpy as np 
from pandas import ExcelWriter

# Export, for each miRNA list, the highest-'Total' TarBase targets per miRNA
# into one Excel workbook per category (one sheet per row limit).
categories = ['common', 'age', 'histological', 'tumor', 'stage', 'status']

# Columns copied from the TarBase table into every output sheet, in this order.
RESULT_COLUMNS = [
    'miRNA',
    'Gene',
    'Target Gene (Entrez Gene ID)',
    'References (PMID)',
    'Total Experiments',
    'Total PMID',
    'Total',
]

# Maximum number of rows kept per miRNA; one sheet 'Top_<n>' is written for each.
TOP_NUMS = [10000, 10, 5, 2]


def _top_targets(df, mirnas, top_num):
    """Return the ``top_num`` highest-'Total' rows of ``df`` for each miRNA.

    Args:
        df: DataFrame containing at least the ``RESULT_COLUMNS`` columns.
        mirnas: miRNA names to look up; the output follows this order, and a
            name listed twice contributes its rows twice.
        top_num: maximum number of rows kept per miRNA.

    Returns:
        A DataFrame restricted to ``RESULT_COLUMNS``. It is empty (but has
        those columns) when none of the names occur in ``df``.
    """
    pieces = []
    for mirna in mirnas:
        ranked = df[df.miRNA.isin([mirna])].sort_values(['Total'], ascending=False)
        best = ranked.head(top_num)[RESULT_COLUMNS]
        # Skip misses so pd.concat never receives empty frames.
        if not best.empty:
            pieces.append(best)
    if not pieces:
        return pd.DataFrame(columns=RESULT_COLUMNS)
    # A single concat replaces the repeated DataFrame.append calls
    # (DataFrame.append was removed in pandas 2.0).
    return pd.concat(pieces)


# The TarBase table does not depend on the category or the row limit, so it is
# loaded once instead of once per sheet.
tarbase_df = pd.read_csv("../data/tarbase.csv", header=0)

for category in categories:
    # One miRNA name per line; only the trailing newline is stripped.
    with open('../data/list/average/{}.txt'.format(category)) as list_file:
        mirnas = [line.rstrip('\n') for line in list_file]

    # Pre-filter once per category so each per-miRNA lookup scans fewer rows.
    candidates = tarbase_df[tarbase_df.miRNA.isin(mirnas)]

    # The context manager saves and closes the workbook on exit
    # (ExcelWriter.save() was removed in pandas 2.0).
    with ExcelWriter('../Results/KEGG/average/{}.xlsx'.format(category)) as writer:
        for top_num in TOP_NUMS:
            sheet_df = _top_targets(candidates, mirnas, top_num)
            sheet_df.to_excel(writer, sheet_name='Top_{}'.format(top_num), index=False)