import pandas as pd 
import numpy as np 
from pandas import ExcelWriter

# Base names of the gene-list text files under ../data/list/average/.
# Each name is also used as the worksheet title in the output workbook.
categories = {
    1: "common",
    2: "age",
    3: "tumor",
    4: "status",
    5: "stage",
    6: "histological"
}


def _normalize_gene_name(name):
    """Return *name* in the form used for matching against the 'miRNA' column.

    Names containing more than two hyphens are lower-cased and have their
    last hyphen-separated segment removed; all other names are returned
    unchanged (including their original case).
    NOTE(review): the stripped segment is presumably an arm suffix such as
    "-3p"/"-5p" -- confirm against the list files.
    """
    if name.count('-') > 2:
        return name.lower().rsplit('-', 1)[0]
    return name


hsa_df = pd.read_excel("../data/hsa.xlsx")

# The context manager writes and closes the workbook on exit; the explicit
# ExcelWriter.save() call is no longer available in pandas >= 2.0.
with ExcelWriter('../Results/hsa.xlsx') as writer:
    for category_name in categories.values():
        # Read one gene name per line; `with` guarantees the file is closed.
        list_path = '../data/list/average/{}.txt'.format(category_name)
        with open(list_path) as gene_file:
            genes = [
                _normalize_gene_name(line.rstrip('\n')) for line in gene_file
            ]

        # Keep only the rows whose miRNA appears in this category's list.
        common_genes_df = hsa_df[hsa_df['miRNA'].isin(genes)]
        print(common_genes_df.head())

        # Sort by miRNA, then keep the first row for each distinct TF.
        # Returning new frames (instead of inplace=True on a filtered slice)
        # avoids pandas' SettingWithCopyWarning.
        common_genes_df = (
            common_genes_df.sort_values("miRNA").drop_duplicates("TF")
        )
        common_genes_df.to_excel(writer, sheet_name=category_name, index=False)
