In [19]:
# import important modules
import pandas as pd
matrix_gene = pd.read_csv("../Data/Comparisons/Matrisome_Hs_MasterList.csv") # MOCSvsMOSE
#matrix_gene
In [20]:
matrix_index = ["COL11A1","COMP","FN1","VCAN","CTSB","COL1A1","AGT","ANXA5","ANXA6",
            "LAMB1","FBLN2","LAMC1","LGALS3","CTSG",
            "HSPG2","ANXA1","LAMA4","COL6A6","VWF",
            "ABI3B","PTNXB","COL15A1"] # matrix index gene list

len(matrix_index)
Out[20]:
22
In [ ]:
 
In [21]:
# the rows that only contains the matrix index genes
matrix_gene_final=matrix_gene[matrix_gene['Gene.symbol'].isin
            (["COL11A1","COMP","FN1","VCAN","CTSB","COL1A1","AGT","ANXA5","ANXA6",
            "LAMB1","FBLN2","LAMC1","LGALS3","CTSG",
            "HSPG2","ANXA1","LAMA4","COL6A6","VWF",
            "ABI3B","PTNXB","COL15A1"])]
matrix_gene_final = matrix_gene_final.drop(matrix_gene_final.columns[[0]], axis=1)  # delete first column
matrix_gene_final.to_csv(r'../Python/ECM22genes.csv') # export as csv
matrix_gene_final
#len(matrix_gene_final)

# The genes that are not in the matrisome:
#ABI3B
#PTNXB
Out[21]:
Category Gene.symbol
15 ECM Glycoproteins COMP
43 ECM Glycoproteins FBLN2
54 ECM Glycoproteins FN1
79 ECM Glycoproteins LAMA4
81 ECM Glycoproteins LAMB1
85 ECM Glycoproteins LAMC1
186 ECM Glycoproteins VWF
196 Collagens COL11A1
201 Collagens COL15A1
206 Collagens COL1A1
232 Collagens COL6A6
253 Proteoglycans HSPG2
273 Proteoglycans VCAN
274 ECM-affiliated Proteins ANXA1
281 ECM-affiliated Proteins ANXA5
282 ECM-affiliated Proteins ANXA6
366 ECM-affiliated Proteins LGALS3
492 ECM Regulators AGT
520 ECM Regulators CTSB
525 ECM Regulators CTSG
-->

No comments:

Post a Comment