In [19]:
# Import the modules this notebook relies on.
import pandas as pd

# Read the matrisome master list into a DataFrame.
# (original author's tag for this cell: MOCSvsMOSE)
matrix_gene = pd.read_csv(
    filepath_or_buffer="../Data/Comparisons/Matrisome_Hs_MasterList.csv"
)
In [20]:
# Matrix index gene list (22 symbols), used to filter the matrisome master list.
# NOTE(review): "ABI3B" and "PTNXB" have no match in the master list (see the
# filtered output further down). They may be misspellings of other symbols
# (possibly ABI3BP / TNXB) — confirm against the source publication.
matrix_index = [
    "COL11A1", "COMP", "FN1", "VCAN", "CTSB", "COL1A1",
    "AGT", "ANXA5", "ANXA6", "LAMB1", "FBLN2", "LAMC1",
    "LGALS3", "CTSG", "HSPG2", "ANXA1", "LAMA4", "COL6A6",
    "VWF", "ABI3B", "PTNXB", "COL15A1",
]

# Show how many genes are in the index.
len(matrix_index)
Out[20]:
22
In [ ]:
In [21]:
# Keep only the master-list rows whose gene symbol is one of the matrix index
# genes. `matrix_index` (defined in the previous cell) is reused here so the
# gene list lives in exactly one place instead of being copy-pasted.
is_index_gene = matrix_gene["Gene.symbol"].isin(matrix_index)

# Drop the first column of the master list by label. Building a new frame from
# `matrix_gene` (rather than mutating in place) keeps this cell safe to re-run.
matrix_gene_final = matrix_gene.loc[is_index_gene].drop(
    columns=matrix_gene.columns[0]
)

# Export the filtered table as CSV (the row index is written as well).
matrix_gene_final.to_csv(r'../Python/ECM22genes.csv')

# Report index genes that have no match in the master list, computed from the
# data instead of maintained by hand. With the current list these are
# "ABI3B" and "PTNXB".
known_symbols = set(matrix_gene["Gene.symbol"])
missing_genes = [gene for gene in matrix_index if gene not in known_symbols]
print("Index genes not found in the matrisome master list:", missing_genes)

# Display the filtered table as the cell output.
matrix_gene_final
Out[21]:
| | Category | Gene.symbol |
|---|---|---|
| 15 | ECM Glycoproteins | COMP |
| 43 | ECM Glycoproteins | FBLN2 |
| 54 | ECM Glycoproteins | FN1 |
| 79 | ECM Glycoproteins | LAMA4 |
| 81 | ECM Glycoproteins | LAMB1 |
| 85 | ECM Glycoproteins | LAMC1 |
| 186 | ECM Glycoproteins | VWF |
| 196 | Collagens | COL11A1 |
| 201 | Collagens | COL15A1 |
| 206 | Collagens | COL1A1 |
| 232 | Collagens | COL6A6 |
| 253 | Proteoglycans | HSPG2 |
| 273 | Proteoglycans | VCAN |
| 274 | ECM-affiliated Proteins | ANXA1 |
| 281 | ECM-affiliated Proteins | ANXA5 |
| 282 | ECM-affiliated Proteins | ANXA6 |
| 366 | ECM-affiliated Proteins | LGALS3 |
| 492 | ECM Regulators | AGT |
| 520 | ECM Regulators | CTSB |
| 525 | ECM Regulators | CTSG |
No comments:
Post a Comment