linkedin sandra-acebes mail google github
abrir

Pandas

Data Analysis using Pandas II (Plot representation with Matplotlib)

Plot representation: Matplotlib

In [3]:
import pandas as pd
import numpy as np
import sys
import matplotlib.pyplot as plt
In [4]:
# Load the space-delimited protein results table into a DataFrame.
df = pd.read_csv(
    './Result_protein.txt',
    sep=" ",
)
In [6]:
# Preview the first ten rows to confirm the columns were parsed as expected.
df.head(n=10)
Out[6]:
pdbid chain type length family nhydropho %nhydropho npolar %npolar naroma %naroma nposich %nposich nnegach %nnegach
0 101m A protein 154 MYOGLOBIN 60 0.361446 29 0.174699 22 0.132530 35 0.210843 20 0.120482
1 102l A protein 165 T4_LYSOZYME 60 0.361446 46 0.277108 15 0.090361 27 0.162651 18 0.108434
2 102m A protein 154 MYOGLOBIN 60 0.363636 29 0.175758 22 0.133333 34 0.206061 20 0.121212
3 103l A protein 167 T4_LYSOZYME 60 0.357143 47 0.279762 15 0.089286 27 0.160714 19 0.113095
4 103m A protein 154 MYOGLOBIN 60 0.363636 29 0.175758 22 0.133333 34 0.206061 20 0.121212
5 104l A protein 166 T4_LYSOZYME 61 0.365269 46 0.275449 15 0.089820 27 0.161677 18 0.107784
6 104l B protein 166 T4_LYSOZYME 61 0.365269 46 0.275449 15 0.089820 27 0.161677 18 0.107784
7 104m A protein 153 MYOGLOBIN 58 0.351515 28 0.169697 23 0.139394 35 0.212121 21 0.127273
8 105m A protein 153 MYOGLOBIN 58 0.351515 28 0.169697 23 0.139394 35 0.212121 21 0.127273
9 106m A protein 154 MYOGLOBIN 58 0.349398 29 0.174699 24 0.144578 35 0.210843 20 0.120482
In [25]:
# Series to plot, restricted to the first ten rows (positional slice):
# the family label goes on the x-axis, the `npolar` column on the y-axis.
x = df['family'].iloc[:10]
y = df['npolar'].iloc[:10]
In [27]:
# Scatter plot of the `npolar` values against protein family for the rows
# currently held in x and y. Axis labels and a title are set so the figure
# can be understood without reading the code that built x and y.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set_xlabel('Family', fontsize=15)
ax.set_ylabel('Number of polar residues (npolar)', fontsize=15)
ax.set_title('Polar residues by family', fontsize=25)
plt.show()
In [55]:
# Scatter of the hydrophobic vs. polar residue ratios for the first rows of
# the table. Columns are selected by name instead of by position (6 and 8) so
# the plot does not silently change if the input file gains or reorders columns.
MAX_ROWS = 1000  # cap on the number of rows (points) drawn

x = df['%nhydropho'].iloc[:MAX_ROWS]
y = df['%npolar'].iloc[:MAX_ROWS]

fig, ax = plt.subplots()
ax.scatter(x, y)

# NOTE(review): the previewed values of these columns are fractions (e.g. 0.36),
# not percentages — consider rescaling by 100 or relabeling the axes.
ax.set_xlabel('% Hydrophobic', fontsize=15)
ax.set_ylabel('% Polar', fontsize=15)
ax.set_title('Proteins', fontsize=25)

ax.grid(True)

plt.show()
In [ ]:
# Myoglobin analysis
In [33]:
# Keep only the rows whose `family` value is listed in `familyname`.
# The filter is evaluated once; the row count of the result is the cell output.
familyname = ["MYOGLOBIN"]
df2 = df[df['family'].isin(familyname)]
len(df2)
Out[33]:
163
In [36]:
# Columns of the filtered frame to plot against each other:
# `nhydropho` on the x-axis and `npolar` on the y-axis.
x, y = df2['nhydropho'], df2['npolar']
In [37]:
# Scatter plot of `nhydropho` against `npolar` for the filtered (myoglobin)
# rows held in x and y. Axis labels and a title are set so the figure stands
# on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set_xlabel('Number of hydrophobic residues (nhydropho)', fontsize=15)
ax.set_ylabel('Number of polar residues (npolar)', fontsize=15)
ax.set_title('Myoglobin', fontsize=25)
plt.show()