Plot representation: Matplotlib

import pandas as pd
import numpy as np
import sys
import matplotlib.pyplot as plt

# Load the protein summary table; the file is parsed as single-space-delimited text.
df = pd.read_csv("./Result_protein.txt", sep=" ")

# Preview the first ten rows of the table.
df.head(n=10)

# Scatter the 'family' column against 'npolar', restricted to the first ten rows.
first_rows = df.head(10)
x = first_rows['family']
y = first_rows['npolar']

plt.scatter(x, y)
plt.show()

# Scatter column 6 against column 8 (selected by position) for the first 1000 rows.
# NOTE(review): presumably these are the '%nhydropho' and '%npolar' columns,
# matching the axis labels below -- confirm against the file's column order.
row_limit = 1000
x = df.iloc[:row_limit, 6]
y = df.iloc[:row_limit, 8]
plt.scatter(x, y)

# Decorate the current axes: labels, title and grid.
plt.xlabel('% Hydrophobic', fontsize=15)
plt.ylabel('% Polar', fontsize=15)
plt.title('Proteins', fontsize=25)
plt.grid(True)

plt.show()

# Myoglobin analysis

# Family names to keep; a list so that more families can be added later.
familyname = ["MYOGLOBIN"]

# Build the membership mask once and reuse it. The original filtered the
# frame twice and discarded the first len() result. Bracket access is used
# because attribute access breaks if a column name collides with a
# DataFrame attribute.
is_selected_family = df['family'].isin(familyname)
df2 = df[is_selected_family]

# Number of rows whose 'family' value is in familyname.
len(df2)

163

# Scatter the 'nhydropho' column against 'npolar' for the rows kept in df2.
x, y = df2['nhydropho'], df2['npolar']

plt.scatter(x, y)
plt.show()

	pdbid	chain	type	length	family	nhydropho	%nhydropho	npolar	%npolar	naroma	%naroma	nposich	%nposich	nnegach	%nnegach
0	101m	A	protein	154	MYOGLOBIN	60	0.361446	29	0.174699	22	0.132530	35	0.210843	20	0.120482
1	102l	A	protein	165	T4_LYSOZYME	60	0.361446	46	0.277108	15	0.090361	27	0.162651	18	0.108434
2	102m	A	protein	154	MYOGLOBIN	60	0.363636	29	0.175758	22	0.133333	34	0.206061	20	0.121212
3	103l	A	protein	167	T4_LYSOZYME	60	0.357143	47	0.279762	15	0.089286	27	0.160714	19	0.113095
4	103m	A	protein	154	MYOGLOBIN	60	0.363636	29	0.175758	22	0.133333	34	0.206061	20	0.121212
5	104l	A	protein	166	T4_LYSOZYME	61	0.365269	46	0.275449	15	0.089820	27	0.161677	18	0.107784
6	104l	B	protein	166	T4_LYSOZYME	61	0.365269	46	0.275449	15	0.089820	27	0.161677	18	0.107784
7	104m	A	protein	153	MYOGLOBIN	58	0.351515	28	0.169697	23	0.139394	35	0.212121	21	0.127273
8	105m	A	protein	153	MYOGLOBIN	58	0.351515	28	0.169697	23	0.139394	35	0.212121	21	0.127273
9	106m	A	protein	154	MYOGLOBIN	58	0.349398	29	0.174699	24	0.144578	35	0.210843	20	0.120482

Pandas

Data Analysis using Pandas II (Plot representation with Matplotlib)

Plot representation: Matplotlib