Pandas
Data Analysis using Pandas II (Plot representation with Matplotlib)
In [ ]:
# Example Data Exploration with Pandas
In [ ]:
## 1.- Data exploration
In [10]:
import seaborn as sns

# Load the 'planets' example dataset through seaborn; the cells below use the
# result as a pandas DataFrame.
planets = sns.load_dataset('planets')
In [11]:
# Preview the last five rows of the dataset.
planets.tail(5)
Out[11]:
In [8]:
# Preview the first five rows of the dataset.
planets.head(5)
Out[8]:
In [9]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
planets.shape
Out[9]:
In [13]:
# Summary statistics over the complete rows only: any row containing a
# missing value is dropped before describing.
(
    planets
    .dropna()
    .describe()
)
Out[13]:
In [ ]:
# Pandas aggregation methods:
# count(); first(); last(); mean(); median(); min(); max(); std(); var(); prod(); sum()
# Note: mad() was deprecated in pandas 1.5 and removed in pandas 2.0.
In [14]:
# Average of the 'number' column across all rows.
planets['number'].mean()
Out[14]:
In [15]:
# Smallest value found in the 'mass' column.
planets['mass'].min()
Out[15]:
In [ ]:
# Group by
In [20]:
# Per-year totals of the numeric columns, computed over complete rows only.
# numeric_only=True keeps the string-valued 'method' column out of the sum;
# without it, pandas >= 2.0 concatenates the method names within each year
# instead of silently dropping the column.
planets.dropna().groupby('year').sum(numeric_only=True)
Out[20]:
In [24]:
# Mean 'mass' for each year, using only rows without missing values.
(
    planets
    .dropna()
    .groupby('year')['mass']
    .mean()
)
Out[24]:
In [26]:
# Dispatching a method to every group: describe() yields one row of summary
# statistics of 'year' per 'method' value. Show the first three methods.
# (No unstack() here: since pandas 0.20 describe() on a grouped Series already
# returns that table, and unstacking it would flatten it into a Series.)
planets.groupby('method')['year'].describe().head(3)
Out[26]:
In [ ]:
# aggregate, filter, transform, apply
In [ ]:
# Transform. Syntax: df.transform(lambda x: x - x.mean())  -- subtracts the mean from each value
In [34]:
# Pivot table: summarises the data with one row per 'year' and one column per
# 'method' value; each cell aggregates 'distance' (pivot_table's default
# aggregation is the mean). Only the first three rows are displayed.
planets.pivot_table(values='distance', index='year', columns='method').head(3)
Out[34]:
In [35]:
# Same pivot table as above, with values / index / columns passed positionally;
# only the first three rows are displayed.
planets.pivot_table('distance', 'year', 'method').iloc[:3]
Out[35]: