Kembali ke EDA

MATPLOTLIB

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

1. Line Plot

2. Scatter Plot

3. Bar Chart

4. Pie Chart

5. Sub Plot

# Basic Matplotlib workflow: create a figure with a single Axes, then draw on it.
# General form: axes.plot(x, y, label='Something')

x = list(range(7))
y = [10 * (value + 1) for value in x]
fig, axes = plt.subplots(figsize=(12, 8))
axes.plot(x, y, label='Percobaan')

# Ten consecutive daily date strings, 2000-01-01 through 2000-01-10.
dates = [f'2000-01-{day:02d}' for day in range(1, 11)]

# One minimum and one maximum temperature reading per entry in `dates`.
min_temperature = [20.7, 17.9, 18.8, 14.6, 17.4, 21.8, 20.0, 15.8, 15.8, 15.8]
max_temperature = [34.7, 28.9, 31.8, 28.4, 30.8, 32.0, 25.6, 28.8, 21.8, 22.8]

fig, axes = plt.subplots(figsize=(12, 8))

# Both series share one Axes; the labels are what the legend displays.
axes.plot(dates, min_temperature, label='Min Temperature')
axes.plot(dates, max_temperature, label='Max Temperature')
axes.legend()

Axes.set_xlabel(): mengatur label sumbu x.

Axes.set_ylabel(): mengatur label sumbu y.

Axes.set_xlim(): membatasi rentang nilai sumbu x.

Axes.set_ylim(): membatasi rentang nilai sumbu y.

# Temperature plot again, this time with labels, title, ticks, limits and legend.
fig, axes = plt.subplots(figsize=(12, 8))

axes.plot(dates, min_temperature, label='Min Temperature')
axes.plot(dates, max_temperature, label='Max Temperature')

# Axis labels and chart title.
axes.set_xlabel('Date', fontsize=12)
axes.set_ylabel('Temperature', fontsize=12)
axes.set_title("Minimum and Maximum Temperature", fontsize=12)

# One x tick per date, with the tick labels rotated by 45 degrees.
axes.set_xticks(dates)
axes.set_xticklabels(dates)
axes.tick_params(axis='x', labelsize=12, labelrotation=45, size=12)

# y-axis limited to 10..40 with a tick every 2 units (stop=41 keeps 40 included).
axes.set_ylim(bottom=10, top=40)
axes.set_yticks(np.arange(start=10, stop=41, step=2))

axes.legend(loc='upper left', fontsize=12)

# Coordinates of the points to plot; edit these two lists to map other points.
x = list(range(1, 5))
y = list(x)

fig, axes = plt.subplots(figsize=(12, 8))

axes.plot(x, y, label='nama')

DATAFRAMES

Silakan download: Canada.xlsx

# Load the 'Canada by Citizenship' sheet, skipping the first 20 rows and the
# last 2 rows of the sheet.
hai = 'Canada.xlsx'
df = pd.read_excel(
    hai,
    sheet_name='Canada by Citizenship',
    skiprows=range(20),
    skipfooter=2,
)

df.head(n=5)  # Preview the first rows; it is not yet specific which columns to keep
Type Coverage OdName AREA AreaName REG RegName DEV DevName 1980 ... 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
0 Immigrants Foreigners Afghanistan 935 Asia 5501 Southern Asia 902 Developing regions 16 ... 2978 3436 3009 2652 2111 1746 1758 2203 2635 2004
1 Immigrants Foreigners Albania 908 Europe 925 Southern Europe 901 Developed regions 1 ... 1450 1223 856 702 560 716 561 539 620 603
2 Immigrants Foreigners Algeria 903 Africa 912 Northern Africa 902 Developing regions 80 ... 3616 3626 4807 3623 4005 5393 4752 4325 3774 4331
3 Immigrants Foreigners American Samoa 909 Oceania 957 Polynesia 902 Developing regions 0 ... 0 0 1 0 0 0 0 0 0 0
4 Immigrants Foreigners Andorra 908 Europe 925 Southern Europe 901 Developed regions 0 ... 0 0 1 1 0 0 0 0 1 1

5 rows × 43 columns

# Remove the columns that will not be used.
df.drop(columns=['AREA', 'REG', 'DEV', 'Type', 'Coverage'], inplace=True)

df.head(n=5)
OdName AreaName RegName DevName 1980 1981 1982 1983 1984 1985 ... 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
0 Afghanistan Asia Southern Asia Developing regions 16 39 39 47 71 340 ... 2978 3436 3009 2652 2111 1746 1758 2203 2635 2004
1 Albania Europe Southern Europe Developed regions 1 0 0 0 0 0 ... 1450 1223 856 702 560 716 561 539 620 603
2 Algeria Africa Northern Africa Developing regions 80 67 71 69 63 44 ... 3616 3626 4807 3623 4005 5393 4752 4325 3774 4331
3 American Samoa Oceania Polynesia Developing regions 0 1 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
4 Andorra Europe Southern Europe Developed regions 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 1 1

5 rows × 38 columns

# Give the descriptive columns clearer names, modifying df in place.
df.rename(
    columns={
        'OdName': 'Country',
        'AreaName': 'Continent',
        'RegName': 'Region',
    },
    inplace=True,
)
df.head(n=5)
Country Continent Region DevName 1980 1981 1982 1983 1984 1985 ... 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
0 Afghanistan Asia Southern Asia Developing regions 16 39 39 47 71 340 ... 2978 3436 3009 2652 2111 1746 1758 2203 2635 2004
1 Albania Europe Southern Europe Developed regions 1 0 0 0 0 0 ... 1450 1223 856 702 560 716 561 539 620 603
2 Algeria Africa Northern Africa Developing regions 80 67 71 69 63 44 ... 3616 3626 4807 3623 4005 5393 4752 4325 3774 4331
3 American Samoa Oceania Polynesia Developing regions 0 1 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
4 Andorra Europe Southern Europe Developed regions 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 1 1

5 rows × 38 columns

# Convert every column name to a string.
df.columns = [str(column) for column in df.columns]
df.head(n=5)
Country Continent Region DevName 1980 1981 1982 1983 1984 1985 ... 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
0 Afghanistan Asia Southern Asia Developing regions 16 39 39 47 71 340 ... 2978 3436 3009 2652 2111 1746 1758 2203 2635 2004
1 Albania Europe Southern Europe Developed regions 1 0 0 0 0 0 ... 1450 1223 856 702 560 716 561 539 620 603
2 Algeria Africa Northern Africa Developing regions 80 67 71 69 63 44 ... 3616 3626 4807 3623 4005 5393 4752 4325 3774 4331
3 American Samoa Oceania Polynesia Developing regions 0 1 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
4 Andorra Europe Southern Europe Developed regions 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 1 1

5 rows × 38 columns

# Use the Country column as the row index, modifying df in place.
df.set_index(keys='Country', inplace=True)
df.head(n=5)
Continent Region DevName 1980 1981 1982 1983 1984 1985 1986 ... 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
Country
Afghanistan Asia Southern Asia Developing regions 16 39 39 47 71 340 496 ... 2978 3436 3009 2652 2111 1746 1758 2203 2635 2004
Albania Europe Southern Europe Developed regions 1 0 0 0 0 0 1 ... 1450 1223 856 702 560 716 561 539 620 603
Algeria Africa Northern Africa Developing regions 80 67 71 69 63 44 69 ... 3616 3626 4807 3623 4005 5393 4752 4325 3774 4331
American Samoa Oceania Polynesia Developing regions 0 1 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
Andorra Europe Southern Europe Developed regions 0 0 0 0 0 0 2 ... 0 0 1 1 0 0 0 0 1 1

5 rows × 37 columns

# Add a per-row 'Total' column: the sum across the numeric columns of each row.
# numeric_only=True is required because df still holds text columns
# (Continent, Region, DevName); without it pandas >= 2.0 tries to add strings
# to integers and raises TypeError instead of silently skipping them.
df['Total'] = df.sum(axis=1, numeric_only=True)
df.head()
Continent Region DevName 1980 1981 1982 1983 1984 1985 1986 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 Total
Country
Afghanistan Asia Southern Asia Developing regions 16 39 39 47 71 340 496 ... 3436 3009 2652 2111 1746 1758 2203 2635 2004 58639
Albania Europe Southern Europe Developed regions 1 0 0 0 0 0 1 ... 1223 856 702 560 716 561 539 620 603 15699
Algeria Africa Northern Africa Developing regions 80 67 71 69 63 44 69 ... 3626 4807 3623 4005 5393 4752 4325 3774 4331 69439
American Samoa Oceania Polynesia Developing regions 0 1 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 6
Andorra Europe Southern Europe Developed regions 0 0 0 0 0 0 2 ... 0 1 1 0 0 0 0 1 1 15

5 rows × 38 columns

# Year labels as strings, '1980' through '2013' (the range stop, 2014, is exclusive).
years = [str(year) for year in range(1980, 2014)]
years
['1980',
 '1981',
 '1982',
 '1983',
 '1984',
 '1985',
 '1986',
 '1987',
 '1988',
 '1989',
 '1990',
 '1991',
 '1992',
 '1993',
 '1994',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013']

LINE CHART

# Take the 'Algeria' row, then keep only the entries for the year labels.
algeria = df.loc['Algeria'].loc[years]
algeria
1980      80
1981      67
1982      71
1983      69
1984      63
1985      44
1986      69
1987     132
1988     242
1989     434
1990     491
1991     872
1992     795
1993     717
1994     595
1995    1106
1996    2054
1997    1842
1998    2292
1999    2389
2000    2867
2001    3418
2002    3406
2003    3072
2004    3616
2005    3626
2006    4807
2007    3623
2008    4005
2009    5393
2010    4752
2011    4325
2012    3774
2013    4331
Name: Algeria, dtype: object
# Convert the year labels in the index from strings to integers.
algeria.index = [int(year_label) for year_label in algeria.index]
algeria
1980      80
1981      67
1982      71
1983      69
1984      63
1985      44
1986      69
1987     132
1988     242
1989     434
1990     491
1991     872
1992     795
1993     717
1994     595
1995    1106
1996    2054
1997    1842
1998    2292
1999    2389
2000    2867
2001    3418
2002    3406
2003    3072
2004    3616
2005    3626
2006    4807
2007    3623
2008    4005
2009    5393
2010    4752
2011    4325
2012    3774
2013    4331
Name: Algeria, dtype: object
# Line chart of the Algeria series, one point per year.
algeria.plot.line(figsize=(10, 6))

plt.title('Citizenship Algeria')

# Axis captions are set through pyplot, which targets the chart drawn above.
plt.ylabel('Banyak Warga')
plt.xlabel('Tahun')
Text(0.5, 0, 'Tahun')

# Compare Albania and Algeria on a single line chart.
albania = df.loc['Albania', years]
albania.index = albania.index.map(int)  # integer years, same as the algeria series

# legend=True labels each line, so the two countries can be told apart
# (previously both lines were drawn without any legend).
albania.plot(kind='line', figsize=(10, 6), legend=True)
algeria.plot(kind='line', figsize=(10, 6), legend=True)

# The title now names both plotted countries instead of Algeria only.
plt.title('Citizenship Albania and Algeria')

plt.ylabel('Banyak Warga')
plt.xlabel('Tahun')
Text(0.5, 0, 'Tahun')

SCATTER PLOT

# Sum every year column over all rows, then wrap the result in a one-column DataFrame.
df_total = df[years].sum(axis=0).to_frame()
df_total
0
1980 99137
1981 110563
1982 104271
1983 75550
1984 73417
1985 69978
1986 86048
1987 134771
1988 139306
1989 164432
1990 188054
1991 207509
1992 221687
1993 222049
1994 193665
1995 187712
1996 200085
1997 192885
1998 160727
1999 179818
2000 216712
2001 242643
2002 223111
2003 217297
2004 232083
2005 257457
2006 247057
2007 232405
2008 243047
2009 248768
2010 276956
2011 246194
2012 256222
2013 257537
# Move the year labels out of the index into a regular column (kept, not dropped).
df_total.reset_index(drop=False, inplace=True)
df_total
index 0
0 1980 99137
1 1981 110563
2 1982 104271
3 1983 75550
4 1984 73417
5 1985 69978
6 1986 86048
7 1987 134771
8 1988 139306
9 1989 164432
10 1990 188054
11 1991 207509
12 1992 221687
13 1993 222049
14 1994 193665
15 1995 187712
16 1996 200085
17 1997 192885
18 1998 160727
19 1999 179818
20 2000 216712
21 2001 242643
22 2002 223111
23 2003 217297
24 2004 232083
25 2005 257457
26 2006 247057
27 2007 232405
28 2008 243047
29 2009 248768
30 2010 276956
31 2011 246194
32 2012 256222
33 2013 257537
# Name the two columns: the year label and the summed value for that year.
df_total.columns = pd.Index(['year', 'total'])
df_total
year total
0 1980 99137
1 1981 110563
2 1982 104271
3 1983 75550
4 1984 73417
5 1985 69978
6 1986 86048
7 1987 134771
8 1988 139306
9 1989 164432
10 1990 188054
11 1991 207509
12 1992 221687
13 1993 222049
14 1994 193665
15 1995 187712
16 1996 200085
17 1997 192885
18 1998 160727
19 1999 179818
20 2000 216712
21 2001 242643
22 2002 223111
23 2003 217297
24 2004 232083
25 2005 257457
26 2006 247057
27 2007 232405
28 2008 243047
29 2009 248768
30 2010 276956
31 2011 246194
32 2012 256222
33 2013 257537
# Scatter plot of 'total' against 'year'.
df_total.plot.scatter(x='year', y='total', figsize=(10, 6))

# Same scatter plot, now with a title and vertical x tick labels.
df_total.plot.scatter(x='year', y='total', figsize=(10, 6))

plt.title('Scatters Plot')
plt.xticks(rotation='vertical')
([0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33],
 <a list of 34 Text major ticklabel objects>)

PIE CHARTS

# Aggregate the rows by continent, summing every numeric column.
# - The explicit axis=0 argument is dropped: it is the default and the `axis`
#   keyword of groupby is deprecated in recent pandas releases.
# - numeric_only=True keeps only numeric columns in the result; without it
#   pandas >= 2.0 no longer drops the remaining text columns (Region, DevName)
#   from the sum.
df_continents = df.groupby('Continent').sum(numeric_only=True)
df_continents
1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 Total
Continent
Africa 3951 4363 3819 2671 2639 2650 3782 7494 7552 9894 ... 27523 29188 28284 29890 34534 40892 35441 38083 38543 618948
Asia 31025 34314 30214 24696 27274 23850 28739 43203 47454 60256 ... 159253 149054 133459 139894 141434 163845 146894 152218 155075 3317794
Europe 39760 44802 42720 24638 22287 20844 24370 46698 54726 60893 ... 35955 33053 33495 34692 35078 33425 26778 29177 28691 1410947
Latin America and the Caribbean 13081 15215 16769 15427 13678 15171 21179 28471 21924 25060 ... 24747 24676 26011 26547 26867 28818 27856 27173 24950 765148
Northern America 9378 10030 9074 7100 6661 6543 7074 7705 6469 6790 ... 8394 9613 9463 10190 8995 8142 7677 7892 8503 241142
Oceania 1942 1839 1675 1018 878 920 904 1200 1181 1539 ... 1585 1473 1693 1834 1860 1834 1548 1679 1775 55174

6 rows × 35 columns

# Pie chart of the per-continent totals; autopct adds a percentage label to each wedge.
df_continents['Total'].plot.pie(figsize=(5, 6), autopct='%1.1f%%')

# Same pie chart, rotated to start at 90 degrees and drawn with a shadow.
df_continents['Total'].plot.pie(
    figsize=(5, 6),
    autopct='%1.1f%%',
    startangle=90,
    shadow=True,
)

plt.axis('equal')  # equal axis scaling, intended to keep the pie perfectly round
plt.show()

BAR CHART

algeria.head(n=5)  # Preview the first five entries of the Algeria series
1980    80
1981    67
1982    71
1983    69
1984    63
Name: Algeria, dtype: object
# Vertical bar chart of the Algeria series: one bar per year.
algeria.plot.bar(figsize=(10, 6))

plt.xlabel('Year')
plt.ylabel('Banyak Warga')
plt.title('Bar Chart')
Text(0.5, 1.0, 'Bar Chart')

# Horizontal bar chart of the Algeria series: one bar per year.
algeria.plot(kind='barh', figsize=(10, 6))

# In a horizontal bar chart the index (years) runs along the y-axis and the
# values along the x-axis, so the captions are the reverse of the vertical
# bar chart; they were previously swapped.
plt.xlabel('Banyak Warga')
plt.ylabel('Year')
plt.title('Bar Chart')
Text(0.5, 1.0, 'Bar Chart')

SUBPLOT

sns.set_style('whitegrid')

# One figure holding two side-by-side Axes (1 row, 2 columns).
fig = plt.figure(figsize=(30, 20))
ax0 = fig.add_subplot(121)
ax1 = fig.add_subplot(122)

# Pie chart: ax=ax0 draws onto the first Axes instead of a new figure.
df_continents['Total'].plot.pie(ax=ax0)
ax0.set_title('Pie Chart')
ax0.axis('equal')

# Line chart: both country series go onto the second Axes.
# NOTE(review): figsize is passed together with ax here; this appears to resize
# the shared figure created above with figsize=(30, 20) — confirm which size is intended.
algeria.plot.line(figsize=(15, 6), ax=ax1)
albania.plot.line(figsize=(15, 6), ax=ax1)
ax1.set_title('Line Plot')

ax1.set_ylabel('Jumlah Warga')
ax1.set_xlabel('Tahun')

plt.show()