import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
Kembali ke EDA
# Load the iris dataset that ships with seaborn.
iris = sns.load_dataset('iris')
# Bare expression: shows the DataFrame as the notebook cell output.
iris
| | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
Apa saja cara yang bisa dipakai untuk membuat plot di seaborn?
- List/Series/array
- Pandas Dataframe dan kolom
- Langsung Dataframenya
*** List/Array/Series ***
# Take only the underlying values (NumPy arrays) of the sepal length
# and sepal width columns.
length = iris['sepal_length'].values
width = iris['sepal_width'].values
print(length, width)
[5.1 4.9 4.7 4.6 5. 5.4 4.6 5. 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
5.7 5.1 5.4 5.1 4.6 5.1 4.8 5. 5. 5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.
5.5 4.9 4.4 5.1 5. 4.5 4.4 5. 5.1 4.8 5.1 4.6 5.3 5. 7. 6.4 6.9 5.5
6.5 5.7 6.3 4.9 6.6 5.2 5. 5.9 6. 6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
6.3 6.1 6.4 6.6 6.8 6.7 6. 5.7 5.5 5.5 5.8 6. 5.4 6. 6.7 6.3 5.6 5.5
5.5 6.1 5.8 5. 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6. 6.9 5.6 7.7 6.3 6.7 7.2
6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6. 6.9 6.7 6.9 5.8 6.8
6.7 6.7 6.3 6.5 6.2 5.9] [3.5 3. 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 3.7 3.4 3. 3. 4. 4.4 3.9 3.5
3.8 3.8 3.4 3.7 3.6 3.3 3.4 3. 3.4 3.5 3.4 3.2 3.1 3.4 4.1 4.2 3.1 3.2
3.5 3.6 3. 3.4 3.5 2.3 3.2 3.5 3.8 3. 3.8 3.2 3.7 3.3 3.2 3.2 3.1 2.3
2.8 2.8 3.3 2.4 2.9 2.7 2. 3. 2.2 2.9 2.9 3.1 3. 2.7 2.2 2.5 3.2 2.8
2.5 2.8 2.9 3. 2.8 3. 2.9 2.6 2.4 2.4 2.7 2.7 3. 3.4 3.1 2.3 3. 2.5
2.6 3. 2.6 2.3 2.7 3. 2.9 2.9 2.5 2.8 3.3 2.7 3. 2.9 3. 3. 2.5 2.9
2.5 3.6 3.2 2.7 3. 2.5 2.8 3.2 3. 3.8 2.6 2.2 3.2 2.8 2.8 2.7 3.3 3.2
2.8 3. 2.8 3. 2.8 3.8 2.8 2.8 2.6 3. 3.4 3.1 3. 3.1 3.1 3.1 2.7 3.2
3.3 3. 2.5 3. 3.4 3. ]
# Plot directly from the two arrays extracted above.
sns.scatterplot(x=length, y=width)
*** Dataframe dan Nama Kolomnya ***
# Variant 1: pass the DataFrame columns (Series) themselves.
sns.scatterplot(x=iris['sepal_length'], y=iris['sepal_width'])
# Variant 2: pass column names and let seaborn look them up in `data`.
sns.scatterplot(x='sepal_length', y='sepal_width', data=iris)
*** Langsung Menggunakan Dataframenya ***
# Pass the whole DataFrame without naming any column.
sns.boxplot(data=iris)
YUK BELAJAR BUAT PLOTNYA
- Scatter Plot
- Distribution Plot
- Count Plot
- Box Plot
- Heatmap (Benda)
*** SCATTER PLOT ***
# sns.scatterplot(x, y, data): which column goes on x, which on y, and
# which DataFrame the columns come from.
sns.scatterplot(x='sepal_length', y='sepal_width', data=iris)
# The same plot using matplotlib directly.
plt.scatter(x='sepal_length', y='sepal_width', data=iris)
# Seaborn can add `hue`: colour the points by some variable, usually a
# categorical one.
sns.scatterplot(x='sepal_length', y='sepal_width', data=iris, hue='species')
# The `palette` argument chooses the colours used for the hue levels.
sns.scatterplot(x='sepal_length', y='sepal_width', data=iris, hue='species', palette='Accent_r')
# A scatter plot can also be drawn together with its regression line.
sns.regplot(x='sepal_length', y='sepal_width', data=iris)
sns.regplot(x='petal_length', y='petal_width', data=iris)
# The correlation value itself can be computed with .corr().
iris['petal_length'].corr(iris['petal_width'])
0.962865431402796
*** DISTRIBUTION PLOT ***
# NOTE: distplot is deprecated (see the FutureWarning printed below);
# new code should use histplot or displot instead.
sns.distplot(iris['petal_length'])
F:\kim\anaconda3\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
# NOTE: distplot is deprecated (see the FutureWarning printed below);
# new code should use histplot or displot instead.
sns.distplot(iris['sepal_width'])
F:\kim\anaconda3\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
iris['sepal_width'].skew()  # A normal distribution has a skewness of 0
0.31896566471359966
# Skewness of petal length, for comparison with sepal width.
iris['petal_length'].skew()
-0.27488417975101276
# Histograms with histplot.
sns.histplot(iris['sepal_width'])
sns.histplot(iris['petal_width'])
# Cumulative histogram of sepal width.
sns.histplot(iris['sepal_width'], cumulative=True)
*** COUNT PLOT ***
# Switch to another dataset that ships with seaborn: tips.
tips = sns.load_dataset('tips')
# Show the first 10 rows.
tips.head(10)
| | total_bill | tip | sex | smoker | day | time | size |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
5 | 25.29 | 4.71 | Male | No | Sun | Dinner | 4 |
6 | 8.77 | 2.00 | Male | No | Sun | Dinner | 2 |
7 | 26.88 | 3.12 | Male | No | Sun | Dinner | 4 |
8 | 15.04 | 1.96 | Male | No | Sun | Dinner | 2 |
9 | 14.78 | 3.23 | Male | No | Sun | Dinner | 2 |
# Count the rows in each category.
sns.countplot(x='day', data=tips)
sns.countplot(x='sex', data=tips, palette='Accent')
sns.countplot(x='day', data=tips, hue='sex', palette='Blues')
# To put the categories on the vertical axis, pass y= instead of x=.
sns.countplot(x='sex', data=tips, hue='smoker')
sns.countplot(y='sex', data=tips, hue='smoker')
*** BOX PLOT ***
sns.boxplot(x='sepal_width', data=iris)
# The line in the middle is the median; the left edge of the coloured box
# is Q1 and the right edge is Q3.
# The whiskers reach from the box up to 1.5 times (Q3 - Q1).
# Points beyond the whisker ends are treated as outliers.
sns.boxplot(x='petal_width', data=iris)
# Why is the left whisker so short? Because it stops at the smallest value;
# no data lies further to the left.
*** HEATMAP ***
# Drop the categorical 'species' column in place, then compute the
# pairwise correlation matrix of the remaining columns.
# NOTE: the drop mutates `iris`, so re-running this cell raises KeyError.
iris.drop('species', axis=1, inplace=True)
iris.corr()
| | sepal_length | sepal_width | petal_length | petal_width |
---|---|---|---|---|
sepal_length | 1.000000 | -0.117570 | 0.871754 | 0.817941 |
sepal_width | -0.117570 | 1.000000 | -0.428440 | -0.366126 |
petal_length | 0.871754 | -0.428440 | 1.000000 | 0.962865 |
petal_width | 0.817941 | -0.366126 | 0.962865 | 1.000000 |
# Heatmap of the correlation matrix with the default colormap.
sns.heatmap(iris.corr())
# Same heatmap with an explicit colormap.
sns.heatmap(iris.corr(), cmap='YlGnBu')
# Same again, with the x tick labels rotated by 45 degrees.
sns.heatmap(iris.corr(), cmap='YlGnBu')
plt.xticks(rotation=45)
# The iris correlation matrix must be stored in a new variable so that
# the code below does not raise an error.
korelasi_iris = iris.corr()
# Keep only correlations with |r| >= 0.5; the boolean mask turns the
# other cells into NaN, which the heatmap leaves blank.
# `linewidths` is seaborn's documented name for the cell-border width.
sns.heatmap(
    korelasi_iris[(korelasi_iris >= 0.5) | (korelasi_iris <= -0.5)],
    annot=True,
    cmap='Blues',
    linewidths=1,
    linecolor='black',
)