import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
Kembali ke EDA
# Load the iris dataset that ships with seaborn
iris = sns.load_dataset('iris')
iris
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
| ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
Apa saja cara yang bisa dipakai untuk membuat plot di seaborn?
- List/Series/array
- Pandas Dataframe dan kolom
- Langsung Dataframenya
*** List/Array/Series ***
# Take only the underlying values of the sepal length and sepal width columns
length, width = (
    iris[column].values for column in ('sepal_length', 'sepal_width')
)
print(length,width)
[5.1 4.9 4.7 4.6 5. 5.4 4.6 5. 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
5.7 5.1 5.4 5.1 4.6 5.1 4.8 5. 5. 5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.
5.5 4.9 4.4 5.1 5. 4.5 4.4 5. 5.1 4.8 5.1 4.6 5.3 5. 7. 6.4 6.9 5.5
6.5 5.7 6.3 4.9 6.6 5.2 5. 5.9 6. 6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
6.3 6.1 6.4 6.6 6.8 6.7 6. 5.7 5.5 5.5 5.8 6. 5.4 6. 6.7 6.3 5.6 5.5
5.5 6.1 5.8 5. 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6. 6.9 5.6 7.7 6.3 6.7 7.2
6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6. 6.9 6.7 6.9 5.8 6.8
6.7 6.7 6.3 6.5 6.2 5.9] [3.5 3. 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 3.7 3.4 3. 3. 4. 4.4 3.9 3.5
3.8 3.8 3.4 3.7 3.6 3.3 3.4 3. 3.4 3.5 3.4 3.2 3.1 3.4 4.1 4.2 3.1 3.2
3.5 3.6 3. 3.4 3.5 2.3 3.2 3.5 3.8 3. 3.8 3.2 3.7 3.3 3.2 3.2 3.1 2.3
2.8 2.8 3.3 2.4 2.9 2.7 2. 3. 2.2 2.9 2.9 3.1 3. 2.7 2.2 2.5 3.2 2.8
2.5 2.8 2.9 3. 2.8 3. 2.9 2.6 2.4 2.4 2.7 2.7 3. 3.4 3.1 2.3 3. 2.5
2.6 3. 2.6 2.3 2.7 3. 2.9 2.9 2.5 2.8 3.3 2.7 3. 2.9 3. 3. 2.5 2.9
2.5 3.6 3.2 2.7 3. 2.5 2.8 3.2 3. 3.8 2.6 2.2 3.2 2.8 2.8 2.7 3.3 3.2
2.8 3. 2.8 3. 2.8 3.8 2.8 2.8 2.6 3. 3.4 3.1 3. 3.1 3.1 3.1 2.7 3.2
3.3 3. 2.5 3. 3.4 3. ]
# Scatter plot built directly from the two arrays `length` and `width`
sns.scatterplot(x=length,y=width)
*** Dataframe dan Nama Kolomnya ***
# Option 1: hand the DataFrame columns themselves to x and y
sns.scatterplot(x=iris['sepal_length'], y=iris['sepal_width'])
# Option 2: hand over the DataFrame and refer to its columns by name
sns.scatterplot(data=iris, x='sepal_length', y='sepal_width')
*** Langsung Menggunakan Dataframenya ***
# Pass the whole DataFrame directly, without naming an x or y column
sns.boxplot(data=iris)
YUK BELAJAR BUAT PLOTNYA
- Scatter Plot
- Distribution Plot
- Count Plot
- Box Plot
- Heatmap (Benda)
*** SCATTER PLOT ***
# sns.scatterplot(x=..., y=..., data=...): which column goes on x, which column
# goes on y, and which DataFrame they come from when plotting from a DataFrame
sns.scatterplot(x='sepal_length', y='sepal_width', data=iris)
# The same plot drawn with Matplotlib
plt.scatter(x='sepal_length', y='sepal_width', data=iris)
# Seaborn can add hue: split the colours by some variable, usually a categorical one
sns.scatterplot(data=iris, x='sepal_length', y='sepal_width', hue='species')
# The palette argument chooses the colours
sns.scatterplot(
    data=iris,
    x='sepal_length',
    y='sepal_width',
    hue='species',
    palette='Accent_r',
)
# A scatter plot can also be drawn together with its regression line
sns.regplot(data=iris, x='sepal_length', y='sepal_width')
sns.regplot(data=iris, x='petal_length', y='petal_width')
# The correlation between two columns can be computed with .corr
iris['petal_length'].corr(iris['petal_width'])
# Output: 0.962865431402796
*** DISTRIBUTION PLOT ***
# `distplot` is deprecated (seaborn emits a FutureWarning asking to move to
# `displot` or `histplot`). `histplot` with a KDE overlay on a density scale is
# the axes-level replacement for the default `distplot` output.
sns.histplot(iris['petal_length'], kde=True, stat='density')

sns.histplot(iris['sepal_width'], kde=True, stat='density')

# A normal distribution has a skewness of 0
iris['sepal_width'].skew()
# Output: 0.31896566471359966
iris['petal_length'].skew()
# Output: -0.27488417975101276
# Histograms of sepal width and petal width
sns.histplot(iris['sepal_width'])
sns.histplot(iris['petal_width'])
# Cumulative histogram of sepal width
sns.histplot(iris['sepal_width'],cumulative=True)
*** COUNT PLOT ***
# Switch to another dataset that ships with seaborn: tips
tips=sns.load_dataset('tips')
tips.head(10)
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
| 5 | 25.29 | 4.71 | Male | No | Sun | Dinner | 4 |
| 6 | 8.77 | 2.00 | Male | No | Sun | Dinner | 2 |
| 7 | 26.88 | 3.12 | Male | No | Sun | Dinner | 4 |
| 8 | 15.04 | 1.96 | Male | No | Sun | Dinner | 2 |
| 9 | 14.78 | 3.23 | Male | No | Sun | Dinner | 2 |
sns.countplot(data=tips, x='day')
sns.countplot(data=tips, x='sex', palette='Accent')
sns.countplot(data=tips, x='day', hue='sex', palette='Blues')
# To put the categories on the vertical axis, pass the column as y= instead of x=
sns.countplot(data=tips, x='sex', hue='smoker')
sns.countplot(data=tips, y='sex', hue='smoker')
*** BOX PLOT ***
sns.boxplot(data=iris, x='sepal_width')
# The line inside the box is the median; the left edge of the coloured box is Q1
# and the right edge is Q3.
# The whiskers reach out from the box by at most 1.5 * (Q3 - Q1).
# Anything beyond the whisker ends is treated as an outlier.
sns.boxplot(data=iris, x='petal_width')
# Why is the left whisker so short? It stops at the smallest value, and there is
# no data further to the left.
*** HEATMAP ***
iris.drop('species', axis=1, inplace=True)
iris.corr()
| | sepal_length | sepal_width | petal_length | petal_width |
|---|---|---|---|---|
| sepal_length | 1.000000 | -0.117570 | 0.871754 | 0.817941 |
| sepal_width | -0.117570 | 1.000000 | -0.428440 | -0.366126 |
| petal_length | 0.871754 | -0.428440 | 1.000000 | 0.962865 |
| petal_width | 0.817941 | -0.366126 | 0.962865 | 1.000000 |
# Heatmap of the correlation matrix of iris
sns.heatmap(iris.corr())
# Same heatmap drawn with the 'YlGnBu' colormap
sns.heatmap(iris.corr(),cmap='YlGnBu')
# Same heatmap again, this time rotating the x-axis tick labels by 45 degrees
sns.heatmap(iris.corr(),cmap='YlGnBu')
plt.xticks(rotation=45)
# Keep the correlation matrix in its own variable; the heatmap call below
# refers to it by name several times.
korelasi_iris = iris.corr()
# Indexing with the boolean mask keeps only cells with a correlation >= 0.5 or
# <= -0.5; the remaining cells become NaN and are left blank in the heatmap.
sns.heatmap(
    korelasi_iris[(korelasi_iris >= 0.5) | (korelasi_iris <= -0.5)],
    annot=True,
    cmap='Blues',
    linewidths=1,  # seaborn's documented name for the width of the cell borders
    linecolor='black',
)