ブログ記事「R vs Python:データ解析を比較」を読みつつ、Pythonの練習を少し。
$ brew install python3
$ pip3 install --upgrade pip setuptools
$ brew linkapps python3
$ pip3 install pandas
$ pip3 install seaborn
$ pip3 install scikit-learn
$ wget -O nba_2013.csv "https://www.dropbox.com/s/b3nv38jjo5dxcl6/nba_2013.csv?dl=0"
import pandas
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
# Load the NBA 2013 data set downloaded into the working directory.
nba = pandas.read_csv("nba_2013.csv")

# Quick look at the data: dimensions, first row, and per-column means.
print(nba.shape)
print(nba.head(1))
# numeric_only=True: pandas 2.x raises on non-numeric columns otherwise
# (assumes the CSV contains text columns such as names -- confirm).
print(nba.mean(numeric_only=True))

# Pairwise scatter plots of the "ast", "fg" and "trb" columns.
sns.pairplot(nba[["ast", "fg", "trb"]])
plt.show()

# Cluster the rows into 5 groups; random_state pins the initialisation.
kmeans_model = KMeans(n_clusters=5, random_state=1)
# Keep numeric columns only, dropping any column that has missing values.
good_columns = nba._get_numeric_data().dropna(axis=1)
kmeans_model.fit(good_columns)
labels = kmeans_model.labels_

# Project onto 2 principal components and colour each point by its cluster.
pca_2 = PCA(2)
plot_columns = pca_2.fit_transform(good_columns)
plt.scatter(x=plot_columns[:, 0], y=plot_columns[:, 1], c=labels)
plt.show()
$ python3 test.py