### 机器学习预测共享单车

import sklearn
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib
%matplotlib inline
matplotlib.rcParams['font.sans-serif']=['SimHei']
# Split the dataset into training and test partitions.
from sklearn.model_selection import train_test_split
# NOTE(review): `data` is not defined anywhere in the visible script; it is
# assumed to be a pandas DataFrame loaded beforehand (e.g. in an earlier
# notebook cell) -- confirm before running this file stand-alone.
print(data.shape)  # print (rows, columns)
# Drop the columns that are not needed, selecting them by column name.
df = data.drop(['dteday', 'casual', 'registered'], axis=1)
# NOTE(review): the original author recorded (731, 12) here but 13 columns
# after the split below -- the two column counts disagree; verify.
print(df.shape)
# print(df.info())  # check whether there are null values
# Convert the DataFrame into a np.ndarray. DataFrame.as_matrix() was removed
# in pandas 1.0; to_numpy() is the supported replacement.
dataset = df.to_numpy()
# Hold out 10% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
train_set, test_set = train_test_split(dataset, test_size=0.1, random_state=37)
# Inspect both partitions to confirm the 9:1 split.
print(train_set.shape)  # original author recorded (657, 13)
print(test_set.shape)  # original author recorded (74, 13)
print(dataset[:3])
# Build the random-forest regression model.
from sklearn.ensemble import RandomForestRegressor
# n_estimators is the number of decision trees; more trees generally help,
# but the gain levels off beyond a certain point.
# NOTE(review): no random_state is passed, so fitted trees (and therefore the
# reported scores) may differ from run to run -- set one if reproducibility
# is required.
rf_regressor = RandomForestRegressor(n_estimators=1000, max_depth=10, min_samples_split=10)
# Every column except the last is used as a feature; the last column is the
# regression target (presumably the rental count -- confirm against `data`).
rf_regressor.fit(train_set[:, :-1], train_set[:, -1])  # train the model
# Predict on the held-out test set so the model can be evaluated.
predict_test_y = rf_regressor.predict(test_set[:, :-1])

import sklearn.metrics as metrics

# Ground-truth targets are the last column of the held-out test matrix.
true_test_y = test_set[:, -1]

# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# but explained variance and R^2 are normalised by the variance of y_true, so
# swapping the arguments reports a different (wrong) score.
print('随机森林回归模型的评测结果----->>>')
print('均方误差MSE ：{}'.format(round(metrics.mean_squared_error(true_test_y, predict_test_y), 2)))
print('解释方差分  ：{}'.format(round(metrics.explained_variance_score(true_test_y, predict_test_y), 2)))
print('R平方得分   ：{}'.format(round(metrics.r2_score(true_test_y, predict_test_y), 2)))

# Drop the unneeded columns again (axis=1 selects columns, axis=0 would
# select rows). This repeats the drop-and-convert step done earlier in the
# script.
df = data.drop(['dteday', 'casual', 'registered'], axis=1)
# Convert the tabular data into a np.ndarray matrix. DataFrame.as_matrix()
# was removed in pandas 1.0; to_numpy() is the supported replacement.
dataset = df.to_numpy()