机器学习之回归模型

```python
import numpy as np
import pandas as pd
```


```python
# Synthetic linear-regression demo: build a random design matrix, generate a
# noisy target from known coefficients, then recover the coefficients with the
# closed-form least-squares (normal equation) solution.
# NOTE(review): bare values such as `(100, 5)` or `matrix([[...` below look
# like notebook cell outputs pasted in between the statements, not code that
# was meant to be executed.

# 100 samples with 4 features drawn from a standard normal distribution.
x = np.random.randn(100,4)
# Prepend a column of ones to the features so the first coefficient acts as
# the intercept term.
intercept = np.ones(100).reshape(100,1)
x = np.concatenate([intercept,x], axis = 1)
# Wrap in np.matrix so that `*` below means matrix multiplication.
X = np.matrix(x)
X
X.shape
# Recorded output of `X.shape`.
(100, 5)
# Ground-truth coefficients (intercept first) as a 5x1 column vector.
W = np.array([1,2,3,4,5]).reshape(5,1)
y = np.dot(X, W)
# Add Gaussian noise, scaled by noise_scale, to the generated target y.
noise_scale = 0.1
y = y + noise_scale * np.random.randn(100,1)
y = np.matrix(y)
y
y.shape
# Recorded output of `y.shape`.
(100, 1)
# Solve for the linear-regression coefficients w with the normal equation
# w = (X^T X)^-1 X^T y.
w = np.linalg.inv((X.T * X)) * X.T * y
w
# Recorded output of `w`; close to the ground-truth W = [1, 2, 3, 4, 5].
# NOTE(review): `matrix` is not imported under that name by the visible
# imports, so this line would presumably raise NameError if actually run.
matrix([[0.99010829],
        [2.00487925],
        [3.01402089],
        [3.99920209],
        [4.98852767]])
# Mean squared error (MSE) of the fit: sum of squared residuals divided by
# the number of samples.
mse = np.sum(np.power((X * w - y),2)) / len(y)
mse
# Recorded output of `mse`.
0.00938650299583853
# Coefficient of determination R^2 of the fit: 1 - MSE / Var(y).
R2 = 1 - mse / np.var(y)
R2
# Recorded output of `R2`.
0.9998417334694403
class LR_LS:
    """Linear regression fitted in closed form by ordinary least squares.

    The design matrix passed to :meth:`fit` and :meth:`predict` is used as
    given; no intercept column is added, so callers that want an intercept
    must include a column of ones themselves.

    Both ``np.matrix`` and plain 2-D ``np.ndarray`` inputs are accepted,
    because every product is written with ``@`` (matrix multiplication)
    rather than ``*`` (which is element-wise for ndarrays).
    """

    def __init__(self):
        # Coefficient vector estimated by fit(); None until fit() is called.
        self.w = None
        # R^2 score on the training data; None until fit() is called.
        self.R2 = None
        # Result of the most recent predict() call.
        self.y_pred = None

    def fit(self, X, y):
        """Estimate the coefficients from training data.

        Args:
            X: Design matrix of shape (n_samples, n_features).
            y: Target values with n_samples rows.

        Returns:
            The estimated coefficients (also stored in ``self.w``). The R^2
            score of the fit on the training data is stored in ``self.R2``.

        Raises:
            numpy.linalg.LinAlgError: If ``X.T @ X`` is singular.
        """
        # Normal equation (X^T X) w = X^T y. Solving the linear system gives
        # the same w as multiplying by the explicit inverse, but is
        # numerically better behaved.
        self.w = np.linalg.solve(X.T @ X, X.T @ y)
        # Mean squared error between the fitted values and the targets.
        residual = X @ self.w - y
        mse = np.sum(np.power(residual, 2)) / len(y)
        self.R2 = 1 - mse / np.var(y)
        return self.w

    def predict(self, X):
        """Predict targets for new samples using the fitted coefficients.

        Args:
            X: Design matrix of shape (n_samples, n_features), laid out like
                the one passed to :meth:`fit`.

        Returns:
            The predicted values (also stored in ``self.y_pred``).

        Raises:
            ValueError: If called before :meth:`fit`.
        """
        if self.w is None:
            raise ValueError("fit() must be called before predict()")
        self.y_pred = X @ self.w
        return self.y_pred


if __name__ == "__main__":
    # Fit the closed-form least-squares model on the X / y built earlier in
    # this file, then report the coefficients and the training R^2 score.
    lr_ls = LR_LS()
    lr_ls.fit(X, y)
    coef_report = "估计的参数值:%s" % (lr_ls.w,)
    score_report = "模型拟合的分数:%s" % (lr_ls.R2,)
    print(coef_report)
    print(score_report)

    # One new sample as a single row: leading 1 for the intercept column,
    # followed by the four feature values.
    x_test = np.array([[1, 0.1, 0.2, 0.3, 0.4]])
    prediction = lr_ls.predict(x_test)
    print("预测值为: %s" % (prediction,))
估计的参数值:[[0.99010829]
 [2.00487925]
 [3.01402089]
 [3.99920209]
 [4.98852767]]
模型拟合的分数:0.9998417334694403



---------------------------------------------------------------------------

NameError                                 Traceback (most recent call last)

<ipython-input-65-f3e4c7a9a3f8> in <module>
     24     print("模型拟合的分数:%s" %(lr_ls.R2))
     25     x_test = np.array([1,0.1,0.2,0.3,0.4]).reshape(1,-1)
---> 26     print("预测值为: %s" %(lr_ls.predict(x_test)))


<ipython-input-65-f3e4c7a9a3f8> in predict(self, X)
     15         # 用模型预测新新的值
     16         self.y_pred = X * w
---> 17         return y_pred
     18 
     19 


NameError: name 'y_pred' is not defined