"""Simulate data from a simple linear model, fit OLS, and plot the result.

True model: y = 10 + 20 * x + e, where e ~ Normal(mean=0, sd=50).
x takes each integer value 1..10 ten times (100 observations in total).

The figure shows the raw observations, the sample mean of y at x == 5
(large red marker), and the fitted regression line (black).
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as lr

# Normally distributed errors with mean 0 and standard deviation 50
# (np.random.normal takes loc, scale, size in that order).
e = np.random.normal(0, 50, 100)

# Regressor: the values 1..10, each repeated 10 times. Kept as floats,
# matching what appending to an empty float array would have produced.
x = np.repeat(np.arange(1, 11, dtype=float), 10)

# Systematic slope contribution, then the full response 10 + 20*x + e.
bx = 20 * x
y = 10 + (bx + e)

# scikit-learn expects a 2-D design matrix of shape (n_samples, n_features).
X = x.reshape(-1, 1)
reg = lr()
reg.fit(X, y.reshape(-1, 1))

plt.scatter(x, y)
# Highlight the conditional sample mean of y among observations with x == 5.
plt.scatter(5, y[x == 5].mean(), c='red', s=200)
plt.plot(X, reg.predict(X), color='k')
plt.show()