import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as lr

# Simulate 100 observations from the true model: y = 10 + 20*x + e

# Noise term e: normally distributed with mean 0 and standard deviation 50
# (the second argument of np.random.normal is the scale, not the mean).
e = np.random.normal(0, 50, 100)

# Predictor x: each level 1..10 repeated 10 times, in ascending order.
# np.repeat builds the array in a single call instead of re-allocating it on
# every np.append; dtype=float keeps the float64 dtype that appending to an
# empty array used to produce.
x = np.repeat(np.arange(1, 11, dtype=float), 10)

# Slope contribution 20*x, computed element-wise on the array.
bx = 20 * x

# Response: the scalar intercept 10 broadcasts across all 100 observations.
y = 10 + (bx + e)

# Fit a linear regression of y on x. Both arrays are reshaped into single
# columns (n_samples, 1) before being handed to the estimator.
features = x.reshape(-1, 1)
targets = y.reshape(-1, 1)
reg = lr()
reg.fit(features, targets)

# Raw observations.
plt.scatter(x, y)

# Large red marker: sample mean of y over the observations where x == 5.
at_five = np.argwhere(x == 5)
plt.scatter(5, y[at_five].mean(), c='red', s=200)

# Fitted regression line, drawn in black over the scatter.
fitted = reg.predict(features)
plt.plot(features, fitted, color='k')
plt.show()