import matplotlib.pyplot as plt
import numpy as np
# The data lives in a DataFrame named `df` (presumably pandas -- confirm) that
# has the columns `col1` (feature) and `col2` (target).
# Scatter plot to check visually whether there is a linear relation.
plt.scatter(df.col1, df.col2)
plt.show()
# Create training and test data sets: every row is assigned to the training
# set with probability 0.8, so the split is roughly (not exactly) 80/20 and
# changes on every run because no random seed is set.
rand = np.random.rand(len(df)) < 0.8
# The mask was built from len(df), so it must be applied to df itself.
train = df[rand]
test = df[~rand]
# MODEL
# Fit an ordinary least squares line col2 ~ col1 on the training rows.
from sklearn import linear_model
regr = linear_model.LinearRegression()
# Double brackets keep feature and target two-dimensional (assuming `train` is
# a pandas DataFrame -- confirm); the [0][0] / [0] indexing of coef_ and
# intercept_ below relies on the target being 2-D.
train_x = np.asanyarray(train[['col1']])
train_y = np.asanyarray(train[['col2']])
regr.fit(train_x, train_y)
# coefficients
print('Coefficients: ', regr.coef_)
print('Intercept: ', regr.intercept_)
# plot output: training points with the fitted regression line on top
plt.scatter(train.col1, train.col2)
plt.plot(train_x, regr.coef_[0][0] * train_x + regr.intercept_[0], color='red')
# Display the figure, as is done for the exploratory plot; without this the
# fitted-line plot is never shown when the code runs as a plain script.
plt.show()
# EVALUATION
# Score the fitted model on the held-out test rows.
from sklearn.metrics import r2_score
test_x = np.asanyarray(test[['col1']])
test_y = np.asanyarray(test[['col2']])
# Trailing underscore marks the predicted counterpart of test_y.
test_y_ = regr.predict(test_x)
print("Mean absolute error: %.2f" % np.mean(np.absolute(test_y_ - test_y)))
# This is the mean of the squared errors, i.e. the MSE, not a residual sum.
print("Mean squared error (MSE): %.2f" % np.mean((test_y_ - test_y) ** 2))
# r2_score expects (y_true, y_pred); R^2 is not symmetric in its arguments,
# so the ground truth must come first.
print("R2-score: %.2f" % r2_score(test_y, test_y_))
Categories
Bash
(3)
BOT
(2)
C#
(1)
Cluster Analysis
(1)
Data Cleaning
(6)
Data Ingestion
(2)
Data Science Specialization
(10)
Data Visualization
(15)
ggplot2
(1)
Hadoop
(1)
Hashnode
(3)
Machine Learning
(5)
MapReduce
(1)
Maps
(1)
Markdown
(7)
Market Basket Analysis
(1)
MATLAB
(1)
Matplotlib
(3)
Numpy
(2)
Octave
(1)
Pandas
(3)
Python
(17)
R
(22)
Regression
(7)
scikit-learn
(1)
Seaborn
(1)
Shell
(3)
Shiny App
(1)
SSIS
(3)
Statistical Inference
(2)
T-SQL
(8)
Unix
(3)
No comments:
Post a Comment