import numpy as np
import pandas as pd
import sklearn
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
# Fit a linear regression that predicts the G3 column from the other
# selected columns, then print every held-out prediction and the test score.

# Load the semicolon-separated data set; without this line ``data`` is
# undefined and the column selection below fails with a NameError.
data = pd.read_csv("student-mat.csv", sep=";")

# Keep only the columns used by the model.
data = data[["G1", "G2", "G3", "studytime", "failures", "absences", "freetime", "age"]]

# Name of the column the model is trained to predict.
predict = "G3"

# Feature matrix: every selected column except the target.  ``columns=`` is
# used because recent pandas releases no longer accept ``axis`` positionally.
x = np.array(data.drop(columns=[predict]))
y = np.array(data[predict])

# Hold out 20% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

linear = linear_model.LinearRegression()
linear.fit(x_train, y_train)

# Score of the fitted model on the held-out rows (computed once).
acc = linear.score(x_test, y_test)

# Print each prediction next to the features it came from and the true value.
prediction = linear.predict(x_test)
for predicted, features, actual in zip(prediction, x_test, y_test):
    print(predicted, features, actual)
print(acc)
import pandas as pd
import numpy as np
np.random.seed(42)
# Matplotlib and seaborn for plotting
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
matplotlib.rcParams['font.size'] = 16
matplotlib.rcParams['figure.figsize'] = (9, 9)
import seaborn as sns
from IPython.core.pylabtools import figsize
# Scipy helper functions
from scipy.stats import percentileofscore
from scipy import stats
# Standard ML Models for comparison
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
# Splitting data into training/testing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error
# Distributions
import scipy
# Read in class scores
df = pd.read_csv("student-mat.csv", sep=";")

# Filter out rows whose G3 value is 0 or 1
df = df[~df['G3'].isin([0, 1])]

# From here on the G3 column is referred to as 'Grade'.
df = df.rename(columns={'G3': 'Grade'})

# Bare inspection expressions: a notebook shows them as cell output; they
# print nothing when the file is executed as a plain script.
df.head()
df.shape
df.describe()

# Print the value counts for categorical columns
for col in df.columns:
    # Only columns stored with the generic object dtype are reported.
    if df[col].dtype == 'object':
        print('\nColumn Name:', col)
        print(df[col].value_counts())

# Summary statistics and frequency table of the grade column (notebook output).
df['Grade'].describe()
df['Grade'].value_counts()
# Bar plot of grades: one bar per distinct grade, height = number of rows.
grade_counts = df['Grade'].value_counts()  # computed once, reused for x and height

# ``color`` sets the bar face colour.  The previous ``fill='navy'`` only
# toggled the boolean "fill" patch property, so the bars were never navy.
plt.bar(grade_counts.index, grade_counts.values,
        color='navy', edgecolor='k', width=1)

plt.xlabel('Grade')
plt.ylabel('Count')
plt.title('Distribution of Final Grades')

# Tick at every integer from 5 through 19.  The trailing semicolon keeps a
# notebook from echoing the return value of the last call.
plt.xticks(list(range(5, 20)));