Answer To: 1) Please clearly mention questions (a)or(b) in your solution word and python not to be confused. 2)...
Suravi answered on Oct 04 2021
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas_datareader.data as reader\n",
"import pandas as pd\n",
"import datetime as dt\n",
"import statsmodels.api as sm\n",
"import os\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"end = dt.datetime(2021, 8,31 )\n",
"start = dt.datetime(2018, 9,1 )\n",
"funds= ['QMNIX']\n",
"fundsret = reader.get_data_yahoo(funds,start,end)['Adj Close'].pct_change()\n",
"fundsret.shape\n",
"\n",
"fundsret = fundsret[2:]\n",
"fundsret.head()\n",
"fundsret.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"factors = reader.DataReader('F-F_Research_Data_5_Factors_2x3_Daily','famafrench',start,end)[0]\n",
"factors.shape\n",
"\n",
"factors = factors[1:]\n",
"factors.head()\n",
"factors.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"fundsret.index=factors.index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge = pd.merge(fundsret,factors,on='Date',how='left')\n",
"merge.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge[['Mkt-RF','SMB','HML','RMW','CMA']]=merge[['Mkt-RF','SMB','HML','RMW','CMA']]/100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge.mean(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge.std(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge.skew(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge.kurtosis(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge.min(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge.max(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge.corr()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pandas.plotting import scatter_matrix\n",
"scatter_matrix(merge, alpha = 0.2, figsize = (6, 6), diagonal = 'kde')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"merge['QMNIX-RF'] = merge.QMNIX - merge.RF\n",
"merge.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Y=merge[['QMNIX-RF']]\n",
"X=merge[['Mkt-RF','SMB','HML','RMW','CMA']]\n",
"\n",
"X_sm=sm.add_constant(X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model1= sm.OLS(Y,X_sm)\n",
"results = model1.fit()\n",
"results.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"model2 = LinearRegression()\n",
"model2.fit(X,Y)\n",
"model2_r2 = model2.score(X,Y)\n",
"print('R^2: {0}'.format(model2_r2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(X)\n",
"df['QMNIX-RF'] = Y\n",
"df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"linear_X, linear_y = datasets.make_regression(n_samples=merge.shape[0],\n",
" n_features=merge.shape[1],\n",
" noise=75)\n",
"linear_model = LinearRegression()\n",
"linear_model.fit(linear_X, linear_y)\n",
"linear_r2 = linear_model.score(linear_X, linear_y)\n",
"print('R^2: {0}'.format(linear_r2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def calculate_residuals(model, features, label):\n",
" predictions = model.predict(features)\n",
" df_results = pd.DataFrame({'Actual': label, 'Predicted': predictions })\n",
" df_results['Residuals'] = abs(df_results['Actual']) - abs(df_results['Predicted'])\n",
" \n",
" return df_results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"def linear_assumption(model, features, label):\n",
" print('Assumption 1: Linear Relationship between the Target and the Feature', '\\n')\n",
" print('Checking with a scatter plot of actual vs. predicted.',\n",
"...