from IPython.display import HTML
# Emit an HTML/JavaScript snippet as the cell output. The script defines
# code_toggle(), which hides or shows every `div.input` element (the notebook's
# code cells), runs it once when the document is ready so the code starts
# hidden, and renders a link that calls code_toggle() again on click.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')
We took the data from the Yahoo Finance database.
For the missed worst days and the missed best days, we have replaced the actual daily return with a return equal to 0.
import matplotlib.pyplot as plt
import numpy as np, pandas as pd
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
plt.style.use('bmh')
from scipy.stats import skew
# Load the SPY price history and index it by date.
df1 = pd.read_csv('SPY.csv')
df1.set_index('Date', inplace=True)
df1.index = pd.to_datetime(df1.index)
df = df1.copy()

adj_close = df['Adj Close']

# Daily log returns: log(P_t) - log(P_{t-1}). The first row has no previous
# price, so its NaN is dropped positionally.
log_rets = np.log(adj_close).diff().iloc[1:]

# Number of calendar years spanned by the data, minus one (the same count the
# yearly resample produced for gap-free data), computed without the deprecated
# 'Y' resample alias.
length_period = df.index.max().year - df.index.min().year

# CAGR = (end / start) ** (1 / years) - 1. The growth *factor* end/start is
# annualised, not the total return (end - start) / start.
# .iloc is used because integer keys on a date-indexed Series are label
# lookups, not positions.
cagr = (adj_close.iloc[-1] / adj_close.iloc[0]) ** (1 / length_period) - 1

# First three moments of the daily log returns.
three_moments = pd.DataFrame(
    [np.mean(log_rets), np.std(log_rets), skew(log_rets)],
    index=['Mean', 'Std', 'Skewness'],
    columns=['1993-2020 SPY daily returns stats'],
)
pd.DataFrame(dict(CAGR=cagr), index=['1993-2020'])
three_moments
The effects of the negative skewness can be further investigated by asking:
1) What would our final capital have been if we had excluded the top $n$ worst days?
2) What would our final capital have been if we had excluded the top $n$ best days?
def growth_without_worstdays(df1, missed_worst_days=(1, 5, 15, 25)):
    """Print and plot the growth of 1000$ when the worst days are missed.

    The final wealth is ``1000 * exp(sum of log returns)``. For every ``n`` in
    *missed_worst_days*, the ``n`` lowest log returns are replaced by a return
    of 0 (i.e. left out of the sum) and the final wealth is recomputed. Exactly
    ``n`` days are excluded even when several returns tie at the cutoff.

    Args:
        df1: Daily log returns (a pandas Series or any 1-D array-like).
            NaN entries are ignored.
        missed_worst_days: Numbers of worst days to exclude; one bar is drawn
            per entry, after the bar for the full period.

    Returns:
        None. Results are printed and drawn as a bar chart on a new figure.

    Raises:
        ValueError: If *missed_worst_days* contains a negative number.
    """
    day_counts = list(missed_worst_days)
    if any(n < 0 for n in day_counts):
        raise ValueError('missed_worst_days must contain non-negative integers')

    log_returns = np.asarray(df1, dtype=float).ravel()
    log_returns = log_returns[~np.isnan(log_returns)]
    # Ascending order: the n worst days are the first n entries.
    ascending = np.sort(log_returns)

    final_wealth = np.exp(log_returns.sum()) * 1000
    print('Growth of 1000$ from 1993 to 2020: {}$'.format(np.round(final_wealth, 2)))
    print()
    wealths = [final_wealth]

    for n in day_counts:
        # Zeroing a log return is the same as leaving it out of the sum.
        final_wealth = np.exp(ascending[n:].sum()) * 1000
        wealths.append(final_wealth)
        print('Growth of 1000$ without {} top worst days: {}$'.format(n, np.round(final_wealth, 2)))
        print()

    _, ax = plt.subplots(figsize=(10, 8))
    positions = list(range(len(wealths)))
    # x/y are passed by keyword: recent seaborn releases reject positional data.
    sns.barplot(x=positions, y=wealths, ax=ax)
    for bar, wealth in zip(ax.patches, wealths):
        # get_x pulls left or right; get_height pushes up or down
        ax.text(bar.get_x() + .2, bar.get_height() + 10,
                str(int(wealth)) + ' $', fontsize=15,
                color='yellow', backgroundcolor='black')
    tick_labels = ['Total period'] + ['Top ' + str(n) for n in day_counts]
    plt.xticks(positions, tick_labels, fontsize='large')
    plt.xlabel('Missed worst days', fontsize='xx-large')
    plt.ylabel('Growth of 1000$', fontsize='xx-large')
    plt.title('Performance of SPY 1993-2020 ', fontsize='x-large', color='yellow', backgroundcolor='black')
    return None
# Run the worst-days analysis on the SPY daily log returns (`log_rets`).
growth_without_worstdays(log_rets)
def growth_without_bestdays(df1, missed_best_days=(1, 5, 15, 25)):
    """Print and plot the growth of 1000$ when the best days are missed.

    The final wealth is ``1000 * exp(sum of log returns)``. For every ``n`` in
    *missed_best_days*, the ``n`` highest log returns are replaced by a return
    of 0 (i.e. left out of the sum) and the final wealth is recomputed. Exactly
    ``n`` days are excluded even when several returns tie at the cutoff.

    Args:
        df1: Daily log returns (a pandas Series or any 1-D array-like).
            NaN entries are ignored.
        missed_best_days: Numbers of best days to exclude; one bar is drawn
            per entry, after the bar for the full period.

    Returns:
        None. Results are printed and drawn as a bar chart on a new figure.

    Raises:
        ValueError: If *missed_best_days* contains a negative number.
    """
    day_counts = list(missed_best_days)
    if any(n < 0 for n in day_counts):
        raise ValueError('missed_best_days must contain non-negative integers')

    log_returns = np.asarray(df1, dtype=float).ravel()
    log_returns = log_returns[~np.isnan(log_returns)]
    # Descending order: the n best days are the first n entries.
    descending = np.sort(log_returns)[::-1]

    final_wealth = np.exp(log_returns.sum()) * 1000
    print('Growth of 1000$ from 1993 to 2020 {}$'.format(np.round(final_wealth, 2)))
    print()
    wealths = [final_wealth]

    for n in day_counts:
        # Zeroing a log return is the same as leaving it out of the sum.
        final_wealth = np.exp(descending[n:].sum()) * 1000
        wealths.append(final_wealth)
        print('Growth of 1000$ without {} best days: {}$'.format(n, np.round(final_wealth, 2)))
        print()

    _, ax = plt.subplots(figsize=(10, 8))
    positions = list(range(len(wealths)))
    # x/y are passed by keyword: recent seaborn releases reject positional data.
    sns.barplot(x=positions, y=wealths, ax=ax)
    for bar, wealth in zip(ax.patches, wealths):
        # get_x pulls left or right; get_height pushes up or down
        ax.text(bar.get_x() + .2, bar.get_height() + 10,
                str(int(wealth)) + ' $', fontsize=15,
                color='yellow', backgroundcolor='black')
    tick_labels = ['Total period'] + ['Top ' + str(n) for n in day_counts]
    plt.xticks(positions, tick_labels, fontsize='large')
    plt.xlabel('Missed best days', fontsize='xx-large')
    plt.ylabel('Growth of 1000$', fontsize='xx-large')
    plt.title('Performance of SPY, 1993-2020 ', fontsize='x-large', color='yellow', backgroundcolor='black')
    return None
# Run the best-days analysis on the SPY daily log returns (`log_rets`).
growth_without_bestdays(log_rets)
Let's suppose that we have a risk-neutral prototype investor under a single bet scenario.
In other words, under a single gamble scenario, such an investor is indifferent between risking a loss of \$100 and a gain of \$100 when both outcomes are equally likely (probability 0.5).
And let's suppose that we have the following gamble: with probability 0.5 our investor obtains the final capital without the top $n$ worst days, and with the same probability he obtains the final capital without the top $n$ best days.
The graphs above show that our prototype investor would refuse the gamble for $n$ equal to 1.
Indeed $12548$ (the final capital without the gamble) $> 0.5 \times 10957 + 0.5 \times 13918$.
But for $n > 1$ our investor will take the gamble, because the gain he would obtain if he missed the $n$ worst days exceeds the money he would lose if he missed the $n$ best days (and this becomes increasingly evident as $n$ increases).
This is a natural consequence of the negative skewness of historical daily returns of the S&P 500 index.