#----------------#
#   Aufgabe 1    #
#----------------#

?read.table()

# Import the data into R: adjust the file path "C:/..." below.
# NOTE: the path must use forward slashes "/", not the backslashes "\" that
# the Windows file explorer displays.

dax <- read.table(
  "C:/Users/detlef/HSRM/Vorlesungen/WS201819/Oekonometrie/DAXwithComp.txt",
  header = TRUE,
  sep = ";"
)

# Print the complete data set:
dax

# Only the first / last 6 rows:
head(dax)
tail(dax)
# 20 rows instead of 6:
head(dax, 20)
tail(dax, 20)

# Compact per-column summary:
summary(dax)

# Information about the structure and type of the object:
str(dax)
mode(dax)
class(dax)

# Column names, and three ways of getting at the same column:
names(dax)
dax[, 4]              # the 4th column by position
dax$BAS.DE            # the same column by its full name
dax$BAS               # the same column again ($ partially matches the name)


# Now the plots:

plot(dax$GDAX)                 # points
plot(dax$GDAX, type = "l")     # connected line
plot(dax$BAS, type = "l")

# Alternatively, copy the columns into their own variables first:
gdax <- dax$GDAX
bas <- dax$BAS
plot(gdax, type = "l")
plot(bas, type = "l")




#----------------#
#   Aufgabe 2    #
#----------------#


#-------#
#  2a)  #
#-------#
# NOTE: the banner lines above must start with "#". A bare run of "-" is
# parsed as unary minus operators and swallows the next statement.

# Import the S&P 500 data into R (adjust the path; use forward slashes):

spx <- read.table(
  "C:/Users/detlef/HSRM/Vorlesungen/WS201819/Oekonometrie/SPX.txt",
  header = TRUE,
  sep = ";"
)

spx                # quite large..

head(spx)
tail(spx)
names(spx)

# Technical information:
str(spx)           # the object is a "data.frame"
class(spx)
mode(spx)

# Extract the columns we need:
days <- spx$Date
index <- spx$Adj.Close

plot(index)
plot(log(index))

# Let's try to get the actual dates onto the x-axis. Plotting against the raw
# Date column does not give what we want: the column is not a "Date" yet.
# Depending on the R version read.table() delivers it as a factor or as a
# character vector, and the latter cannot be plotted at all, hence try():

try(plot(days, index))

str(days)          # not a "Date" yet, it should become one
class(days)
mode(days)
str(index)
class(index)
mode(index)



#-----------------------------------------------------------
# In the following we take a closer look at date formatting,
# see also the 4 pdf pages "Date Formatting in R" on the lecture homepage.

# Without a format as.Date() only understands "YYYY-MM-DD" / "YYYY/MM/DD", so
# this call fails; try() shows the error without aborting the script:
try(as.Date(days))

# With the matching format it works. as.character() makes the parsing behave
# the same whether the column arrived as factor or as character:
days <- as.Date(as.character(days), format = "%d-%m-%y")

str(days)          # ok, this is a "Date" now
class(days)
mode(days)
head(days)
tail(days)

plot(days, index)          # technically we have Date format now, but 1950
                           # has turned into 2050: "%y" reads the two-digit
                           # years 00-68 as 20xx and 69-99 as 19xx

# We fix this by hand.
# Dates can be added and subtracted, so let's try the following:

days[1]
startdate <- as.Date("1950-01-03")
startdate
wrongstartdate <- days[1]
wrongstartdate

days[1] - wrongstartdate + startdate
days[2] - wrongstartdate + startdate

# ok, that seems to work.

# Only entries with year >= 2050 have to be corrected, so extract the
# 4-digit year from each date:

years <- format(days, "%Y")
years
head(years)
tail(years)
length(years)

# Shift exactly those entries back. Assigning into a subset keeps the "Date"
# class; ifelse() would strip it and return bare day counts. which() drops
# any NA produced by unparsable dates, and the year is compared as a number
# rather than as a string.

wrong_century <- which(as.integer(years) >= 2050L)
days[wrong_century] <- days[wrong_century] - wrongstartdate + startdate
head(days)
class(days)        # still "Date"

# Internally, date values are stored as the number of days since
# January 1st 1970, with negative numbers for earlier dates:

head(as.numeric(days))

refDate <- as.Date("1970-01-01")
refDate
class(refDate)

# Adding those day counts to the reference date reproduces the dates:
days3 <- refDate + as.numeric(days)
head(days3)
tail(days3)

# format() turns dates into strings in any layout we like:
days2 <- format(days, format = "%d-%m-%Y")
head(days2)

# End of the closer look at date formatting.
#-----------------------------------------------------------



# Now we should get a nice plot:

plot(days, index)
plot(days, index, type = "l")
plot(days, log(index))              # alright, looks all good now
plot(days, log(index), type = "l")



#-------#
#  2b)  #
#-------#
# NOTE: the banner lines above must start with "#". A bare run of "-" is
# parsed as unary minus operators and swallows the next statement.

# In order to do the regression, we take the logarithm to obtain:
#
# log(SP500_t) = log(S_0) + r*(t-t_0)
#
# Thus we can do a simple linear regression with 1 regressor being
# the vector x = t_k - t_0 and y = log(SP500_{t_k}):

logindex <- log(index)

# t - t_0 as a year fraction. The difference of two Dates is a difftime in
# days; convert it to a plain number so that the result is not mislabelled
# as "days" after dividing by 365.25.
times <- as.numeric(days - days[1], units = "days") / 365.25
head(times)
tail(times)                 # looks good, 65, almost 66 years.

# Because t - t_0 is measured in years, r has the meaning of a yearly
# growth rate.

# Now the actual regression, just 1 line of code:

res <- lm(logindex ~ times)
res

# beta0 = log(S_0) and beta1 = r. coef() / fitted() are used instead of
# res$coeff / res$fit, which only work through partial matching of names.

coef(res)
r <- coef(res)[2]
S0 <- exp(coef(res)[1])
r                         # a growth rate of about 7% per year
S0                        # comparable to spx[1,]
spx[1, ]

# Let's look at the fit, on the log scale and on the original scale:
plot(times, logindex)
points(times, fitted(res), col = "red")

plot(times, index)
points(times, exp(fitted(res)), col = "red")




#-------#
#  2c)  #
#-------#
# NOTE: the banner lines above must start with "#". A bare run of "-" is
# parsed as unary minus operators applied to the next statement.

summary(res)

# Conjecture: "Std. Error" is the quantity
#
# sqrt( hat(s^2) * (X^T*X)^(-1)_{j,j} )

n <- length(times)
n
p <- 2                    # number of regressors, including the constant

# Residual variance estimate with n - p degrees of freedom:
hat_s_squared <- sum(residuals(res)^2) / (n - p)

# Design matrix: a column of ones plus the regressor. as.numeric() makes sure
# a plain numeric matrix is built even if `times` carries a difftime class.
X <- cbind(rep(1, n), times = as.numeric(times))
XTXinv <- solve(crossprod(X))        # the matrix (X^T*X)^{-1}
XTXinv
XTXinv00 <- XTXinv[1, 1]
XTXinv11 <- XTXinv[2, 2]

stderr0 <- sqrt(hat_s_squared * XTXinv00)
stderr1 <- sqrt(hat_s_squared * XTXinv11)
stderr0
stderr1
summary(res)              # ok, this matches

# The same comparison done numerically (prints TRUE if the conjecture holds):
all.equal(
  c(stderr0, stderr1),
  unname(coef(summary(res))[, "Std. Error"])
)



#-------#
#  2d)  #
#-------#
# NOTE: the banner lines above must start with "#". A bare run of "-" is
# parsed as unary minus operators and swallows the next statement.

# Two-sided t quantiles for the 90%, 95% and 99% confidence levels:
x90 <- qt(0.95, df = n - p)
x90
x95 <- qt(0.975, df = n - p)
x95
x99 <- qt(0.995, df = n - p)
x99

# Slope estimate; coef() avoids the partial matching that res$coeff needs.
r <- coef(res)[2]
r

# Interval bounds: estimate +/- quantile * standard error
rup90 <- r + x90 * stderr1
rup95 <- r + x95 * stderr1
rup99 <- r + x99 * stderr1
rdown90 <- r - x90 * stderr1
rdown95 <- r - x95 * stderr1
rdown99 <- r - x99 * stderr1

confint90 <- c(rdown90, rup90) * 100     # in percent
confint95 <- c(rdown95, rup95) * 100
confint99 <- c(rdown99, rup99) * 100
confint90
confint95
confint99                # quite narrow intervals,
                         # due to large n

# Cross-check against R's built-in confidence interval for the slope:
confint(res, parm = "times", level = 0.95) * 100