--------------- # Aufgabe 1 # --------------- ------- # 1a) # ------- # Importieren der Daten nach R: spx = read.table("C:/Users/detlef/HSRM/Vorlesungen/WS201617/Oekonometrie/Data/SPX.txt",header=TRUE,sep=";") spx # quite large.. head(spx) tail(spx) names(spx) # technical information: str(spx) # ist vom Daten-Typ "data.frame" class(spx) mode(spx) # extract columns: days = spx$Date index = spx$Adj.Close plot(index) plot(log(index)) # versuchen wir, die tatsaechlichen Zeiten auf die # x-Achse zu bekommen: plot(days,index) # takes some time until we get something # which is not that what we want.. str(days) # ist "Factor", muesste vielleicht sowas # wie "Date" sein.. class(days) mode(days) str(index) class(index) mode(index) #----------------------------------------------------------- # in the following we take a closer look to date-formatting, # see also the 4 pdf-pages "Date Formatting in R" auf der # Vorlesungs-homepage: days = as.Date(days) # doesn't work days = as.Date(days,format="%d-%m-%y") str(days) # ok, ist jetzt "Date" class(days) mode(days) head(days) tail(days) plot(days,index) # technically we have Date format now, # but 1950 has turned to 2050.. # we fix this by hand: # dates can be added and subtracted, so let's try the following: days[1] startdate = as.Date("1950-01-03") startdate wrongstartdate = days[1] wrongstartdate days[1] - wrongstartdate + startdate days[2] - wrongstartdate + startdate # ok, that seem to work. # we have to correct only entries with year >= 2050: # extract the 4-digit year from the date: years = format(days,"%Y") years head(years) tail(years) length(years) # let's try this: days = ifelse( years>=2050, as.Date( days - wrongstartdate + startdate ), days) head(days) # internally, date-values are given by integers equal to the number of # days since January 1st, 1970 with negative numbers for earlier dates. # days now is represented by these integers. Let's try to see the actual # dates again: class(days) days = as.Date(days) # does not work days2 = format(days, format="%d-%m-%Y") days2 # also does not work.. refDate = as.Date("1970-01-01") refDate class(refDate) days3 = refDate + days head(days3) tail(days3) # ok, finally we've made it... # eventually there is a more quick solution.. days = days3 # End of we take a closer look to date-formating. #----------------------------------------------------------- # now we should get a nice plot: plot(days,index) plot(days,log(index)) # alright, looks all good now ------- # 1b) # ------- # in order to do the regression, we take the logarithm to obtain: # # log(SP500_t) = log(S_0) + r*(t-t_0) # # thus we can do a simple linear regression with 1 regressor being # the vector x = t_k - t_0 and y = log(SP500_{t_k}): logindex = log(index) times = (days-days[1])/365.25 # t-t_0 in year-fraction head(times) tail(times) # looks good, 65, almost 66 years. # the fact that we calculate t-t_0 in year-fraction means that # r will have the meaning of a yearly growth rate: # now the actual regression, just 1 line of code: res = lm(logindex ~ times) res # beta0 = log(S_0) and beta1 = r: res$coeff r = res$coeff[2] S0 = exp(res$coeff[1]) r # a growth rate of about 7% per year S0 # comparable to spx[1,] spx[1,] # let's look at the fit: plot(times,logindex) points(times,res$fit,col="red") plot(times,index) points(times,exp(res$fit),col="red") ------- # 1c) # ------- # Vermutung: Std.Error ist die Groesse # # sqrt( hat(s^2)* (X^T*X)^(-1)_{j,j} ) n = length(times) n p = 2 # number of regrssors, including constant hat_s_squared = 1/(n-p) * sum(res$residuals^2) X = cbind(rep(1,n),times) # Regressoren mit Konstante XTXinv = solve(t(X)%*%X) # die Matrix (X^T*X)^{-1} XTXinv XTXinv00 = XTXinv[1,1] XTXinv11 = XTXinv[2,2] stderr0 = sqrt( hat_s_squared * XTXinv00 ) stderr1 = sqrt( hat_s_squared * XTXinv11 ) stderr0 stderr1 summary(res) # ok, das passt ------- # 1d) # ------- x90 = qt(0.95,df=n-p) x90 x95 = qt(0.975,df=n-p) x95 x99 = qt(0.995,df=n-p) x99 r = res$coeff[2] r rup90 = r + x90*stderr1 rup95 = r + x95*stderr1 rup99 = r + x99*stderr1 rdown90 = r - x90*stderr1 rdown95 = r - x95*stderr1 rdown99 = r - x99*stderr1 confint90 = c(rdown90,rup90)*100 # in Prozent confint95 = c(rdown95,rup95)*100 confint99 = c(rdown99,rup99)*100 confint90 confint95 confint99 # quite narrow intervalls, # due to large n --------------- # Aufgabe 2 # --------------- # 2a) bevdata = read.table("C:/Users/detlef/HSRM/Vorlesungen/WS201617/Oekonometrie/weltbevoelkerung.csv",header=FALSE,sep=";") bevdata bev = bevdata[,2] bev zeiten = bevdata[,1] zeiten = zeiten -1950 zeiten logbev = log(bev) logbev plot(zeiten,bev) # 2b) # Modell (1): res1 = lm(logbev ~ zeiten) beta0 = res1$coef[1] beta1 = res1$coef[2] B0 = exp(beta0) r = beta1 B0 r bevfit1 = B0 * exp(r*zeiten) lines(zeiten,bevfit1,col=2) # Modell (2): res2 = lm(bev ~ zeiten) a0 = res2$coef[1] a1 = res2$coef[2] a0 a1 bevfit2 = res2$fit lines(zeiten,bevfit2,col=3) # Modell (3): res3 = lm(bev ~ zeiten + I(zeiten^2) ) b0 = res3$coef[1] b1 = res3$coef[2] b2 = res3$coef[3] b0 b1 b2 bevfit3 = res3$fit lines(zeiten,bevfit3,col=4) # 2c) # Die Vertrauensintervalle sind gegeben durch # # hat_beta plus/minus stddev * q_alpha # # wobei die Standardabweichung stddev aus den Re- # gressionsresultaten abgelesen werden kann und # bei Vertauenslevel alpha=90% # # q90 = qt(0.95,df=n-p) # # wenn n der Stichprobenumfang und p die Anzahl # der Regressoren ist (mit intercept oder konstantem # Term). # Modell (1): summary(res1) # also: stddev_beta0 = 0.0164246 stddev_beta1 = 0.0004295 n = length(zeiten) n p = 2 q90 = qt(0.95,df=n-p) q90 # damit: beta0up = beta0 + stddev_beta0*q90 beta0down = beta0 - stddev_beta0*q90 beta1up = beta1 + stddev_beta1*q90 beta1down = beta1 - stddev_beta1*q90 confint_B0 = c(exp(beta0down),exp(beta0up)) confint_r = c(beta1down,beta1up) confint_B0 confint_r # Modell (2): summary(res2) # also: stddev_a0 = 0.055615 stddev_a1 = 0.001454 n = length(zeiten) p = 2 q90 = qt(0.95,df=n-p) # damit: a0up = a0 + stddev_a0*q90 a0down = a0 - stddev_a0*q90 a1up = a1 + stddev_a1*q90 a1down = a1 - stddev_a1*q90 confint_a0 = c(a0down,a0up) confint_a1 = c(a1down,a1up) confint_a0 confint_a1 # Modell (3): summary(res3) # also: stddev_b0 = 4.348*10^(-2) stddev_b1 = 3.107*10^(-3) stddev_b2 = 4.608*10^(-5) n = length(zeiten) p = 3 q90 = qt(0.95,df=n-p) # damit: b0up = b0 + stddev_b0*q90 b0down = b0 - stddev_b0*q90 b1up = b1 + stddev_b1*q90 b1down = b1 - stddev_b1*q90 b2up = b2 + stddev_b2*q90 b2down = b2 - stddev_b2*q90 confint_b0 = c(b0down,b0up) confint_b1 = c(b1down,b1up) confint_b2 = c(b2down,b2up) confint_b0 confint_b1 confint_b2 --------------- # Aufgabe 3 # --------------- # 3a) store the Excel-data as csv-file and make sure that numbers in Excel are # formatted properly (ohne Hochkomma oder aehnliches..) bip = read.table("C:/Users/detlef/HSRM/Vorlesungen/WS201617/Oekonometrie/BIP_Deutschland.csv",header=FALSE,sep=";") bip y = bip[,2] y x = bip[,1] x x = x - 1991 x plot(x,y) # 3b) res = lm(y ~ x) betas = res$coeff beta0hat = betas[1] beta1hat = betas[2] beta0hat # = B_{t_0} beta1hat # = r plot(x,y) lines(x,res$fit,col=2) # 3c) Vertrauensintervalle: # # beta0hat +/- q_alpha stddev_beta0 # beta1hat +/- q_alpha stddev_beta1 n = length(x) n p = 2 q90 = qt(95/100,df=n-p) q90 q95 = qt(97.5/100,df=n-p) q95 q99 = qt(99.5/100,df=n-p) q99 summary(res) # also: stddev_beta0 = 18.066 stddev_beta1 = 1.346 beta0up90 = beta0hat + q90*stddev_beta0 beta0up95 = beta0hat + q95*stddev_beta0 beta0up99 = beta0hat + q99*stddev_beta0 beta0down90 = beta0hat - q90*stddev_beta0 beta0down95 = beta0hat - q95*stddev_beta0 beta0down99 = beta0hat - q99*stddev_beta0 confint0_90 = c(beta0down90,beta0up90) confint0_95 = c(beta0down95,beta0up95) confint0_99 = c(beta0down99,beta0up99) confint0_90 confint0_95 confint0_99 beta1up90 = beta1hat + q90*stddev_beta1 beta1up95 = beta1hat + q95*stddev_beta1 beta1up99 = beta1hat + q99*stddev_beta1 beta1down90 = beta1hat - q90*stddev_beta1 beta1down95 = beta1hat - q95*stddev_beta1 beta1down99 = beta1hat - q99*stddev_beta1 confint1_90 = c(beta1down90,beta1up90) confint1_95 = c(beta1down95,beta1up95) confint1_99 = c(beta1down99,beta1up99) confint1_90 confint1_95 confint1_99