# Session setup: select a CRAN mirror and make sure the CRAN packages this
# script installs are available.
#
# The workspace is intentionally NOT cleared with rm(list = ls()): every
# object used below is assigned before it is read, and wiping the global
# environment would silently destroy unrelated objects of whoever sources
# this file.
options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Packages that used to be (re)installed unconditionally on every run.
# NOTE(review): the script also attaches haven, labelled, naniar, Hmisc, lfe
# and childhoodmortality further down without installing them here; they are
# presumably installed already -- confirm.
required_packages <- c(
  "data.table", "survival", "dplyr", "survey",
  "tidyverse", "readstata13", "fastDummies"
)

# requireNamespace() returns FALSE instead of raising an error when a package
# is not installed, so it can be used as an availability probe.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]

# Install only what is missing; this avoids a network round trip and a
# possible source compilation on every run.
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# laden der libraries
library(haven)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(labelled)
library(naniar)
library(naniar)
library(Hmisc)
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(haven)
library(ggplot2)
library(lfe)
## Loading required package: Matrix
library(fastDummies)
## Thank you for using fastDummies!
## To acknowledge our work, please cite the package:
## Kaplan, J. & Schlegel, B. (2023). fastDummies: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables. Version 1.7.1. URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/.
library(labelled)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(childhoodmortality)
library(survival)
library(survey)
## Loading required package: grid
##
## Attaching package: 'survey'
## The following object is masked from 'package:Hmisc':
##
## deff
## The following object is masked from 'package:graphics':
##
## dotchart
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ data.table::first() masks dplyr::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ data.table::last() masks dplyr::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ Hmisc::src() masks dplyr::src()
## ✖ Hmisc::summarize() masks dplyr::summarize()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(ggplot2)
# Prepare the data
# Location of the SPSS (.SAV) file. The original absolute path stays the
# default; set the environment variable KEBR8BFL_PATH to run the script on a
# machine where the file lives somewhere else.
file_path <- Sys.getenv(
  "KEBR8BFL_PATH",
  unset = "/Users/nilsborgmannprivate/Downloads/KEBR8BSV (1)/KEBR8BFL.SAV"
)
# Fail early with a readable message instead of a low-level reader error.
if (!file.exists(file_path)) {
  stop("Data file not found: ", file_path, call. = FALSE)
}
# Read the SPSS file into a tibble (value labels are kept by haven).
data <- read_sav(file_path)
# Keep only the columns used in this analysis.
filtered_data <- dplyr::select(data, CASEID, B3, B7, V008, V106, V107, V024)
# Drop every row whose V107 value is missing.
# NOTE(review): after this filter V106 only takes the values 1-3 in the
# tables further down, so rows labelled "No education" (V106 == 0) appear to
# be excluded by this step -- confirm that this exclusion is intended.
filtered_data_no_na <- dplyr::filter(filtered_data, !is.na(V107))
# Number of rows left after the filter.
num_individuals_after_filter <- nrow(filtered_data_no_na)
print(paste("Anzahl der Individuen nach dem Filtern:", num_individuals_after_filter))
## [1] "Anzahl der Individuen nach dem Filtern: 59508"
# Show the structure (column types and attributes) of the filtered data.
str(filtered_data_no_na)
## tibble [59,508 × 7] (S3: tbl_df/tbl/data.frame)
## $ CASEID: chr [1:59508] " 1 7 2" " 1 10 1" " 1 13 2" " 1 13 2" ...
## ..- attr(*, "label")= chr "Case Identification"
## ..- attr(*, "format.spss")= chr "A15"
## ..- attr(*, "display_width")= int 17
## $ B3 : num [1:59508] 1272 1330 1432 1402 1319 ...
## ..- attr(*, "label")= chr "Date of birth (CMC)"
## ..- attr(*, "format.spss")= chr "F4.0"
## ..- attr(*, "display_width")= int 6
## $ B7 : num [1:59508] NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "label")= chr "Age at death (months, imputed)"
## ..- attr(*, "format.spss")= chr "F3.0"
## ..- attr(*, "display_width")= int 5
## $ V008 : num [1:59508] 1468 1468 1468 1468 1468 ...
## ..- attr(*, "label")= chr "Date of interview (CMC)"
## ..- attr(*, "format.spss")= chr "F4.0"
## ..- attr(*, "display_width")= int 6
## $ V106 : dbl+lbl [1:59508] 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 2...
## ..@ label : chr "Highest educational level"
## ..@ format.spss : chr "F1.0"
## ..@ display_width: int 6
## ..@ labels : Named num [1:4] 0 1 2 3
## .. ..- attr(*, "names")= chr [1:4] "No education" "Primary" "Secondary" "Higher"
## $ V107 : dbl+lbl [1:59508] 4, 8, 4, 4, 4, 4, 4, 8, 8, 4, 4, 4, 3, 3, 3, 8, 8, 4...
## ..@ label : chr "Highest year of education"
## ..@ format.spss : chr "F2.0"
## ..@ display_width: int 6
## ..@ labels : Named num 0
## .. ..- attr(*, "names")= chr "No years completed at level V106"
## $ V024 : dbl+lbl [1:59508] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## ..@ label : chr "Region"
## ..@ format.spss : chr "F2.0"
## ..@ display_width: int 6
## ..@ labels : Named num [1:47] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..- attr(*, "names")= chr [1:47] "Mombasa" "Kwale" "Kilifi" "Tana River" ...
# Number of missing values in each column of the filtered data.
missing_values <- filtered_data_no_na %>%
  is.na() %>%
  colSums()
print(missing_values)
## CASEID B3 B7 V008 V106 V107 V024
## 0 0 56165 0 0 0 0
# Distribution of the sample across counties (regions)
# Row count of the filtered data set.
num_individuals <- dim(filtered_data_no_na)[1]
print(num_individuals)
## [1] 59508
# List the column names as a sanity check.
names(filtered_data_no_na)
## [1] "CASEID" "B3" "B7" "V008" "V106" "V107" "V024"
# Input: filtered_data_no_na, i.e. the data after removing rows with a
# missing V107.
# Tabulate how many rows fall into each county (V024); the count column is
# named "Anzahl".
county_distribution <- dplyr::count(filtered_data_no_na, V024, name = "Anzahl")
# Print the per-county counts.
print(county_distribution)
## # A tibble: 47 × 2
## V024 Anzahl
## <dbl+lbl> <int>
## 1 1 [Mombasa] 1171
## 2 2 [Kwale] 1131
## 3 3 [Kilifi] 1201
## 4 4 [Tana River] 818
## 5 5 [Lamu] 1253
## 6 6 [Taita Taveta] 1040
## 7 7 [Garissa] 217
## 8 8 [Wajir] 228
## 9 9 [Mandera] 182
## 10 10 [Marsabit] 414
## # ℹ 37 more rows
# Bar chart of the number of rows per county; the x-axis labels are rotated
# by 90 degrees so that all 47 county codes stay readable.
ggplot(
  data = county_distribution,
  mapping = aes(x = as.factor(V024), y = Anzahl)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Verteilung der Individuen pro County",
    x = "County",
    y = "Anzahl der Individuen"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Sanity check: the per-county counts must add up to the size of the
# filtered sample.
total_count_from_distribution <- sum(county_distribution[["Anzahl"]])
# Report whether the two totals agree.
print(
  if (total_count_from_distribution == num_individuals_after_filter) {
    "Die Summe der Individuen pro County entspricht der Anzahl der Individuen in der gefilterten Stichprobe."
  } else {
    "Es gibt eine Diskrepanz zwischen der Summe der Individuen pro County und der Anzahl der Individuen in der gefilterten Stichprobe."
  }
)
## [1] "Die Summe der Individuen pro County entspricht der Anzahl der Individuen in der gefilterten Stichprobe."
# Print both totals so they can be compared by eye.
print(paste("Summe der Individuen pro County:", total_count_from_distribution))
## [1] "Summe der Individuen pro County: 59508"
print(paste("Anzahl der Individuen in der gefilterten Stichprobe:", num_individuals_after_filter))
## [1] "Anzahl der Individuen in der gefilterten Stichprobe: 59508"
# Descriptive statistics (mean, median, standard deviation, minimum, maximum)
# for V107, returned as a one-row tibble.
# NOTE(review): V107 is labelled "Highest year of education" with the value
# label "No years completed at level V106", i.e. it looks like the year
# reached *within* the level given by V106 rather than total years of
# schooling -- confirm that this is the intended measure.
descriptive_stats <- filtered_data_no_na %>%
  dplyr::summarise(
    dplyr::across(
      V107,
      list(
        Mean = function(yrs) mean(yrs, na.rm = TRUE),
        Median = function(yrs) median(yrs, na.rm = TRUE),
        SD = function(yrs) sd(yrs, na.rm = TRUE),
        Min = function(yrs) min(yrs, na.rm = TRUE),
        Max = function(yrs) max(yrs, na.rm = TRUE)
      ),
      # Name the output columns after the statistic only (no "V107_" prefix).
      .names = "{.fn}"
    )
  )
print(descriptive_stats)
## # A tibble: 1 × 5
## Mean Median SD Min Max
## <dbl> <dbl> <dbl> <dbl+lbl> <dbl+lbl>
## 1 5.24 5 2.38 0 [No years completed at level V106] 12
# Frequency table of V107: absolute count per value plus its share of all
# rows in percent.
education_years_distribution <- filtered_data_no_na %>%
  dplyr::count(V107, name = "Count") %>%
  dplyr::mutate(Percentage = Count / sum(Count) * 100)
# Print the frequency table.
print(education_years_distribution)
## # A tibble: 11 × 3
## V107 Count Percentage
## <dbl+lbl> <int> <dbl>
## 1 0 [No years completed at level V106] 596 1.00
## 2 1 2529 4.25
## 3 2 6474 10.9
## 4 3 6101 10.3
## 5 4 12097 20.3
## 6 5 2446 4.11
## 7 6 4352 7.31
## 8 7 7755 13.0
## 9 8 17151 28.8
## 10 9 4 0.00672
## 11 12 3 0.00504
# Bar chart of the V107 frequency table.
# education_years_distribution is already computed (and printed) directly
# above from the same, unchanged filtered_data_no_na; the verbatim
# recomputation that used to sit here was redundant and has been removed.
ggplot(
  data = education_years_distribution,
  mapping = aes(x = as.factor(V107), y = Count, fill = as.factor(V107))
) +
  # geom_col() draws bars whose height is the y value, which is what
  # geom_bar(stat = "identity") did.
  geom_col() +
  labs(
    title = "Verteilung der Bildungsjahre",
    x = "Anzahl der Bildungsjahre",
    y = "Anzahl der Individuen"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Mean of V107 per county, with the county column renamed to "County".
# Detach plyr first if it happens to be attached, so that its summarise()/
# rename() cannot mask the dplyr verbs.
if ("package:plyr" %in% search()) {
  detach("package:plyr", unload = TRUE)
}
# Rename the county column, then average V107 within each county.
county_schooling_distribution <- filtered_data_no_na %>%
  dplyr::rename(County = V024) %>%
  dplyr::group_by(County) %>%
  dplyr::summarise(Avg_Years_of_Schooling = mean(V107, na.rm = TRUE))
# Print the per-county means.
print(county_schooling_distribution)
## # A tibble: 47 × 2
## County Avg_Years_of_Schooling
## <dbl+lbl> <dbl>
## 1 1 [Mombasa] 5.14
## 2 2 [Kwale] 5.63
## 3 3 [Kilifi] 5.07
## 4 4 [Tana River] 5.47
## 5 5 [Lamu] 5.19
## 6 6 [Taita Taveta] 5.25
## 7 7 [Garissa] 4.68
## 8 8 [Wajir] 5.58
## 9 9 [Mandera] 5.32
## 10 10 [Marsabit] 5.76
## # ℹ 37 more rows
# Bar chart of the per-county mean of V107, with rotated x-axis labels.
ggplot(
  data = county_schooling_distribution,
  mapping = aes(x = as.factor(County), y = Avg_Years_of_Schooling)
) +
  geom_col(fill = "coral") +
  labs(
    title = "Durchschnittsjahre der Schulbildung pro County",
    x = "County",
    y = "Durchschnittsjahre der Schulbildung"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Binary indicator: 1 if the child died before 60 months of age, else 0.
# B7 (age at death in months) is missing for every child without a recorded
# death, so a plain `B7 < 60` comparison yields NA for all of them. The
# explicit !is.na() guard codes those children as 0 instead of NA.
# as.numeric() keeps the column a double, as it was before.
filtered_data_no_na <- filtered_data_no_na %>%
  mutate(death_before_5 = as.numeric(!is.na(B7) & B7 < 60))
# Per county: number of rows, number of deaths before age 5 and their ratio.
# NOTE(review): this is an unweighted share of all recorded births, including
# children born fewer than 60 months before the interview -- presumably not
# a formal under-five mortality rate; confirm the intended definition.
mortality_by_county <- filtered_data_no_na %>%
  group_by(V024) %>%
  summarise(
    Count = n(),
    Deaths = sum(death_before_5),
    # Reuse the two columns computed just above instead of recomputing them.
    Mortality_Rate = Deaths / Count
  ) %>%
  rename(County = V024)
# Print the table.
print(mortality_by_county)
## # A tibble: 47 × 4
## County Count Deaths Mortality_Rate
## <dbl+lbl> <int> <dbl> <dbl>
## 1 1 [Mombasa] 1171 58 0.0495
## 2 2 [Kwale] 1131 26 0.0230
## 3 3 [Kilifi] 1201 45 0.0375
## 4 4 [Tana River] 818 40 0.0489
## 5 5 [Lamu] 1253 73 0.0583
## 6 6 [Taita Taveta] 1040 42 0.0404
## 7 7 [Garissa] 217 5 0.0230
## 8 8 [Wajir] 228 20 0.0877
## 9 9 [Mandera] 182 5 0.0275
## 10 10 [Marsabit] 414 13 0.0314
## # ℹ 37 more rows
# Bar chart of the share of deaths before age 5 per county (sky-blue bars).
ggplot(
  data = mortality_by_county,
  mapping = aes(x = as.factor(County), y = Mortality_Rate)
) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Kindersterblichkeitsrate pro County",
    x = "County",
    y = "Kindersterblichkeitsrate"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Same chart drawn a second time with salmon-coloured bars.
ggplot(
  data = mortality_by_county,
  mapping = aes(x = as.factor(County), y = Mortality_Rate)
) +
  geom_col(fill = "salmon") +
  labs(
    title = "Kindersterblichkeitsrate pro County",
    x = "County",
    y = "Kindersterblichkeitsrate"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Preparation of the data for the linear regression models
# Rebuild the analysis data from the raw file: keep the relevant columns and
# drop rows with a missing V107.
filtered_data <- data %>%
  select(CASEID, B3, B7, V008, V106, V107, V024)
filtered_data_no_na <- filtered_data %>%
  filter(!is.na(V107))
# Binary outcome: 1 if the child died before 60 months of age, else 0.
# B7 (age at death in months) is missing for children without a recorded
# death. The previous `ifelse(B7 < 60, 1, 0)` therefore produced NA for all of
# them, and the regressions below silently dropped those rows and were
# estimated on deceased children only. The !is.na() guard codes them as 0.
# NOTE: the str() and felm() output pasted further down was generated before
# this fix and has to be regenerated.
# NOTE(review): children born fewer than 60 months before the interview
# (V008 - B3 < 60) are coded 0 although they have not yet reached age 5 --
# consider restricting the sample; confirm the intended definition.
filtered_data_no_na <- filtered_data_no_na %>%
  mutate(death_before_5 = as.numeric(!is.na(B7) & B7 < 60))
# Row count after adding the binary outcome.
num_individuals_with_death_variable <- nrow(filtered_data_no_na)
print(paste("Anzahl der Individuen nach Erstellung der binären Variable für Kindersterblichkeit:", num_individuals_with_death_variable))
## [1] "Anzahl der Individuen nach Erstellung der binären Variable für Kindersterblichkeit: 59508"
# Convert the county code V024 to a factor so that it can serve as the
# fixed-effect grouping variable in the models below.
filtered_data_no_na$V024 <- as.factor(filtered_data_no_na$V024)
# Inspect the structure to confirm the conversion.
str(filtered_data_no_na)
## tibble [59,508 × 8] (S3: tbl_df/tbl/data.frame)
## $ CASEID : chr [1:59508] " 1 7 2" " 1 10 1" " 1 13 2" " 1 13 2" ...
## ..- attr(*, "label")= chr "Case Identification"
## ..- attr(*, "format.spss")= chr "A15"
## ..- attr(*, "display_width")= int 17
## $ B3 : num [1:59508] 1272 1330 1432 1402 1319 ...
## ..- attr(*, "label")= chr "Date of birth (CMC)"
## ..- attr(*, "format.spss")= chr "F4.0"
## ..- attr(*, "display_width")= int 6
## $ B7 : num [1:59508] NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "label")= chr "Age at death (months, imputed)"
## ..- attr(*, "format.spss")= chr "F3.0"
## ..- attr(*, "display_width")= int 5
## $ V008 : num [1:59508] 1468 1468 1468 1468 1468 ...
## ..- attr(*, "label")= chr "Date of interview (CMC)"
## ..- attr(*, "format.spss")= chr "F4.0"
## ..- attr(*, "display_width")= int 6
## $ V106 : dbl+lbl [1:59508] 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 2...
## ..@ label : chr "Highest educational level"
## ..@ format.spss : chr "F1.0"
## ..@ display_width: int 6
## ..@ labels : Named num [1:4] 0 1 2 3
## .. ..- attr(*, "names")= chr [1:4] "No education" "Primary" "Secondary" "Higher"
## $ V107 : dbl+lbl [1:59508] 4, 8, 4, 4, 4, 4, 4, 8, 8, 4, 4, 4, 3, 3, 3, 8, 8, 4...
## ..@ label : chr "Highest year of education"
## ..@ format.spss : chr "F2.0"
## ..@ display_width: int 6
## ..@ labels : Named num 0
## .. ..- attr(*, "names")= chr "No years completed at level V106"
## $ V024 : Factor w/ 47 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ death_before_5: num [1:59508] NA NA NA NA NA NA NA NA NA NA ...
# Model specification for felm(): death_before_5 regressed on V107, with
# V024 (county) projected out as a fixed effect (the part after `|`).
formula <- death_before_5 ~ V107 | V024
# Estimate the fixed-effects model and print its summary.
model_fe <- felm(formula, data = filtered_data_no_na)
summary(model_fe)
##
## Call:
## felm(formula = formula, data = filtered_data_no_na)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.96279 0.06546 0.10406 0.13658 0.23111
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## V107 -0.001594 0.002407 -0.662 0.508
##
## Residual standard error: 0.3157 on 3295 degrees of freedom
## (56165 observations deleted due to missingness)
## Multiple R-squared(full model): 0.02037 Adjusted R-squared: 0.006392
## Multiple R-squared(proj model): 0.000133 Adjusted R-squared: -0.01413
## F-statistic(full model):1.457 on 47 and 3295 DF, p-value: 0.023
## F-statistic(proj model): 0.4384 on 1 and 3295 DF, p-value: 0.5079
# The same analysis, now for the variable V106
# Frequency table of the education level V106: absolute count per level plus
# its share of all rows in percent.
education_level_distribution <- filtered_data_no_na %>%
  dplyr::count(V106, name = "Count") %>%
  dplyr::mutate(Percentage = Count / sum(Count) * 100)
# Print the frequency table.
print(education_level_distribution)
## # A tibble: 3 × 3
## V106 Count Percentage
## <dbl+lbl> <int> <dbl>
## 1 1 [Primary] 36001 60.5
## 2 2 [Secondary] 16462 27.7
## 3 3 [Higher] 7045 11.8
# Bar chart of the V106 frequency table, one colour per education level.
ggplot(
  data = education_level_distribution,
  mapping = aes(x = as.factor(V106), y = Count, fill = as.factor(V106))
) +
  geom_col() +
  labs(
    title = "Verteilung der Bildungsniveaus",
    x = "Bildungsniveau",
    y = "Anzahl der Individuen"
  ) +
  theme_minimal()
# Print the V106 frequency table once more.
# education_level_distribution is already computed a few statements earlier
# from the same, unchanged filtered_data_no_na; the verbatim recomputation
# that used to sit here was redundant and has been removed.
print(education_level_distribution)
## # A tibble: 3 × 3
## V106 Count Percentage
## <dbl+lbl> <int> <dbl>
## 1 1 [Primary] 36001 60.5
## 2 2 [Secondary] 16462 27.7
## 3 3 [Higher] 7045 11.8
# List the column names as a sanity check.
names(filtered_data_no_na)
## [1] "CASEID" "B3" "B7" "V008"
## [5] "V106" "V107" "V024" "death_before_5"
# Mean of the education-level code V106 per county, with the county column
# renamed to "County".
# NOTE(review): V106 is a labelled category code (Primary / Secondary /
# Higher), so this mean treats the codes as equally spaced numbers -- confirm
# that this is intended.
county_education_distribution <- filtered_data_no_na %>%
  dplyr::rename(County = V024) %>%
  dplyr::group_by(County) %>%
  dplyr::summarise(Avg_Education_Level = mean(V106, na.rm = TRUE))
print(county_education_distribution)
## # A tibble: 47 × 2
## County Avg_Education_Level
## <fct> <dbl>
## 1 1 1.61
## 2 2 1.28
## 3 3 1.27
## 4 4 1.23
## 5 5 1.26
## 6 6 1.50
## 7 7 1.52
## 8 8 1.37
## 9 9 1.31
## 10 10 1.31
## # ℹ 37 more rows
# Bar chart of the per-county mean of V106, with rotated x-axis labels.
ggplot(
  data = county_education_distribution,
  mapping = aes(x = as.factor(County), y = Avg_Education_Level)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Durchschnittliches Bildungsniveau pro County",
    x = "County",
    y = "Durchschnittliches Bildungsniveau"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Model specification for felm(): death_before_5 regressed on V106, with
# V024 (county) projected out as a fixed effect (the part after `|`).
formula_education <- death_before_5 ~ V106 | V024
# Estimate the fixed-effects model and print its summary.
model_fe_education <- felm(formula_education, data = filtered_data_no_na)
summary(model_fe_education)
##
## Call:
## felm(formula = formula_education, data = filtered_data_no_na)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.00669 0.06449 0.09893 0.13585 0.24110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## V106 0.038017 0.009415 4.038 5.52e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.315 on 3295 degrees of freedom
## (56165 observations deleted due to missingness)
## Multiple R-squared(full model): 0.02506 Adjusted R-squared: 0.01115
## Multiple R-squared(proj model): 0.004924 Adjusted R-squared: -0.00927
## F-statistic(full model):1.802 on 47 and 3295 DF, p-value: 0.0007003
## F-statistic(proj model): 16.3 on 1 and 3295 DF, p-value: 5.519e-05