Take the steps used to clean the patients dataset and calculate BMI (see template for the code)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# Clean the raw patient table and derive BMI-based flags.
#   * Sex:    surrounding whitespace trimmed, stored as a factor.
#   * Height: "cm" suffix stripped, converted to numeric.
#   * Weight: "kg" suffix stripped, converted to numeric.
#   * BMI:    Weight / (Height / 100)^2; Overweight is BMI > 25.
#   * Smokes: "Yes"/"No" recoded to "TRUE"/"FALSE" and parsed as logical.
patients_clean <- read.delim("patient-data.txt") %>%
  # tbl_df() is deprecated; as_tibble() is the supported replacement.
  as_tibble() %>%
  mutate(Sex = factor(str_trim(Sex))) %>%
  mutate(
    Height = as.numeric(str_replace_all(Height, "cm", "")),
    Weight = as.numeric(str_replace_all(Weight, "kg", ""))
  ) %>%
  mutate(BMI = Weight / (Height / 100)^2, Overweight = BMI > 25) %>%
  # Replacements must be character strings (recent stringr rejects logical
  # replacements); as.logical() then parses "TRUE"/"FALSE".
  mutate(
    Smokes = as.logical(
      str_replace_all(Smokes, c("Yes" = "TRUE", "No" = "FALSE"))
    )
  ) %>%
  select(ID, Name, Birth, BMI, Smokes, Overweight, Died)
patients_clean
## Source: local data frame [100 x 7]
##
## ID Name Birth BMI Smokes Overweight Died
## (int) (fctr) (fctr) (dbl) (lgl) (lgl) (lgl)
## 1 1 Demetrius 1972-02-11 24.74586 TRUE FALSE TRUE
## 2 2 Rosario 1972-07-22 27.26799 TRUE TRUE TRUE
## 3 3 Julio 1971-11-23 28.30182 FALSE TRUE FALSE
## 4 4 Lupe 1971-10-03 27.37403 FALSE TRUE TRUE
## 5 5 Lavern 1972-11-23 30.41397 FALSE TRUE FALSE
## 6 6 Bernie 1972-07-30 26.75882 FALSE TRUE FALSE
## 7 7 Samuel 1971-11-13 32.73286 TRUE TRUE FALSE
## 8 8 Clair 1972-03-14 25.21343 TRUE TRUE FALSE
## 9 9 Shirley 1972-10-10 26.85241 FALSE TRUE TRUE
## 10 10 Merle 1972-02-28 27.14491 FALSE TRUE TRUE
## .. ... ... ... ... ... ... ...
Modify the workflow to select the candidates (overweight smokers who have not died) and write them to a file
# Select overweight smokers who have not died.
# The filtered rows are assigned first and written in a separate step:
# write.table() returns NULL, so ending the assigned pipeline with it would
# leave `candidates` as NULL instead of the filtered table.
candidates <- filter(patients_clean, Smokes & Overweight & !Died)
write.table(candidates, "candidates.txt")
# One-pass workflow: read the raw patient table, clean it, derive BMI and the
# Overweight flag (BMI > 25), keep overweight smokers who have not died, and
# write the result to "candidates.csv".
read.delim("patient-data.txt") %>%
  # tbl_df() is deprecated; as_tibble() is the supported replacement.
  as_tibble() %>%
  mutate(Sex = factor(str_trim(Sex))) %>%
  mutate(
    Height = as.numeric(str_replace_all(Height, "cm", "")),
    Weight = as.numeric(str_replace_all(Weight, "kg", ""))
  ) %>%
  mutate(BMI = Weight / (Height / 100)^2, Overweight = BMI > 25) %>%
  # Replacements must be character strings (recent stringr rejects logical
  # replacements); as.logical() then parses "TRUE"/"FALSE".
  mutate(
    Smokes = as.logical(
      str_replace_all(Smokes, c("Yes" = "TRUE", "No" = "FALSE"))
    )
  ) %>%
  select(ID, Name, Birth, BMI, Smokes, Overweight, Died) %>%
  filter(Smokes & Overweight & !Died) %>%
  write.csv(file = "candidates.csv")