Take the steps used to clean the patients dataset and calculate BMI (see template for the code)


library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
# Load the raw patient records and tidy them:
#   * Sex        - trim surrounding whitespace, then convert to a factor
#   * Height     - strip the "cm" suffix and convert to numeric
#   * Weight     - strip the "kg" suffix and convert to numeric
#   * BMI        - Weight / (Height / 100)^2 (height rescaled from cm to m)
#   * Overweight - TRUE when BMI is above 25
#   * Smokes     - rewrite "Yes"/"No" as "TRUE"/"FALSE", then parse as logical
# Only the columns needed downstream are kept.
patients_clean <- read.delim("patient-data.txt") %>%
  tbl_df() %>%
  mutate(
    Sex = factor(str_trim(Sex)),
    Height = as.numeric(str_replace_all(Height, pattern = "cm", "")),
    Weight = as.numeric(str_replace_all(Weight, "kg", ""))
  ) %>%
  mutate(
    BMI = Weight / (Height / 100)^2,
    Overweight = BMI > 25
  ) %>%
  # The replacement is given as a string, as str_replace_all() documents,
  # rather than relying on a logical being coerced to text.
  mutate(Smokes = str_replace_all(Smokes, "Yes", "TRUE")) %>%
  mutate(Smokes = as.logical(str_replace_all(Smokes, "No", "FALSE"))) %>%
  select(ID, Name, Birth, BMI, Smokes, Overweight, Died)

# Print the cleaned table to inspect the result.
patients_clean
## Source: local data frame [100 x 7]
## 
##       ID      Name      Birth      BMI Smokes Overweight  Died
##    (int)    (fctr)     (fctr)    (dbl)  (lgl)      (lgl) (lgl)
## 1      1 Demetrius 1972-02-11 24.74586   TRUE      FALSE  TRUE
## 2      2   Rosario 1972-07-22 27.26799   TRUE       TRUE  TRUE
## 3      3     Julio 1971-11-23 28.30182  FALSE       TRUE FALSE
## 4      4      Lupe 1971-10-03 27.37403  FALSE       TRUE  TRUE
## 5      5    Lavern 1972-11-23 30.41397  FALSE       TRUE FALSE
## 6      6    Bernie 1972-07-30 26.75882  FALSE       TRUE FALSE
## 7      7    Samuel 1971-11-13 32.73286   TRUE       TRUE FALSE
## 8      8     Clair 1972-03-14 25.21343   TRUE       TRUE FALSE
## 9      9   Shirley 1972-10-10 26.85241  FALSE       TRUE  TRUE
## 10    10     Merle 1972-02-28 27.14491  FALSE       TRUE  TRUE
## ..   ...       ...        ...      ...    ...        ...   ...

Modify the workflow


  # Select the study candidates: overweight smokers who have not died.
  # The filtered table is assigned first and written out in a separate step;
  # piping into write.table() would store its NULL return value instead.
  candidates <- filter(patients_clean, Smokes & Overweight & !Died)
  write.table(candidates, "candidates.txt")
# End-to-end workflow: read the raw patient records, clean them, derive BMI,
# keep only overweight smokers who have not died, and write them to
# "candidates.csv" without creating any intermediate objects.
read.delim("patient-data.txt") %>%
  tbl_df() %>%
  mutate(
    Sex = factor(str_trim(Sex)),
    # Strip the unit suffixes so the measurements can be parsed as numbers.
    Height = as.numeric(str_replace_all(Height, pattern = "cm", "")),
    Weight = as.numeric(str_replace_all(Weight, "kg", ""))
  ) %>%
  mutate(
    # Height is divided by 100 to rescale from cm to m before squaring.
    BMI = Weight / (Height / 100)^2,
    Overweight = BMI > 25
  ) %>%
  # Rewrite "Yes"/"No" as "TRUE"/"FALSE" so as.logical() can parse the column.
  # The replacement is given as a string, as str_replace_all() documents,
  # rather than relying on a logical being coerced to text.
  mutate(Smokes = str_replace_all(Smokes, "Yes", "TRUE")) %>%
  mutate(Smokes = as.logical(str_replace_all(Smokes, "No", "FALSE"))) %>%
  select(ID, Name, Birth, BMI, Smokes, Overweight, Died) %>%
  filter(Smokes & Overweight & !Died) %>%
  write.csv("candidates.csv")