I attach the zip file, including all the necessary materials. Please do this based on the guideline file. Thank you.

1 answer below »
I attach the zip file, including all the necessary materials. Please do this based on the guideline file.
Thank you.
Answered Same Day: Aug 20, 2021

Answer To: I attach the zip file, including all the necessary materials. please do this based on the guideline...

Manish Kumar answered on Aug 21 2021
141 Votes
## Importing packages
# All packages are loaded up front so a missing dependency fails immediately
# instead of part-way through the analysis.
library(tidyverse) # utility functions
library(lubridate) # date parsing (mdy)
library(dplyr)     # select / group_by / mutate / filter / summarise
library(assertive) # NOTE(review): not used in the visible part of the script

# Import dataset: tab-separated text file whose first row holds column names.
HW2 <- read.table(
  "../input/greynode1/assignment 1b.txt",
  sep = "\t",
  header = TRUE
)
# NOTE(review): attach() is discouraged (it copies the columns onto the search
# path, where they can silently go stale or be masked). None of the code
# visible here needs it; it is kept only in case code further down the script
# refers to bare column names -- remove once that is confirmed.
attach(HW2)
# Print the imported data for a quick visual check.
HW2
# PartE
# Master table for Part E: a working copy of the imported data.
PartE_master <- HW2
# Only the customer id and the month counter are needed for the gap check.
PartE_lookup <- HW2 %>%
  select(cust_id, months_since_survey)

# Row-to-row difference of months_since_survey within each customer.
# The lag default (first value minus 1) makes the first row of every customer
# evaluate to exactly 1, so a customer's first row is never flagged.
# NOTE(review): the difference depends on row order; this presumably assumes
# rows are already ordered by month within each cust_id -- TODO confirm.
PartE_month_diff <- PartE_lookup %>%
  group_by(cust_id) %>%
  mutate(
    month_diff = months_since_survey -
      lag(months_since_survey, default = months_since_survey[1] - 1)
  ) %>%
  ungroup() # do not leak the grouping into later steps
PartE_month_diff

# Keep the rows whose step is anything other than exactly 1 (gaps, repeats or
# decreases). Rows where month_diff is NA are dropped by filter() and are
# therefore not flagged.
PartE_invalid <- PartE_month_diff %>%
  filter(month_diff != 1)

# Customers with at least one invalid step.
PartE_cust_id_invalid <- unique(PartE_invalid$cust_id)

# Remove every row belonging to an invalid customer. %in% never returns NA,
# so plain logical indexing is safe here.
PartE_final <- PartE_master[
  !(PartE_master$cust_id %in% PartE_cust_id_invalid), ,
  drop = FALSE
]

# How many customers have this data issue?
length(PartE_cust_id_invalid)
# How many rows were removed?
nrow(PartE_master) - nrow(PartE_final)
# How many unique customers are left in the dataset?
length(unique(PartE_final$cust_id))
# How many rows of data are left?
nrow(PartE_final)
# PartF
# Data frame holding only cust_id and survey_date, taken from the Part E result.
column_F <- PartE_final %>%
  select(cust_id, survey_date)
column_F

# Convert survey_date to Date using month-day-year parsing. as.character()
# makes the conversion explicit in case the column was read in as a factor;
# it is a no-op for character input. Values that cannot be parsed become NA.
column_F$survey_date <- mdy(as.character(column_F$survey_date))
# Show the parsed dates and confirm the resulting class.
column_F$survey_date
class(column_F$survey_date)

# Count the non-missing survey_date values for each cust_id.
Part_F_non <- column_F %>%
  group_by(cust_id) %>%
  summarise(non_na_count = sum(!is.na(survey_date)))
Part_F_non

# A customer is treated as invalid unless exactly one survey_date is present.
PartF_invalid <- Part_F_non %>%
  filter(non_na_count != 1)

# Number of customers with an invalid survey_date count.
PartF_cust_id_invalid <- unique(PartF_invalid$cust_id)
length(PartF_cust_id_invalid)
#final dataset for...
SOLUTION.PDF

Answer To This Question Is Available To Download

Related Questions & Answers

More Questions »

Submit New Assignment

Copy and Paste Your Assignment Here