#' ---
#' title: "Extract labels and predictions for the validation dataset"
#' authors: "Gaurav Kandoi, Julie A. Dickerson"
#' date: "November 13, 2018"
#' affiliation: "Iowa State University of Science and Technology"
#' license: "CC BY 4.0"
#' ---

# Load the required libraries
library(data.table)
library(tidyverse)

# Every relative path used from here on is resolved against this directory.
setwd("../Predictions")

# Collect the names of the files containing the predictions.
# The dot before "gz" is escaped so it only matches a literal ".".
fileNames <- list.files(
  pattern = "Predictions_MMU_.*csv\\.gz$",
  full.names = TRUE
)
if (length(fileNames) == 0) {
  stop(
    "No files matching 'Predictions_MMU_*csv.gz' found in ", getwd(),
    call. = FALSE
  )
}

# Read the validation dataset (two comma-separated columns, no header) and
# drop duplicated rows.
pairsToValidateDF1 <- read.table(
  "pairsToValidate13Nov18.txt.gz",
  header = FALSE,
  sep = ",",
  stringsAsFactors = FALSE
) %>%
  unique()
colnames(pairsToValidateDF1) <- c("Iso1", "Iso2")

# Replicate the validation pairs to remove directionality.
# rbind() on data frames matches columns by name, so the renamed copy
# contributes every pair with Iso1 and Iso2 swapped.
pairsToValidateDF <- pairsToValidateDF1 %>%
  rbind(pairsToValidateDF1 %>% rename(Iso2 = "Iso1", Iso1 = "Iso2"))

# Define a function 'readDatFile1' which,
#   1) decompresses a prediction file with pigz,
#   2) reads the decompressed csv,
#   3) recompresses the file (also when reading or joining fails) and,
#   4) returns the predictions for the mRNA isoform pairs in the validation
#      dataset.
#
# Arguments:
#   f        Path to one gzip-compressed prediction file (ending in ".gz").
#   threads  Number of threads handed to pigz via -p (default 24).
# Returns:
#   The inner join of the global 'pairsToValidateDF' with the predictions
#   read from 'f'.
# Errors:
#   Stops if pigz reports a non-zero exit status while decompressing.
readDatFile1 <- function(f, threads = 24L) {
  csvPath <- tools::file_path_sans_ext(f)

  # shQuote() keeps paths with spaces or shell metacharacters intact.
  status <- system(paste("pigz -d -p", threads, shQuote(f)))
  if (status != 0) {
    stop(
      "pigz failed to decompress ", f, " (exit status ", status, ")",
      call. = FALSE
    )
  }
  # Registered only after a successful decompression, so the file is always
  # put back in its compressed form when the function exits.
  on.exit(system(paste("pigz -p", threads, shQuote(csvPath))), add = TRUE)

  tissue <- fread(csvPath, header = TRUE, sep = ",", stringsAsFactors = FALSE)

  # Natural join on the column names shared by both tables
  # (presumably Iso1 and Iso2 -- confirm against the prediction file header).
  inner_join(pairsToValidateDF, tissue)
}

# Apply the function 'readDatFile1' to all the prediction files and save the
# results in a list named after each file (without the "_Cases.csv.gz" suffix).
FNdata.files1 <- lapply(fileNames, readDatFile1)
names(FNdata.files1) <- gsub(
  pattern = "_Cases.csv.gz",
  replacement = "",
  x = basename(fileNames),
  fixed = TRUE
)

# Combine the results from every file into a single dataframe; the list names
# become the "id" column.
functionalNetworks1 <- bind_rows(FNdata.files1, .id = "id") %>%
  unique()

# Extract the tissue level predictions and discard other fields.
# Reduce the combined table to the isoform pair, every "pred*" column and
# column 38, then add a per-row total in 'Sum'.
# The input is 'functionalNetworks1', the combined table built earlier in this
# script; no object named 'functionalNetworks' is ever defined.
# NOTE(review): the positional indices (4, 38 and 21) depend on the column
# layout of the prediction files -- confirm against the file header.
validated <- functionalNetworks1 %>%
  # Drop column 1 (the "id" added by bind_rows) and column 4.
  dplyr::select(-1, -4) %>%
  dplyr::select(1, 2, starts_with("pred"), 38) %>%
  # Sum every remaining column except columns 1, 2 and 21.
  mutate(Sum = base::rowSums(.[, -c(1, 2, 21)]))

# Read the current labels of the validation dataset (header-less csv, so the
# columns are named V1, V2, ...).
TrueLabels <- read.table(
  "MMU_TrueLabels_Validation_12Nov18.txt.gz",
  sep = ",",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Combine the predictions and current labels for the validation dataset.
# Pairs without a label are kept (left join).
Predictions <- left_join(
  validated,
  TrueLabels,
  by = c("Iso1" = "V1", "Iso2" = "V2")
)

# Write the results to file.
write.table(
  Predictions,
  "../Data/validatedTPPairsLabelled13Nov18.txt",
  sep = ",",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)