-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathR_Input.sh
85 lines (70 loc) · 2.56 KB
/
R_Input.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/bin/bash
# Usage: R_Input.sh <config-file>
#
# The config file is sourced as shell code. After sourcing, two variables
# must be non-empty:
#   INFO    - command invoked once per cohort / reference / chromosome
#   cohorts - path to the CSV file listing the cohorts

# Fail early with a usage hint; without this check a missing argument makes
# `source ""` print a confusing shell error before the variable check fires.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "Usage: $0 <config-file>" >&2
  exit 1
fi

# Source the configuration file to load paths and parameters.
# The exit status of `source` is deliberately not tested: it reflects the
# last command inside the config file, not whether the variables were set.
# The explicit variable check below is the real gate.
source "$1"

# Check that the necessary variables were loaded.
if [ -z "$INFO" ] || [ -z "$cohorts" ]; then
  echo "Error: Config file variables not loaded properly." >&2
  exit 1
fi

# Check that the cohorts file exists.
if [ ! -f "$cohorts" ]; then
  echo "Error: Cohorts file not found." >&2
  exit 1
fi
#######################################
# Parse the cohorts CSV into the global arrays.
#
# Each record is split on commas into four fields (Cohort, Pathway, Prefix,
# Suffix); anything after the third comma stays in the last field.
# The first non-blank line is treated as the header and skipped.
#
# Globals:   cohort_names, cohort_paths, Prefixes, Suffixes (reset, then filled)
# Arguments: $1 - path to the CSV file
# Returns:   non-zero if the file cannot be opened
#######################################
load_cohorts() {
  local csv_file=$1
  local line Cohort Pathway Prefix Suffix
  local header_seen=false

  cohort_names=()
  cohort_paths=()
  Prefixes=()
  Suffixes=()

  # `|| [[ -n "$line" ]]` keeps the final record when the file has no
  # trailing newline (read returns non-zero but still fills $line).
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Drop a Windows line ending so it does not leak into the last field.
    line=${line%$'\r'}

    # Blank lines would otherwise become empty cohorts with paths like "/".
    if [[ -z "${line//[[:space:]]/}" ]]; then
      continue
    fi

    if [[ "$header_seen" == false ]]; then
      header_seen=true
      continue
    fi

    IFS=',' read -r Cohort Pathway Prefix Suffix <<< "$line"
    cohort_names+=("$Cohort")
    cohort_paths+=("$Pathway")
    Prefixes+=("$Prefix")
    Suffixes+=("$Suffix")
  done < "$csv_file"
}

# Read the cohorts file into arrays
load_cohorts "$cohorts"
# Print $1 with leading and trailing whitespace removed.
# Pure parameter expansion: unlike `echo ... | xargs`, quotes and backslashes
# inside the value are left untouched and no subprocess is spawned.
trim_ws() {
  local s=$1
  s="${s#"${s%%[![:space:]]*}"}"
  s="${s%"${s##*[![:space:]]}"}"
  printf '%s' "$s"
}

# Print $1 with trailing whitespace (including a stray carriage return)
# removed; leading characters are kept as-is.
rtrim_ws() {
  local s=$1
  s="${s%"${s##*[![:space:]]}"}"
  printf '%s' "$s"
}

#######################################
# For every ordered pair of distinct cohorts and every chromosome 1..22,
# run "$INFO" on <pathway>/<input>_chr<N>_imputed_<target> when the matching
# .vcf.gz exists, and record each successful run in a per-cohort TSV log
# (<pathway>/<input>_INFO_files.txt, columns: Path, Reference, chr).
#
# Globals:   cohort_names, cohort_paths, INFO (all read-only here)
# Arguments: none
# Outputs:   progress messages on stdout; one log file per input cohort
#######################################
process_cohort_pairs() {
  local -a names=() paths=()
  local i j chr
  local input_cohort input_pathway target_cohort
  local output_log base Input_file Output_file

  # Normalise names and paths once, BEFORE any path is built from them, so
  # the log file path and the data file paths use the same cleaned values.
  for i in "${!cohort_names[@]}"; do
    names[i]=$(trim_ws "${cohort_names[$i]}")
    paths[i]=$(rtrim_ws "${cohort_paths[$i]}")
  done

  for i in "${!names[@]}"; do
    input_cohort=${names[$i]}
    input_pathway=${paths[$i]}

    # Create a separate output log file for each input cohort
    output_log="${input_pathway}/${input_cohort}_INFO_files.txt"
    printf 'Path\tReference\tchr\n' > "$output_log"

    for j in "${!names[@]}"; do
      target_cohort=${names[$j]}

      # A cohort is never paired with itself (compared by name).
      if [[ "$input_cohort" == "$target_cohort" ]]; then
        continue
      fi

      # Loop through chromosomes 1 to 22
      for chr in {1..22}; do
        base="${input_cohort}_chr${chr}_imputed_${target_cohort}"
        Input_file="${input_pathway}/${base}"
        Output_file="${Input_file}.all.info.txt"

        # Check if the input file exists
        if [[ ! -f "${Input_file}.vcf.gz" ]]; then
          echo "Skipping chromosome ${chr}: Input file ${Input_file}.vcf.gz does not exist."
          continue
        fi

        # Run the command configured in $INFO with the directory and the
        # file base name as separate arguments.
        echo "Processing chromosome ${chr} for ${input_cohort} -> ${target_cohort}"
        if "$INFO" "${input_pathway}/" "$base"; then
          echo "Imputation of ${Input_file} is done"
          # Append the output file path and target cohort (reference) to the
          # cohort-specific log file.
          printf '%s\t%s\t%s\n' "$Output_file" "$target_cohort" "$chr" >> "$output_log"
        else
          echo "Error: Imputation of ${Input_file} failed"
        fi
      done
    done
  done
}

# Perform pairwise processing over all loaded cohorts
process_cohort_pairs