-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathrun_postprocess.sh
executable file
·98 lines (79 loc) · 3.19 KB
/
run_postprocess.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/bash
# Configuration section of run_postprocess.sh: shell options, directories,
# file names and the fold count used by the processing steps that follow.
# All paths hang off the directory the script is started from.

# strict mode: exit immediately if a command exits with a non-zero status
# (-e), if an unset variable is expanded (-u), or if any stage of a pipeline
# fails (pipefail)
set -euo pipefail

# directories
current_dir=$(pwd)
data_dir="$current_dir/data"
output_dir="$current_dir/output"
folds_dir="$output_dir/folds"
final_dir="$output_dir/final"

# file names
dr_filename="domain_range.txt"
data_filename="data.txt"
entities_filename="entities.txt"
entity_full_names_filename="entity_full_names.txt"
entity_full_names_copy_filename="entity_full_names_copy.txt"
relations_filename="relations.txt"
hypotheses_filename="hypotheses.txt"

# file paths; domain_range.txt is the only one read from the data directory
dr_filepath="$data_dir/$dr_filename"
entities_filepath="$output_dir/$entities_filename"
entity_full_names_filepath="$output_dir/$entity_full_names_filename"
entity_full_names_copy_filepath="$output_dir/$entity_full_names_copy_filename"
relations_filepath="$output_dir/$relations_filename"
hypotheses_filepath="$output_dir/$hypotheses_filename"
dr_copy_filepath="$output_dir/$dr_filename"
data_filepath="$output_dir/$data_filename"
data_copy_filepath="$folds_dir/$data_filename"

# variables
num_folds=5 # fold directories are named fold_0 .. fold_<num_folds - 1>

# none of the values above is meant to change once computed
readonly current_dir data_dir output_dir folds_dir final_dir
readonly dr_filename data_filename entities_filename
readonly entity_full_names_filename entity_full_names_copy_filename
readonly relations_filename hypotheses_filename
readonly dr_filepath entities_filepath entity_full_names_filepath
readonly entity_full_names_copy_filepath relations_filepath
readonly hypotheses_filepath dr_copy_filepath data_filepath
readonly data_copy_filepath num_folds
# Post-processing pipeline. Steps, in order:
#   1. run postprocess_data.py;
#   2. rewrite ':', ' ' and ',' as placeholder tokens in the entity, data and
#      domain/range files;
#   3. write relations.txt from the first column of the domain/range copy;
#   4. rewrite every file in each fold directory and in the final directory,
#      then copy the shared files into each of them.
# Uses the path variables and num_folds assigned at the top of this script.
# Relies on GNU sed ('-i' without a backup suffix).

# sed expressions that swap the three reserved characters for tokens.
# NOTE: ':' is a colon, yet its token is spelled #SEMICOLON#. The token is
# data written to the output files, so the spelling is kept unchanged.
escape_rules=(
  -e 's|:|#SEMICOLON#|g'
  -e 's| |#SPACE#|g'
  -e 's|,|#COMMA#|g'
)

# Rewrite the reserved characters in place in each file given as an argument.
escape_files() {
  sed -i -E "${escape_rules[@]}" -- "$@"
}

# Rewrite the reserved characters in place in every regular file under the
# directory $1. '{} +' hands the files to sed in batches and, unlike '{} \;',
# makes find return non-zero when sed fails, so the failure is not lost.
escape_tree() {
  find "$1" -type f -exec sed -i -E "${escape_rules[@]}" -- {} +
}

# Copy the shared files (entities, entity full names, domain/range,
# relations) into the directory $1. The entity-full-names copy is stored
# under the original entity_full_names file name.
copy_common_files() {
  cp -- "$entities_filepath" "$1"
  cp -- "$entity_full_names_copy_filepath" "$1/$entity_full_names_filename"
  cp -- "$dr_copy_filepath" "$1"
  cp -- "$relations_filepath" "$1"
}

# Delete the intermediate copies kept in the output directory; -f keeps this
# quiet when a copy was never created.
remove_temporary_files() {
  rm -f -- "$dr_copy_filepath" "$entity_full_names_copy_filepath"
}

# do post-processing
python3 postprocess_data.py

# from here on intermediate copies get created; remove them on every exit
# path, including an abort caused by a failing command
trap remove_temporary_files EXIT

# process entity_full_names.txt
cp -- "$entity_full_names_filepath" "$entity_full_names_copy_filepath"
# The colon rule differs from escape_rules: it only matches lines holding at
# least three ':' and, because the first group is greedy, replaces a single
# ':' near the end of the line; every other ':' is left as it is.
sed -i -E \
  -e 's|(.+:.+:.+)(:)(.+)|\1#SEMICOLON#\3|g' \
  -e 's| |#SPACE#|g' \
  -e 's|,|#COMMA#|g' \
  -- "$entity_full_names_copy_filepath"

# process entities.txt (rewritten in place, no copy is made)
escape_files "$entities_filepath"

# process data.txt (the rewritten copy is placed in the folds directory)
cp -- "$data_filepath" "$data_copy_filepath"
escape_files "$data_copy_filepath"

# process domain_range.txt: drop the first line, then rewrite the rest
cp -- "$dr_filepath" "$dr_copy_filepath"
sed -i -E -e '1d' "${escape_rules[@]}" -- "$dr_copy_filepath"

# generate relations.txt file from the first tab-separated column
cut -f 1 -- "$dr_copy_filepath" > "$relations_filepath"

# copy hypotheses to final directory as test.txt file
cp -- "$hypotheses_filepath" "$final_dir/test.txt"

# process folds: rewrite the files already in each fold directory first, then
# add the shared files, which have been rewritten above
for ((i = 0; i < num_folds; i++)); do
  fold_dir="$folds_dir/fold_$i"
  escape_tree "$fold_dir"
  copy_common_files "$fold_dir"
done

# process final data the same way
escape_tree "$final_dir"
copy_common_files "$final_dir"

# temporary files are removed by the EXIT trap installed above