-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathAppendUniqueID.ecl
172 lines (154 loc) · 5.65 KB
/
AppendUniqueID.ecl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/**
 * Append a unique numeric record identifier to a dataset.  The new dataset
 * is returned; the identifier is an UNSIGNED6 field placed ahead of the
 * input dataset's own fields.
 *
 * One of three strategies is selected at the end of the macro:
 *
 *   - strictlySequential is TRUE and the input has fewer than 1,000,000
 *     records:  a single global PROJECT numbers the records with COUNTER.
 *   - strictlySequential is TRUE and the input has 1,000,000 or more
 *     records:  records are counted per node, a starting offset is derived
 *     for every node, and each node then numbers its own records locally
 *     from its offset.
 *   - strictlySequential is FALSE:  each node numbers its own records
 *     locally using a formula that interleaves the node number, yielding
 *     unique values that may contain gaps.
 *
 * @param   inFile              The dataset to process; REQUIRED.
 * @param   startingID          The minimum record identifier to assign;
 *                              OPTIONAL; defaults to 1.
 * @param   strictlySequential  If TRUE, the assigned IDs will be in numerically
 *                              increasing order; if FALSE, the assigned IDs
 *                              will be unique but there may be gaps,
 *                              depending on how the records are distributed;
 *                              OPTIONAL; defaults to FALSE.
 * @param   idAttr              The name of the unique ID attribute to add to
 *                              the dataset; this is a bare keyword, not a
 *                              string; OPTIONAL, defaults to uid
 *
 * @return  A new dataset with the appended unique identifier
 *          field.
 *
 * Origin:  https://github.com/hpccsystems-solutions-lab/Useful_ECL
 */
EXPORT AppendUniqueID(inFile, startingID = 1, strictlySequential = FALSE, idAttr = 'uid') := FUNCTIONMACRO
    IMPORT Std;

    // Definition of %result% layout:  the new ID field followed by every
    // field of the input dataset
    #UNIQUENAME(ResultRec);
    LOCAL %ResultRec% := RECORD
        UNSIGNED6   idAttr;
        RECORDOF(inFile);
    END;

    //--------------------------------------------------------------------------
    // Assign sequential values for only a few records
    //--------------------------------------------------------------------------

    // Global (non-LOCAL) PROJECT, so COUNTER runs 1..N across the whole
    // dataset and the first record receives exactly startingID
    #UNIQUENAME(seqFew);
    LOCAL %seqFew% := PROJECT
        (
            inFile,
            TRANSFORM
                (
                    %ResultRec%,
                    SELF.idAttr := (startingID) + COUNTER - 1,
                    SELF := LEFT
                )
        );

    //--------------------------------------------------------------------------
    // Assign sequential values for many records
    //--------------------------------------------------------------------------

    #UNIQUENAME(nodeNum);
    #UNIQUENAME(nodeCount);
    #UNIQUENAME(nodeOffset);

    #UNIQUENAME(ResultRecWithNode);
    LOCAL %ResultRecWithNode% := RECORD
        %ResultRec%;
        UNSIGNED4   %nodeNum%;
    END;

    // Append zero-value RecID value and the Thor node number to each record.
    // The node number is stored 1-based on purpose:  the local ITERATE
    // further down recognizes the first record on a node by comparing this
    // field against the blank LEFT record (where it is 0).  With a zero-based
    // value the first record on node 0 would look like a continuation and
    // would be numbered 1 instead of startingID.
    #UNIQUENAME(inFileWithNodeNum);
    LOCAL %inFileWithNodeNum% := PROJECT
        (
            inFile,
            TRANSFORM
                (
                    %ResultRecWithNode%,
                    SELF.idAttr := 0,
                    SELF.%nodeNum% := Std.System.Thorlib.Node() + 1,
                    SELF := LEFT
                ),
            LOCAL
        );

    // Count the number of records on each node; the node number here must
    // use the same 1-based convention as the per-record field because the
    // two are matched in the JOIN below
    #UNIQUENAME(recCountsPerNode);
    LOCAL %recCountsPerNode% := TABLE
        (
            %inFileWithNodeNum%,
            {
                UNSIGNED4   %nodeNum% := Std.System.Thorlib.Node() + 1,
                UNSIGNED6   %nodeCount% := COUNT(GROUP),
                UNSIGNED6   %nodeOffset% := 0
            },
            LOCAL
        );

    // Compute the node offset, which is the minimum RecID value for any
    // record on that node:  the first row starts at startingID and every
    // later row starts where the previous row's range ends
    #UNIQUENAME(nodeOffsets);
    LOCAL %nodeOffsets% := ITERATE
        (
            %recCountsPerNode%,
            TRANSFORM
                (
                    RECORDOF(%recCountsPerNode%),
                    SELF.%nodeOffset% := IF(COUNTER = 1, startingID, LEFT.%nodeOffset% + LEFT.%nodeCount%),
                    SELF := RIGHT
                )
        );

    // Append the node offset to the data
    #UNIQUENAME(inFileWithOffsets);
    LOCAL %inFileWithOffsets% := JOIN
        (
            %inFileWithNodeNum%,
            %nodeOffsets%,
            LEFT.%nodeNum% = RIGHT.%nodeNum%,
            MANY LOOKUP
        );

    // Iterate through the records on each node, computing the RecID value.
    // A mismatch between LEFT and RIGHT node numbers marks the first record
    // seen on a node (LEFT is blank, so its node number is 0 while every real
    // node number is >= 1); that record takes the node's offset and each
    // following record takes the previous ID plus one.
    #UNIQUENAME(inFileSequenced);
    LOCAL %inFileSequenced% := ITERATE
        (
            %inFileWithOffsets%,
            TRANSFORM
                (
                    RECORDOF(%inFileWithOffsets%),
                    SELF.idAttr := IF(LEFT.%nodeNum% = RIGHT.%nodeNum%, LEFT.idAttr + 1, RIGHT.%nodeOffset%),
                    SELF := RIGHT
                ),
            LOCAL
        );

    // Put the data in its final form (drops the helper node/offset fields)
    #UNIQUENAME(seqMany);
    LOCAL %seqMany% := PROJECT
        (
            %inFileSequenced%,
            TRANSFORM
                (
                    %ResultRec%,
                    SELF := LEFT
                ),
            LOCAL
        );

    //--------------------------------------------------------------------------
    // Assign unique values, not necessarily sequential
    //--------------------------------------------------------------------------

    // Writing c = COUNTER - 1 and n = Node(), the value assigned is
    // c * (Nodes() + 1) + n + startingID.  Since n is always smaller than
    // Nodes() + 1, two different (c, n) pairs can never produce the same
    // value, so IDs are unique without any cross-node coordination.  The
    // trailing "+ COUNTER - 1" makes the sequence sparser than strictly
    // necessary; it is kept as-is so values assigned by earlier runs of
    // this macro do not change.
    #UNIQUENAME(uniq);
    LOCAL %uniq% := PROJECT
        (
            inFile,
            TRANSFORM
                (
                    %ResultRec%,
                    SELF.idAttr := ((COUNTER - 1) * Std.System.Thorlib.Nodes()) + Std.System.Thorlib.Node() + (startingID) + COUNTER - 1,
                    SELF := LEFT
                ),
            LOCAL
        );

    //--------------------------------------------------------------------------
    // Create resulting dataset using a method dependent on the strictness
    // of the RecID value and the size of the input
    //--------------------------------------------------------------------------

    #UNIQUENAME(result);
    LOCAL %result% := MAP
        (
            (strictlySequential) AND COUNT(inFile) >= 1000000   =>  %seqMany%,
            (strictlySequential)                                =>  %seqFew%,
            %uniq%
        );

    RETURN %result%;
ENDMACRO;