-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathDataFileUtil.spec
569 lines (472 loc) · 21.9 KB
/
DataFileUtil.spec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
/*
Contains utilities for saving and retrieving data to and from KBase data
services. Requires Shock 0.9.6+ and Workspace Service 0.4.1+.
Note that some calls may create files or directories in the root of the scratch space (typically
/kb/module/work/tmp). For this reason client programmers should not request that DFU archive from
the root of the scratch space - always create a new directory (e.g. using a UUID for a name or a
standard library temporary directory utility) and add the target files to that directory when
archiving.
*/
module DataFileUtil {
/* A boolean - 0 for false, 1 for true.
NOTE(review): confirm how the type compiler interprets the parentheses in the range
annotation below; if they denote exclusive bounds, '[0, 1]' is needed to admit 0 and 1.
@range (0, 1)
*/
typedef int boolean;
/* A handle for a file stored in Shock.
hid - the id of the handle in the Handle Service that references this
shock node
file_name - the name of the file
id - the id for the shock node
url - the url of the shock server
type - the type of the handle. This should always be shock.
remote_md5 - the md5 digest of the file.
*/
typedef structure {
string hid;
string file_name;
string id;
string url;
string type;
string remote_md5;
} Handle;
/* Input for the shock_to_file function.
Required parameters:
shock_id | handle_id - the ID of the Shock node, or the Handle to a shock node.
file_path - the location to save the file output. If this is a
directory, the file will be named as per the filename in Shock.
Optional parameters:
unpack - either null, 'uncompress', or 'unpack'. 'uncompress' will cause
any bzip or gzip files to be uncompressed. 'unpack' will behave the
same way, but it will also unpack tar and zip archive files
(uncompressing gzipped or bzipped archive files if necessary). If
'uncompress' is specified and an archive file is encountered, an
error will be thrown. If the file is an archive, it will be
unbundled into the directory containing the original output file.
Note that if the file name (either as provided by the user or by
Shock) without a decompression extension (e.g. .gz, .zip or
.tgz -> .tar) points to an existing file and unpack is specified,
that file will be overwritten by the decompressed Shock file.
*/
typedef structure {
string shock_id;
string handle_id;
string file_path;
string unpack;
} ShockToFileParams;
/* Output from the shock_to_file function.
node_file_name - the filename of the file as stored in Shock.
file_path - the path to the downloaded file. If a directory was
specified in the input, this will be the directory appended with the
shock file name. If a file was specified, it will be that file path.
In either case, if the file is uncompressed any compression file
extensions will be removed (e.g. .gz) and/or altered (e.g. .tgz ->
.tar) as appropriate.
size - the size of the file in bytes as stored in Shock, prior to
unpacking.
attributes - the file attributes, if any, stored in Shock.
*/
typedef structure {
string node_file_name;
string file_path;
int size;
mapping<string, UnspecifiedObject> attributes;
} ShockToFileOutput;
/* Download a file from Shock.
The node to fetch, the destination path and the optional unpack behavior are described in
ShockToFileParams; the returned fields are described in ShockToFileOutput.
*/
funcdef shock_to_file(ShockToFileParams params)
returns (ShockToFileOutput out) authentication required;
/* Download multiple files from Shock.
Each element of the input list is a ShockToFileParams structure, as accepted by
shock_to_file; the result is a list of ShockToFileOutput structures.
*/
funcdef shock_to_file_mass(list<ShockToFileParams> params)
returns(list<ShockToFileOutput> out) authentication required;
/* Input for the file_to_shock function.
Required parameters:
file_path - the location of the file (or directory if using the
pack parameter) to load to Shock.
Optional parameters:
attributes - DEPRECATED: user-specified attributes to save to the Shock node along with
the file. Attributes are currently ignored by the upload function and will be removed
entirely in a future version.
make_handle - make a Handle Service handle for the shock node. Default
false.
pack - compress a file or archive a directory before loading to Shock.
The file_path argument will be appended with the appropriate file
extension prior to writing. For gzips only, if the file extension
denotes that the file is already compressed, it will be skipped. If
file_path is a directory and tarring or zipping is specified, the
created file name will be set to the directory name, possibly
overwriting an existing file. Attempting to pack the root directory
is an error. Do not attempt to pack the scratch space root as noted
in the module description.
The allowed values are:
gzip - gzip the file given by file_path.
targz - tar and gzip the directory specified by the directory
portion of the file_path into the file specified by the
file_path.
zip - as targz but zip the directory.
*/
typedef structure {
string file_path;
mapping<string, UnspecifiedObject> attributes;
boolean make_handle;
string pack;
} FileToShockParams;
/* Output of the file_to_shock function.
shock_id - the ID of the new Shock node.
handle - the new handle, if created. Null otherwise.
node_file_name - the name of the file stored in Shock.
size - the size of the file stored in shock. Note that this field is declared as a
string, whereas the size field of ShockToFileOutput is an int.
*/
typedef structure {
string shock_id;
Handle handle;
string node_file_name;
string size;
} FileToShockOutput;
/* Load a file to Shock.
The file (or directory, when packing) to upload and the optional handle creation and
packing behavior are described in FileToShockParams.
*/
funcdef file_to_shock(FileToShockParams params)
returns (FileToShockOutput out) authentication required;
/* Input for the unpack_file function.
file_path - the path to the file to uncompress and / or unpack.
*/
typedef structure {
string file_path;
} UnpackFileParams;
/* Output of the unpack_file function.
file_path - a file path resulting from the unpack operation.
NOTE(review): presumably this follows the same rules as UnpackFilesResult.file_path -
confirm against the implementation.
*/
typedef structure {
string file_path;
} UnpackFileResult;
/*
Using the same logic as unpacking a Shock file, this method will cause
any bzip or gzip files to be uncompressed, and then unpack tar and zip
archive files (uncompressing gzipped or bzipped archive files if
necessary). If the file is an archive, it will be unbundled into the
directory containing the original output file.
Unlike unpack_files, the input structure for this function has no 'unpack' option and
only a single file is processed per call.
*/
funcdef unpack_file(UnpackFileParams params)
returns (UnpackFileResult out) authentication required;
/*
Input parameters for the unpack_files function.
Required parameter:
file_path - the path to the file to unpack. The file will be unpacked into the file's
parent directory.
Optional parameter:
unpack - either 'uncompress' or 'unpack'. 'uncompress' will cause
any bzip or gzip files to be uncompressed. 'unpack' will behave the
same way, but it will also unpack tar and zip archive files
(uncompressing gzipped or bzipped archive files if necessary). If
'uncompress' is specified and an archive file is encountered, an
error will be thrown. If the file is an archive, it will be
unbundled into the directory containing the original output file.
Defaults to 'unpack'.
Note that if the file name (either as provided by the user or by
Shock) without a decompression extension (e.g. .gz, .zip or
.tgz -> .tar) points to an existing file and unpack is specified,
that file will be overwritten by the decompressed Shock file.
*/
typedef structure {
string file_path;
string unpack;
} UnpackFilesParams;
/*
Output parameters for the unpack_files function.
file_path - the path to one of the following, depending on the input file:
a) the unpacked file, or
b) in the case of archive files, the original archive file, possibly
uncompressed, or
c) in the case of regular files that don't need processing, the input
file.
*/
typedef structure {
string file_path;
} UnpackFilesResult;
/*
Using the same logic as unpacking a Shock file, this method will cause
any bzip or gzip files to be uncompressed, and then unpack tar and zip
archive files (uncompressing gzipped or bzipped archive files if
necessary). If the file is an archive, it will be unbundled into the
directory containing the original output file.
The output list is in the same order as the input list.
*/
funcdef unpack_files(list<UnpackFilesParams> params)
returns (list<UnpackFilesResult> out) authentication required;
/* Input for the pack_file function.
Required parameters:
file_path - the location of the file (or directory, for the targz and zip
formats) to pack.
pack - The format into which the file or files will be packed.
The file_path argument will be appended with the appropriate file
extension prior to writing. For gzips only, if the file extension
denotes that the file is already compressed, it will be skipped. If
file_path is a directory and tarring or zipping is specified, the
created file name will be set to the directory name, possibly
overwriting an existing file. Attempting to pack the root directory
is an error. Do not attempt to pack the scratch space root as noted
in the module description.
The allowed values are:
gzip - gzip the file given by file_path.
targz - tar and gzip the directory specified by the directory
portion of the file_path into the file specified by the
file_path.
zip - as targz but zip the directory.
*/
typedef structure {
string file_path;
string pack;
} PackFileParams;
/* Output from the pack_file function.
file_path - the path to the packed file. As described in PackFileParams, the input path
is appended with a file extension appropriate to the pack format.
*/
typedef structure {
string file_path;
} PackFileResult;
/*
Pack a file or directory into a gzip, targz, or zip archive, as selected by the pack
field of PackFileParams.
*/
funcdef pack_file(PackFileParams params)
returns (PackFileResult out) authentication required;
/* Input for the package_for_download function.
Required parameters:
file_path - the location of the directory to compress as a zip archive
before loading to Shock. This argument will be appended with the
'.zip' file extension prior to writing. If it is a directory, the file
name of the created archive will be set to the directory name
followed by '.zip', possibly overwriting an existing file.
Attempting to pack the root directory is an error. Do not attempt
to pack the scratch space root as noted in the module description.
ws_refs - list of references to workspace objects which will be used to
produce info-files in JSON format containing workspace metadata and
provenance structures. The new files are written to the folder pointed
to by file_path (or to the folder containing the file pointed to by
file_path if it is not a folder).
Optional parameters:
attributes - DEPRECATED: user-specified attributes to save to the Shock node along with
the file. Attributes are currently ignored by the upload function and will be removed
entirely in a future version.
*/
typedef structure {
string file_path;
mapping<string, UnspecifiedObject> attributes;
list<string> ws_refs;
} PackageForDownloadParams;
/* Output of the package_for_download function.
shock_id - the ID of the new Shock node.
node_file_name - the name of the file stored in Shock.
size - the size of the file stored in shock. Note that this field is declared as a
string.
*/
typedef structure {
string shock_id;
string node_file_name;
string size;
} PackageForDownloadOutput;
/* Package a directory for download: compress it as a zip archive, together with JSON
info-files generated for the given workspace object references, and load the archive to
Shock. See PackageForDownloadParams for details.
*/
funcdef package_for_download(PackageForDownloadParams params)
returns (PackageForDownloadOutput) authentication required;
/* Load multiple files to Shock.
Each element of the input list is a FileToShockParams structure, as accepted by
file_to_shock; the result is a list of FileToShockOutput structures.
*/
funcdef file_to_shock_mass(list<FileToShockParams> params)
returns (list<FileToShockOutput> out) authentication required;
/* Input for the copy_shock_node function.
Required parameters:
shock_id - the id of the node to copy.
Optional parameters:
make_handle - make a Handle Service handle for the shock node (0 for false, 1 for
true). Default false.
*/
typedef structure {
string shock_id;
boolean make_handle;
} CopyShockNodeParams;
/* Output of the copy_shock_node function.
shock_id - the id of the new Shock node.
handle - the new handle, if created (see make_handle in CopyShockNodeParams). Null
otherwise.
*/
typedef structure {
string shock_id;
Handle handle;
} CopyShockNodeOutput;
/* Copy a Shock node.
Returns the id of the new node and, if requested via make_handle, a handle for it.
*/
funcdef copy_shock_node(CopyShockNodeParams params)
returns(CopyShockNodeOutput out) authentication required;
/* Input for the own_shock_node function.
Required parameters:
shock_id - the id of the node for which the user needs ownership.
Optional parameters:
make_handle - make or find a Handle Service handle for the shock node (0 for false,
1 for true). Default false.
*/
typedef structure {
string shock_id;
boolean make_handle;
} OwnShockNodeParams;
/* Output of the own_shock_node function.
shock_id - the id of the (possibly new) Shock node.
handle - the handle, if requested (see make_handle in OwnShockNodeParams). Null
otherwise.
*/
typedef structure {
string shock_id;
Handle handle;
} OwnShockNodeOutput;
/* Gain ownership of a Shock node.
Given a shock node id, returns a shock node id which is owned by the caller.
If the shock node is already owned by the caller, the same shock node ID is
returned. If not, the ID of a copy of the original node is returned.
If a handle is requested, the node is already owned by the caller, and
a handle already exists, that handle will be returned. Otherwise a new
handle will be created and returned.
*/
funcdef own_shock_node(OwnShockNodeParams params)
returns(OwnShockNodeOutput out) authentication required;
/* Translate a workspace name to a workspace ID.
The numerical ID is the form of workspace identifier required by SaveObjectsParams.
*/
funcdef ws_name_to_id(string name) returns(int id) authentication required;
/* Information about an object, including user provided metadata.
This is a tuple; the elements appear in the order listed below.
objid - the numerical id of the object.
name - the name of the object.
type - the type of the object.
save_date - the save date of the object.
version - the version of the object.
saved_by - the user that saved or copied the object.
wsid - the id of the workspace containing the object.
workspace - the name of the workspace containing the object.
chsum - the md5 checksum of the object.
size - the size of the object in bytes.
meta - arbitrary user-supplied metadata about
the object.
*/
typedef tuple<int objid, string name, string type, string save_date,
int version, string saved_by, int wsid, string workspace, string chsum,
int size, mapping<string, string> meta> object_info;
/* An object and associated data required for saving.
Required parameters:
type - the workspace type string for the object. Omit the version
information to use the latest version.
data - the object data.
One of an object name or id:
name - the name of the object.
objid - the id of the object to save over.
Optional parameters:
meta - arbitrary user-supplied metadata for the object,
not to exceed 16kb; if the object type specifies automatic
metadata extraction with the 'meta ws' annotation, and your
metadata name conflicts, then your metadata will be silently
overwritten.
hidden - true if this object should not be listed when listing
workspace objects.
extra_provenance_input_refs - (optional) if set, these refs will
be appended to the primary ProvenanceAction input_ws_objects
reference list. In general, if the input WS object ref was
passed in from a narrative App, this will be set for you.
However, there are cases where the object ref passed to
the App is a container, and you are operating on a member
or subobject of the container, in which case to maintain
that direct mapping to those subobjects in the provenance
of new objects, you can provide additional object refs
here. For example, if the input is a ReadsSet, and your
App creates a new WS object for each read library in the
set, you may want a direct reference from each new WS
object not only to the set, but also to the individual
read library.
*/
typedef structure {
string type;
UnspecifiedObject data;
string name;
int objid;
mapping<string, string> meta;
boolean hidden;
list <string> extra_provenance_input_refs;
} ObjectSaveData;
/* Input parameters for the "save_objects" function.
Required parameters:
id - the numerical ID of the workspace (ws_name_to_id translates a workspace name
to this ID).
objects - the objects to save.
The object provenance is automatically pulled from the SDK runner.
*/
typedef structure {
int id;
list<ObjectSaveData> objects;
} SaveObjectsParams;
/*
Save objects to the workspace.
The objects will be sorted prior to saving to avoid the Workspace sort memory limit.
Note that if the object contains workspace object refs in mapping keys, that may cause
the Workspace to re-sort the data. To avoid this, convert any refs in mapping keys to UPA
format (e.g. #/#/#, where # is a positive integer).
If the data is very large, using the WSLargeDataIO SDK module is advised.
Saving over a deleted object undeletes it.
*/
funcdef save_objects(SaveObjectsParams params)
returns (list<object_info> info) authentication required;
/* Input parameters for the "get_objects" function.
Required parameters:
object_refs - a list of object references in the form X/Y/Z, where X is
the workspace name or id, Y is the object name or id, and Z is the
(optional) object version. In general, always use ids rather than
names if possible to avoid race conditions.
A reference path may be specified by separating references with a semicolon, e.g.
4/5/6;5/7/2;8/9/4 specifies that the user wishes to retrieve the fourth version of
the object with id 9 in workspace 8, and that there exists a reference path from
the sixth version of the object with id 5 in workspace 4, to which the user has access.
The user may or may not have access to workspaces 5 and 8.
Optional parameters:
ignore_errors - ignore any errors that occur when fetching an object
and instead insert a null into the returned list.
*/
typedef structure {
list<string> object_refs;
boolean ignore_errors;
} GetObjectsParams;
/* The data and supplemental info for an object.
UnspecifiedObject data - the object's data or subset data.
object_info info - information about the object, as an object_info tuple.
*/
typedef structure {
UnspecifiedObject data;
object_info info;
} ObjectData;
/* Results from the get_objects function.
list<ObjectData> data - the returned objects. As described for ignore_errors in
GetObjectsParams, an entry may be null when errors are ignored.
*/
typedef structure {
list<ObjectData> data;
} GetObjectsResults;
/* Get objects from the workspace.
The objects to fetch are given as references (optionally reference paths) in
GetObjectsParams.
*/
funcdef get_objects(GetObjectsParams params)
returns(GetObjectsResults results) authentication required;
/* Get the versions of the Workspace service and Shock service.
wsver - the version of the Workspace service.
shockver - the version of the Shock service.
*/
funcdef versions() returns(string wsver, string shockver)
authentication required;
/* Input parameters for the "download_staging_file" function.
Required parameters:
staging_file_subdir_path - the path of the file within the user's staging directory
(/data/bulk/user_name in the examples below), e.g.
for the file /data/bulk/user_name/file_name,
staging_file_subdir_path is file_name;
for the file /data/bulk/user_name/subdir_1/subdir_2/file_name,
staging_file_subdir_path is subdir_1/subdir_2/file_name.
*/
typedef structure {
string staging_file_subdir_path;
}DownloadStagingFileParams;
/* Results from the download_staging_file function.
copy_file_path - the path of the copied file in the scratch area.
*/
typedef structure {
string copy_file_path;
}DownloadStagingFileOutput;
/* Download a staging area file to the scratch area.
The file is identified by its path within the staging area (see
DownloadStagingFileParams); the path of the copy is returned.
*/
funcdef download_staging_file(DownloadStagingFileParams params)
returns(DownloadStagingFileOutput results) authentication required;
/* Input parameters for the "download_web_file" function.
Required parameters:
file_url - the URL of the file to download.
download_type - one of ['Direct Download', 'FTP', 'DropBox', 'Google Drive']
*/
typedef structure {
string file_url;
string download_type;
}DownloadWebFileParams;
/* Results from the download_web_file function.
copy_file_path - the path of the copied file in the scratch area.
*/
typedef structure {
string copy_file_path;
}DownloadWebFileOutput;
/* Download a web file to the scratch area.
The file URL and the kind of download are given in DownloadWebFileParams; the path of
the copy is returned.
*/
funcdef download_web_file(DownloadWebFileParams params)
returns(DownloadWebFileOutput results) authentication required;
};