1
1
"""Tools for working with Descriptors."""
2
+ from abc import abstractmethod
2
3
from typing import Type , List , Mapping , Optional , Union
3
4
from uuid import UUID
4
5
7
8
from citrine ._serialization .serializable import Serializable
8
9
from citrine .informatics .descriptors import Descriptor
9
10
from citrine .resources .file_link import FileLink
11
+ from citrine .resources .gemtables import GemTable
10
12
11
13
__all__ = ['DataSource' ,
12
14
'CSVDataSource' ,
@@ -34,13 +36,39 @@ def get_type(cls, data) -> Type[Serializable]:
34
36
if "type" not in data :
35
37
raise ValueError ("Can only get types from dicts with a 'type' key" )
36
38
types : List [Type [Serializable ]] = [
37
- CSVDataSource , GemTableDataSource , ExperimentDataSourceRef
39
+ CSVDataSource , GemTableDataSource , ExperimentDataSourceRef , SnapshotDataSource
38
40
]
39
41
res = next ((x for x in types if x .typ == data ["type" ]), None )
40
42
if res is None :
41
43
raise ValueError ("Unrecognized type: {}" .format (data ["type" ]))
42
44
return res
43
45
46
@property
@abstractmethod
def _data_source_type(self) -> str:
    """Type tag for this kind of data source.

    This string is the first term of a data_source_id.  Concrete data
    sources must supply it.
    """
50
+
51
@classmethod
def from_data_source_id(cls, data_source_id: str) -> "DataSource":
    """Build a DataSource from a data_source_id.

    The identifier is split on ``"::"``.  The first term selects the
    concrete class whose ``_data_source_type`` equals it; every remaining
    term is handed positionally to that class's ``_data_source_id_builder``.

    Parameters
    ----------
    data_source_id: str
        The identifier to parse

    Raises
    ------
    ValueError
        If the leading term does not match any known data source type.

    """
    type_key, *remainder = data_source_id.split("::")
    candidates: List[Type[Serializable]] = [
        CSVDataSource, GemTableDataSource, ExperimentDataSourceRef, SnapshotDataSource
    ]
    for candidate in candidates:
        if candidate._data_source_type == type_key:
            # First match wins, mirroring the order of the candidate list.
            return candidate._data_source_id_builder(*remainder)
    raise ValueError("Unrecognized type: {}".format(type_key))
62
+
63
@classmethod
@abstractmethod
def _data_source_id_builder(cls, *args) -> "DataSource":
    """Construct a DataSource from the terms of a parsed data_source_id.

    Parameters
    ----------
    *args
        The terms of the data_source_id that follow the leading type term

    """
67
+
68
@abstractmethod
def to_data_source_id(self) -> str:
    """Produce the data_source_id string that identifies this DataSource."""
71
+
44
72
45
73
class CSVDataSource (Serializable ['CSVDataSource' ], DataSource ):
46
74
"""A data source based on a CSV file stored on the data platform.
@@ -65,6 +93,8 @@ class CSVDataSource(Serializable['CSVDataSource'], DataSource):
65
93
properties .String , properties .Object (Descriptor ), "column_definitions" )
66
94
identifiers = properties .Optional (properties .List (properties .String ), "identifiers" )
67
95
96
+ _data_source_type = "csv"
97
+
68
98
def __init__ (self ,
69
99
* ,
70
100
file_link : FileLink ,
@@ -74,6 +104,20 @@ def __init__(self,
74
104
self .column_definitions = column_definitions
75
105
self .identifiers = identifiers
76
106
107
@classmethod
def _data_source_id_builder(cls, *args) -> DataSource:
    """Build a CSVDataSource from the parsed terms of a data_source_id.

    ``args[0]`` is used as the file link URL and ``args[1]`` as its
    filename.  The column definitions are left empty.
    """
    # TODO Figure out how to populate the column definitions
    link = FileLink(url=args[0], filename=args[1])
    return CSVDataSource(file_link=link, column_definitions={})
114
+
115
def to_data_source_id(self) -> str:
    """Produce the data_source_id for this DataSource.

    The result is the type tag, the file link URL and the file link
    filename, each converted with ``str`` and joined by ``"::"``.
    """
    parts = (self._data_source_type, self.file_link.url, self.file_link.filename)
    return "::".join(map(str, parts))
120
+
77
121
78
122
class GemTableDataSource (Serializable ['GemTableDataSource' ], DataSource ):
79
123
"""A data source based on a GEM Table hosted on the data platform.
@@ -92,13 +136,37 @@ class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
92
136
table_id = properties .UUID ("table_id" )
93
137
table_version = properties .Integer ("table_version" )
94
138
139
+ _data_source_type = "gemd"
140
+
95
141
def __init__(self, *, table_id: UUID, table_version: Union[int, str]):
    """Store the identifier and version of the referenced table.

    Both arguments are keyword-only.
    """
    self.table_id: UUID = table_id
    self.table_version: Union[int, str] = table_version
101
147
148
@classmethod
def _data_source_id_builder(cls, *args) -> DataSource:
    """Build a GemTableDataSource from the parsed terms of a data_source_id.

    ``args[0]`` is parsed as the table UUID; ``args[1]`` is passed through
    unchanged as the table version.
    """
    parsed_table_id = UUID(args[0])
    version = args[1]
    return GemTableDataSource(table_id=parsed_table_id, table_version=version)
151
+
152
def to_data_source_id(self) -> str:
    """Produce the data_source_id for this DataSource.

    The result is the type tag, the table id and the table version, each
    converted with ``str`` and joined by ``"::"``.
    """
    parts = (self._data_source_type, self.table_id, self.table_version)
    return "::".join(map(str, parts))
157
+
158
@classmethod
def from_gemtable(cls, table: GemTable) -> "GemTableDataSource":
    """Create the DataSource that points at a given GemTable.

    Parameters
    ----------
    table: GemTable
        The GemTable object to reference; its ``uid`` and ``version``
        become the table id and table version of the data source

    """
    source_id = table.uid
    source_version = table.version
    return GemTableDataSource(table_id=source_id, table_version=source_version)
169
+
102
170
103
171
class ExperimentDataSourceRef (Serializable ['ExperimentDataSourceRef' ], DataSource ):
104
172
"""A reference to a data source based on an experiment result hosted on the data platform.
@@ -113,5 +181,42 @@ class ExperimentDataSourceRef(Serializable['ExperimentDataSourceRef'], DataSourc
113
181
typ = properties .String ('type' , default = 'experiments_data_source' , deserializable = False )
114
182
datasource_id = properties .UUID ("datasource_id" )
115
183
184
+ _data_source_type = "experiments"
185
+
116
186
def __init__(self, *, datasource_id: UUID):
    """Store the identifier of the referenced data source (keyword-only)."""
    self.datasource_id: UUID = datasource_id
188
+
189
@classmethod
def _data_source_id_builder(cls, *args) -> DataSource:
    """Build an ExperimentDataSourceRef from the parsed terms of a data_source_id.

    ``args[0]`` is parsed as the UUID of the referenced data source.
    """
    parsed_id = UUID(args[0])
    return ExperimentDataSourceRef(datasource_id=parsed_id)
192
+
193
def to_data_source_id(self) -> str:
    """Produce the data_source_id for this DataSource.

    The result is the type tag and the data source id, each converted with
    ``str`` and joined by ``"::"``.
    """
    parts = (self._data_source_type, self.datasource_id)
    return "::".join(map(str, parts))
196
+
197
+
198
class SnapshotDataSource(Serializable['SnapshotDataSource'], DataSource):
    """A data source that refers to a Snapshot on the data platform.

    Parameters
    ----------
    snapshot_id: UUID
        Unique identifier for the Snapshot Data Source

    """

    typ = properties.String('type', default='snapshot_data_source', deserializable=False)
    snapshot_id = properties.UUID("snapshot_id")

    # Leading term of the data_source_id produced by to_data_source_id.
    _data_source_type = "snapshot"

    def __init__(self, *, snapshot_id: UUID):
        self.snapshot_id: UUID = snapshot_id

    @classmethod
    def _data_source_id_builder(cls, *args) -> DataSource:
        """Build a SnapshotDataSource; ``args[0]`` is parsed as the snapshot UUID."""
        parsed_id = UUID(args[0])
        return SnapshotDataSource(snapshot_id=parsed_id)

    def to_data_source_id(self) -> str:
        """Produce the data_source_id for this DataSource."""
        parts = (self._data_source_type, self.snapshot_id)
        return "::".join(map(str, parts))
0 commit comments