Skip to content

Commit

Permalink
fix no attribute 'current_tag' when executing local tests (#335)
Browse files — browse the repository at this point in the history
  • Loading branch information
drcege authored Jun 28, 2024
1 parent 9b337fc commit c85e024
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions data_juicer/utils/unittest_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,10 @@ def generate_dataset(self, data):
type (str, optional): "standalone" or "ray".
Defaults to "standalone".
"""
- if self.current_tag.startswith('standalone'):
+ current_tag = getattr(self, 'current_tag', 'standalone')
+ if current_tag.startswith('standalone'):
return Dataset.from_list(data)
- elif self.current_tag.startswith('ray'):
+ elif current_tag.startswith('ray'):
dataset = rd.from_items(data)
if Fields.stats not in dataset.columns(fetch_if_missing=False):

Expand All @@ -79,15 +80,16 @@ def process_batch_arrow(table: pa.Table) -> pa.Table:

def run_single_op(self, dataset, op, column_names):
"""Run operator in the specific executor."""
- if self.current_tag.startswith('standalone'):
+ current_tag = getattr(self, 'current_tag', 'standalone')
+ if current_tag.startswith('standalone'):
if isinstance(op, Filter) and Fields.stats not in dataset.features:
dataset = dataset.add_column(name=Fields.stats,
column=[{}] * dataset.num_rows)
dataset = dataset.map(op.compute_stats)
dataset = dataset.filter(op.process)
dataset = dataset.select_columns(column_names=column_names)
return dataset.to_list()
- elif self.current_tag.startswith('ray'):
+ elif current_tag.startswith('ray'):
dataset = dataset.map(op.compute_stats)
dataset = dataset.filter(op.process)
dataset = dataset.to_pandas().get(column_names)
Expand Down

0 comments on commit c85e024

Please sign in to comment.