Skip to content

Commit

Permalink
restrict Boltz sequence length
Browse files: browse the repository at this point in the history
  • Loading branch information
Valentin Zulkower committed Dec 17, 2024
1 parent 6ab6482 commit 6a9609b
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
9 changes: 7 additions & 2 deletions ginkgo_ai_client/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@ class PromoterActivityQuery(QueryBase):
The name of the query. It will appear in the API response and can be used to
handle exceptions.
inference_framework: Literal["promoter-0"] = "promoter-0"
The inference framework to use for the inference. Currently only supports
The inference framework to use for the inference. Currently only supports
borzoi_model: Literal["human-fold0"] = "human-fold0"
The model to use for the inference. Currently only supports the trained
The model to use for the inference. Currently only supports the trained
model of "human-fold0".
Returns
-------
Expand Down Expand Up @@ -511,6 +511,11 @@ class _Protein(pydantic.BaseModel):

@pydantic.validator("sequence")
def validate_sequence(cls, sequence):
if len(sequence) > 1000:
raise ValueError(
f"We currently only accept sequences of length 1000 or less for Boltz "
f"structure prediction (length: {len(sequence)})"
)
sequence = sequence.upper()
invalid_chars = [c for c in sequence if c not in "LAGVSERTIDPKQNFYMHWCXBUZO"]
if len(invalid_chars) > 0:
Expand Down
8 changes: 8 additions & 0 deletions test/test_query_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,11 @@ def test_boltz_structure_prediction_query_from_protein_sequence():
query = BoltzStructurePredictionQuery.from_protein_sequence(sequence="MLLKP")
sequences = query.model_dump(exclude_none=True)["sequences"]
assert sequences == [{"protein": {"id": "A", "sequence": "MLLKP"}}]


def test_boltz_structure_prediction_query_fails_on_sequence_too_long():
    """Building a Boltz query from an 1100-residue sequence raises ValueError.

    The error is matched against the length-limit message, escaped so that it
    is compared as literal text rather than as a regular expression.
    """
    oversized_sequence = "A" * 1100
    message_pattern = re.escape(
        "We currently only accept sequences of length 1000 or less"
    )
    with pytest.raises(ValueError, match=message_pattern):
        BoltzStructurePredictionQuery.from_protein_sequence(
            sequence=oversized_sequence
        )

0 comments on commit 6a9609b

Please sign in to comment.