Skip to content

Commit

Permalink
Compute padding manually to mimic python's binascii behavior (#290)
Browse files Browse the repository at this point in the history
The JDK decoder behaves differently in two corner cases in particular:

- It is more restrictive regarding superfluous padding.
- It is more permissive regarding lack of padding.

To counter this, we must manually compute the expected padding to cover for these two cases.

This is very similar to CPython: python/cpython@1bf9cc5/Modules/binascii.c#L468-L473
  • Loading branch information
mahmoudimus authored May 27, 2022
1 parent ada686b commit fac1d5a
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,70 @@ public class BinasciiModule implements StarlarkValue {
// Shared instance of this module.
public static final BinasciiModule INSTANCE = new BinasciiModule();
// Error message texts; presumably raised by the hex conversion helpers, which are
// outside this view -- TODO confirm against their call sites.
private static final String NON_HEX_DIGIT_FOUND = "Found non hex digit";
private static final String ODD_LENGTH_STRING = "String has odd length";
// Base64 padding character that b64decode counts and matches.
private static final char BASE64_PAD = '=';

/**
 * Decodes base64 input while mimicking the padding rules of CPython's
 * {@code binascii.a2b_base64}.
 *
 * <p>The JDK decoder differs from CPython in two corner cases: it is more
 * restrictive regarding superfluous padding and more permissive regarding
 * missing padding. To compensate, the expected padding is computed manually,
 * missing padding is rejected up front, and the input is truncated right after
 * the expected padding before it is handed to the JDK MIME decoder.
 *
 * @param data the base64-encoded bytes; bytes outside the base64 alphabet are skipped
 * @return the decoded bytes
 * @throws EvalException if the number of base64 alphabet characters is one more
 *     than a multiple of four, if fewer pad characters than required follow the
 *     last alphabet character, or if the JDK decoder rejects the input
 */
private byte[] b64decode(byte[] data) throws EvalException {
  int dataLen = data.length;

  // Count the alphabet characters, remember where the last one sits, and count
  // the pad characters that appear after it.
  int base64chars = 0;
  int lastBase64Char = -1;
  int padding = 0;
  for (int i = 0; i < dataLen; i++) {
    byte c = data[i];
    if (((c >= 'a') && (c <= 'z'))
        || ((c >= 'A') && (c <= 'Z'))
        || ((c >= '0') && (c <= '9'))
        || (c == '+') || (c == '/')) {
      lastBase64Char = i;
      base64chars++;
      // Only pad characters trailing the final alphabet character are counted.
      padding = 0;
    } else if (c == BASE64_PAD) {
      padding++;
    }
  }

  // A trailing group of 2 or 3 alphabet characters needs 2 or 1 pad characters;
  // a trailing group of exactly 1 can never be valid.
  int expectedPadding = 0;
  switch (base64chars % 4) {
    case 1:
      throw new EvalException(
          "Invalid base64-encoded string: number of data characters ("
              + base64chars
              + ") cannot be 1 more than a multiple of 4");
    case 2:
      expectedPadding = 2;
      break;
    case 3:
      expectedPadding = 1;
      break;
    default:
      break;
  }

  if (padding < expectedPadding) {
    throw new EvalException("Incorrect padding");
  }

  // Find the end of the expected padding, if any. Everything after it (for
  // example superfluous '=' characters) is cut off so the JDK decoder never sees it.
  int decodeLen = lastBase64Char + 1;
  int correctedPadding = 0;
  for (int i = decodeLen; correctedPadding < expectedPadding && i < dataLen; i++) {
    if (data[i] == BASE64_PAD) {
      correctedPadding++;
      decodeLen = i + 1;
    }
  }

  try {
    // Mimic CPython's MIME decoder and skip over anything that is not the alphabet
    return Base64.getMimeDecoder().decode(Arrays.copyOf(data, decodeLen));
  } catch (IllegalArgumentException e) {
    throw new EvalException(e);
  }
}

Expand Down
5 changes: 4 additions & 1 deletion larky/src/test/resources/stdlib_tests/test_binascii.star
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,7 @@ asserts.assert_that(str(bin_data)).is_equal_to("Blinka")
asserts.assert_that(repr(bin_data)).is_equal_to("b\"Blinka\"")
# print("Converted b64 ASCII->Binary Data: ", repr(bin_data))
asserts.assert_that(type(bin_data)).is_equal_to("bytes")
asserts.assert_(bin_data == data, "Expected binary data does not match.")

# Superfluous trailing padding must be tolerated, as CPython's binascii.a2b_base64 does.
asserts.assert_that(a2b_base64("AQAB==")).is_equal_to(b'\x01\x00\x01')
asserts.assert_that(a2b_base64("AQAB===")).is_equal_to(b'\x01\x00\x01')

0 comments on commit fac1d5a

Please sign in to comment.