diff --git a/changelog.d/20240222_174417_abdul.muqadim_binary_file_detection_update.md b/changelog.d/20240222_174417_abdul.muqadim_binary_file_detection_update.md
new file mode 100644
index 0000000000..b606729361
--- /dev/null
+++ b/changelog.d/20240222_174417_abdul.muqadim_binary_file_detection_update.md
@@ -0,0 +1 @@
+- [Improvement] This is a non-breaking change. Enhanced is_binary_file function in env file for better binary file detection. (by @Abdul-Muqadim-Arbisoft)
diff --git a/tests/test_env.py b/tests/test_env.py
index 27cff9678b..5322f6e7bf 100644
--- a/tests/test_env.py
+++ b/tests/test_env.py
@@ -42,6 +42,21 @@ def test_files_are_rendered(self) -> None:
     def test_is_binary_file(self) -> None:
         self.assertTrue(env.is_binary_file("/home/somefile.ico"))
 
+    def test_is_binary_file_with_uppercase_extension(self) -> None:
+        self.assertTrue(env.is_binary_file("/home/somefile.ICO"))
+
+    def test_is_binary_file_with_text_extension(self) -> None:
+        self.assertFalse(env.is_binary_file("/home/script.js"))
+
+    def test_is_binary_file_with_shell_script(self) -> None:
+        self.assertFalse(env.is_binary_file("/home/script.sh"))
+
+    def test_is_binary_file_with_unrecognized_extension(self) -> None:
+        self.assertFalse(env.is_binary_file("/home/unknown.extension"))
+
+    def test_is_binary_file_without_extension(self) -> None:
+        self.assertFalse(env.is_binary_file("/home/file"))
+
     def test_find_os_path(self) -> None:
         environment = env.JinjaEnvironment()
         path = environment.find_os_path("local/docker-compose.yml")
diff --git a/tutor/env.py b/tutor/env.py
index 470f9dc5a3..7bf6ecb260 100644
--- a/tutor/env.py
+++ b/tutor/env.py
@@ -6,6 +6,8 @@
 import typing as t
 from copy import deepcopy
 
+import mimetypes
+
 import jinja2
 import importlib_resources
 
@@ -26,6 +28,26 @@
     ".woff",
     ".woff2",
 ]
+# MIME types outside of "text/*" that nevertheless denote text content.
+TEXT_MIME_TYPES = [
+    "application/javascript",
+    "application/json",
+    "application/x-sh",
+    "application/x-yaml",
+    "application/xml",
+    "application/yaml",
+]
+# Extensions that are text whatever the host MIME database says about them.
+TEXT_FILE_EXTENSIONS = [
+    ".css",
+    ".html",
+    ".js",
+    ".json",
+    ".sh",
+    ".xml",
+    ".yaml",
+    ".yml",
+]
 
 JinjaFilter = t.Callable[..., t.Any]
 
@@ -501,7 +523,35 @@ def read_core_template_file(*path: str) -> str:
 
 
 def is_binary_file(path: str) -> bool:
-    ext = os.path.splitext(path)[1]
-    return ext in BIN_FILE_EXTENSIONS
+    """
+    Tell whether a file should be treated as binary, judging by its name only.
+
+    The file content is never read. Checks are made in the following order:
+
+    1. Extensions listed in TEXT_FILE_EXTENSIONS are never binary.
+    2. Extensions listed in BIN_FILE_EXTENSIONS are always binary.
+    3. Otherwise the MIME type guessed from the name decides: "text/*" and the
+       types listed in TEXT_MIME_TYPES are text, any other known type is binary.
+    4. Files whose type cannot be guessed are not considered binary.
+
+    The explicit extension lists come first because the result of
+    `mimetypes.guess_type` depends on the Python version and on the MIME
+    database of the host system.
+
+    Parameters:
+    - path (str): The path to the file whose type is to be determined.
+
+    Returns:
+    - bool: True if the file is determined to be binary, False otherwise.
+    """
+    ext = os.path.splitext(path)[1].lower()
+    if ext in TEXT_FILE_EXTENSIONS:
+        return False
+    if ext in BIN_FILE_EXTENSIONS:
+        return True
+    mime_type, _ = mimetypes.guess_type(path)
+    if mime_type is None:
+        return False
+    return not (mime_type.startswith("text/") or mime_type in TEXT_MIME_TYPES)
 
 