diff --git a/src/monty/dev.py b/src/monty/dev.py index e2680860..882d0ec8 100644 --- a/src/monty/dev.py +++ b/src/monty/dev.py @@ -30,7 +30,8 @@ def deprecated( replacement (Callable | str): A replacement class or function. message (str): A warning message to be displayed. deadline (Optional[tuple[int, int, int]]): Optional deadline for removal - of the old function/class, in format (yyyy, MM, dd). + of the old function/class, in format (yyyy, MM, dd). A CI warning would + be raised after this date if is running in code owner' repo. category (Warning): Choose the category of the warning to issue. Defaults to FutureWarning. Another choice can be DeprecationWarning. Note that FutureWarning is meant for end users and is always shown unless silenced. diff --git a/src/monty/io.py b/src/monty/io.py index 96a89987..1b454899 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -35,7 +35,7 @@ def zopen( This function wraps around `[bz2/gzip/lzma].open` and `open` to deal intelligently with compressed or uncompressed files. Supports context manager: - `with zopen(filename, mode="rt", ...)`. + `with zopen(filename, mode="rt", ...)` Important Notes: - Default `mode` should not be used, and would not be allow @@ -43,11 +43,12 @@ def zopen( - Always explicitly specify binary/text in `mode`, i.e. always pass `t` or `b` in `mode`, implicit binary/text mode would not be allow in future versions. - - Always provide an explicit `encoding` in text mode. + - Always provide an explicit `encoding` in text mode, it would + be set to UTF-8 by default otherwise. Args: filename (str | Path): The file to open. - mode (str): The mode in which the file is opened, you MUST + mode (str): The mode in which the file is opened, you should explicitly specify "b" for binary or "t" for text. **kwargs: Additional keyword arguments to pass to `open`. @@ -79,14 +80,16 @@ def zopen( stacklevel=2, ) - # Warn against default `encoding` in text mode + # Warn against default `encoding` in text mode if + # `PYTHONWARNDEFAULTENCODING` environment variable is set (PEP 597) if "t" in mode and kwargs.get("encoding", None) is None: - warnings.warn( - "We strongly encourage explicit `encoding`, " - "and we would use UTF-8 by default as per PEP 686", - category=EncodingWarning, - stacklevel=2, - ) + if os.getenv("PYTHONWARNDEFAULTENCODING", False): + warnings.warn( + "We strongly encourage explicit `encoding`, " + "and we would use UTF-8 by default as per PEP 686", + category=EncodingWarning, + stacklevel=2, + ) kwargs["encoding"] = "utf-8" _name, ext = os.path.splitext(filename) @@ -141,7 +144,7 @@ def _get_line_ending( If file is empty, "\n" would be used as default. """ if isinstance(file, (str, Path)): - with zopen(file, "rb") as f: + with zopen(file, mode="rb") as f: first_line = f.readline() elif isinstance(file, io.TextIOWrapper): first_line = file.buffer.readline() # type: ignore[attr-defined] @@ -187,7 +190,7 @@ def reverse_readfile( l_end = _get_line_ending(filename) len_l_end = len(l_end) - with zopen(filename, "rb") as file: + with zopen(filename, mode="rb") as file: if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): for line in reversed(file.readlines()): # "readlines" would keep the line end character diff --git a/src/monty/re.py b/src/monty/re.py index 76152ff1..feeaed24 100644 --- a/src/monty/re.py +++ b/src/monty/re.py @@ -49,7 +49,7 @@ def regrep( gen = ( reverse_readfile(filename) if reverse - else zopen(filename, "rt", encoding="utf-8") + else zopen(filename, mode="rt", encoding="utf-8") ) for i, line in enumerate(gen): for k, p in compiled.items(): diff --git a/src/monty/serialization.py b/src/monty/serialization.py index 13f3fb00..72c2d104 100644 --- a/src/monty/serialization.py +++ b/src/monty/serialization.py @@ -22,10 +22,15 @@ if TYPE_CHECKING: from pathlib import Path - from typing import Any, Optional, TextIO, Union + from typing import Any, Literal, TextIO, Union -def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) -> Any: +def loadfn( + fn: Union[str, Path], + *args, + fmt: Literal["json", "yaml", "mpk"] | None = None, + **kwargs, +) -> Any: """ Loads json/yaml/msgpack directly from a filename instead of a File-like object. File may also be a BZ2 (".BZ2") or GZIP (".GZ", ".Z") @@ -39,9 +44,8 @@ def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) -> Args: fn (str/Path): filename or pathlib.Path. *args: Any of the args supported by json/yaml.load. - fmt (string): If specified, the fmt specified would be used instead - of autodetection from filename. Supported formats right now are - "json", "yaml" or "mpk". + fmt ("json" | "yaml" | "mpk"): If specified, the fmt specified would + be used instead of autodetection from filename. **kwargs: Any of the kwargs supported by json/yaml.load. Returns: @@ -64,10 +68,10 @@ def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) -> ) if "object_hook" not in kwargs: kwargs["object_hook"] = object_hook - with zopen(fn, "rb") as fp: + with zopen(fn, mode="rb") as fp: return msgpack.load(fp, *args, **kwargs) # pylint: disable=E1101 else: - with zopen(fn, "rt", encoding="utf-8") as fp: + with zopen(fn, mode="rt", encoding="utf-8") as fp: if fmt == "yaml": if YAML is None: raise RuntimeError("Loading of YAML files requires ruamel.yaml.") @@ -81,7 +85,13 @@ def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) -> raise TypeError(f"Invalid format: {fmt}") -def dumpfn(obj: object, fn: Union[str, Path], *args, fmt=None, **kwargs) -> None: +def dumpfn( + obj: object, + fn: Union[str, Path], + *args, + fmt: Literal["json", "yaml", "mpk"] | None = None, + **kwargs, +) -> None: """ Dump to a json/yaml directly by filename instead of a File-like object. File may also be a BZ2 (".BZ2") or GZIP (".GZ", ".Z") @@ -95,6 +105,8 @@ def dumpfn(obj: object, fn: Union[str, Path], *args, fmt=None, **kwargs) -> None Args: obj (object): Object to dump. fn (str/Path): filename or pathlib.Path. + fmt ("json" | "yaml" | "mpk"): If specified, the fmt specified would + be used instead of autodetection from filename. *args: Any of the args supported by json/yaml.dump. **kwargs: Any of the kwargs supported by json/yaml.dump. @@ -117,10 +129,10 @@ def dumpfn(obj: object, fn: Union[str, Path], *args, fmt=None, **kwargs) -> None ) if "default" not in kwargs: kwargs["default"] = default - with zopen(fn, "wb") as fp: + with zopen(fn, mode="wb") as fp: msgpack.dump(obj, fp, *args, **kwargs) # pylint: disable=E1101 else: - with zopen(fn, "wt", encoding="utf-8") as fp: + with zopen(fn, mode="wt", encoding="utf-8") as fp: fp = cast(TextIO, fp) if fmt == "yaml": diff --git a/src/monty/shutil.py b/src/monty/shutil.py index 98047f57..418e5311 100644 --- a/src/monty/shutil.py +++ b/src/monty/shutil.py @@ -104,7 +104,7 @@ def compress_file( else: compressed_file = f"{str(filepath)}.{compression}" - with open(filepath, "rb") as f_in, zopen(compressed_file, "wb") as f_out: + with open(filepath, "rb") as f_in, zopen(compressed_file, mode="wb") as f_out: f_out.writelines(f_in) os.remove(filepath) @@ -157,7 +157,7 @@ def decompress_file( else: decompressed_file = str(filepath).removesuffix(file_ext) - with zopen(filepath, "rb") as f_in, open(decompressed_file, "wb") as f_out: + with zopen(filepath, mode="rb") as f_in, open(decompressed_file, "wb") as f_out: f_out.writelines(f_in) os.remove(filepath) diff --git a/tests/test_io.py b/tests/test_io.py index f79c9d7b..b45ac6cd 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -433,11 +433,13 @@ def test_lzw_files(self): f.read() @pytest.mark.parametrize("extension", [".txt", ".bz2", ".gz", ".xz", ".lzma"]) - def test_warnings(self, extension): + def test_warnings(self, extension, monkeypatch): filename = f"test_warning{extension}" content = "Test warning" with ScratchDir("."): + monkeypatch.setenv("PYTHONWARNDEFAULTENCODING", "1") + # Default `encoding` warning with ( pytest.warns(EncodingWarning, match="use UTF-8 by default"), @@ -445,6 +447,19 @@ def test_warnings(self, extension): ): f.write(content) + # No encoding warning if `PYTHONWARNDEFAULTENCODING` not set + monkeypatch.delenv("PYTHONWARNDEFAULTENCODING", raising=False) + + with warnings.catch_warnings(): + warnings.filterwarnings( + "error", + "We strongly encourage explicit `encoding`", + EncodingWarning, + ) + + with zopen(filename, "wt") as f: + f.write(content) + # Implicit text/binary `mode` warning warnings.filterwarnings( "ignore", category=EncodingWarning, message="argument not specified"