If you only want stable UTF-8
support on file read
/write
without same declarations everywhere, here are two solutions:
1. Patch io
module at runtime (danger operation at your own risk)
import pathlib as pathlib
import tempfile
import chardet
def patchIOWithUtf8Default():
import builtins
import importlib.util
import sys
spec = importlib.util.find_spec("io")
module = importlib.util.module_from_spec(spec)
exec(compile(spec.loader.get_source(spec.name) + """
def open(*args, **kwargs):
args = list(args)
mode = kwargs.get('mode', (args + [''])[1])
if (len(args) < 4 and 'b' not in mode) or 'encoding' in kwargs:
kwargs['encoding'] = 'utf8'
elif len(args) >= 4 and args[3] is None:
args[3] = 'utf8'
return _io.open(*args, **kwargs)
""", module.__spec__.origin, "exec"), module.__dict__)
sys.modules[module.__name__] = module
builtins.open = __import__("io").open
importlib.reload(importlib.import_module("pathlib"))
def main():
patchIOWithUtf8Default()
filename = tempfile.mktemp()
text = "Common\n常\nSense\n识\n天地玄黄"
print("Original text:", repr(text))
pathlib.Path(filename).write_text(text)
encoding = chardet.detect(open(filename, mode="rb").read())["encoding"]
print("Written encoding by pathlib:", encoding)
print("Written text by pathlib:", repr(open(filename, newline="", encoding=encoding).read()))
if __name__ == '__main__':
main()
Sample output:
Original text: 'Common\n常\nSense\n识\n天地玄黄'
Written encoding by pathlib: utf-8
Written text by pathlib: 'Common\r\n常\r\nSense\r\n识\r\n天地玄黄'
2. Use 3rd library as pathlib wrapper
https://github.com/baijifeilong/IceSpringPathLib
pip install IceSpringPathLib
import pathlib
import tempfile
import chardet
import IceSpringPathLib
tempfile.mktemp()
filename = tempfile.mktemp()
text = "Common\n常\nSense\n识\n天地玄黄"
print("Original text:", repr(text))
pathlib.Path(filename).write_text(text)
encoding = chardet.detect(open(filename, mode="rb").read())["encoding"]
print("\nWritten text by pathlib:", repr(open(filename, newline="", encoding=encoding).read()))
print("Written encoding by pathlib:", encoding)
IceSpringPathLib.Path(filename).write_text(text)
encoding = chardet.detect(open(filename, mode="rb").read())["encoding"]
print("\nWritten text by IceSpringPathLib:", repr(open(filename, newline="", encoding=encoding).read()))
print("Written encoding by IceSpringPathLib:", encoding)
Sample output:
Original text: 'Common\n常\nSense\n识\n天地玄黄'
Written text by pathlib: 'Common\r\n常\r\nSense\r\n识\r\n天地玄黄'
Written encoding by pathlib: GB2312
Written text by IceSpringPathLib: 'Common\n常\nSense\n识\n天地玄黄'
Written encoding by IceSpringPathLib: utf-8
The best solution is to learn to use encode and decode correctly instead of using hacks.
This was certainly possible with python2 at the cost of always remembering to do so / consistently using your own interface. My experience suggests that this becomes highly problematic when you are writing code that you want to work with both python2 and python3. – Bulgar