A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://mail.python.org/pipermail/python-dev/2007-June/073607.html below:

[Python-Dev] zipfile and unicode filenames

[Python-Dev] zipfile and unicode filenamesAlexey Borzenkov snaury at gmail.com
Mon Jun 11 06:30:29 CEST 2007
On 6/11/07, "Martin v. Löwis" <martin at v.loewis.de> wrote:
> For compatibility, I would propose to use UTF-8 only if the file
> name is not ASCII. Even though the OEM code pages vary, they
> are (mostly) ASCII supersets. So if the string can be encoded
> in ASCII, there is no need to set the UTF-8 flag bit.

Done:

Index: Lib/zipfile.py
===================================================================
--- Lib/zipfile.py	(revision 55850)
+++ Lib/zipfile.py	(working copy)
@@ -252,13 +252,29 @@
             self.extract_version = max(45, self.extract_version)
             self.create_version = max(45, self.extract_version)

+        filename, flag_bits = self._encodeFilenameFlags()
         header = struct.pack(structFileHeader, stringFileHeader,
-                 self.extract_version, self.reserved, self.flag_bits,
+                 self.extract_version, self.reserved, flag_bits,
                  self.compress_type, dostime, dosdate, CRC,
                  compress_size, file_size,
-                 len(self.filename), len(extra))
-        return header + self.filename + extra
+                 len(filename), len(extra))
+        return header + filename + extra

+    def _encodeFilenameFlags(self):
+        if isinstance(self.filename, unicode):
+            try:
+                return self.filename.encode('ascii'), self.flag_bits
+            except UnicodeEncodeError:
+                return self.filename.encode('utf-8'), self.flag_bits | 0x800
+        else:
+            return self.filename, self.flag_bits
+
+    def _decodeFilenameFlags(self):
+        if self.flag_bits & 0x800:
+            return self.filename.decode('utf-8'), self.flag_bits & ~0x800
+        else:
+            return self.filename, self.flag_bits
+
     def _decodeExtra(self):
         # Try to decode the extra field.
         extra = self.extra
@@ -684,6 +700,7 @@

             x._decodeExtra()
             x.header_offset = x.header_offset + concat
+            x.filename, x.flag_bits = x._decodeFilenameFlags()
             self.filelist.append(x)
             self.NameToInfo[x.filename] = x
             if self.debug > 2:
@@ -967,16 +984,17 @@
                     extract_version = zinfo.extract_version
                     create_version = zinfo.create_version

+                filename, flag_bits = zinfo._encodeFilenameFlags()
                 centdir = struct.pack(structCentralDir,
                   stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
-                  zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
+                  flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
-                  len(zinfo.filename), len(extra_data), len(zinfo.comment),
+                  len(filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset)
                 self.fp.write(centdir)
-                self.fp.write(zinfo.filename)
+                self.fp.write(filename)
                 self.fp.write(extra_data)
                 self.fp.write(zinfo.comment)

Index: Lib/test/test_zipfile.py
===================================================================
--- Lib/test/test_zipfile.py	(revision 55850)
+++ Lib/test/test_zipfile.py	(working copy)
@@ -515,6 +515,12 @@
         # and report that the first file in the archive was corrupt.
         self.assertRaises(RuntimeError, zipf.testzip)

+    def testUnicodeFilenames(self):
+        zf = zipfile.ZipFile(TESTFN, "w")
+        zf.writestr(u"foo.txt", "Test for unicode filename")
+        assert isinstance(zf.infolist()[0].filename, unicode)
+        zf.close()
+
     def tearDown(self):
         support.unlink(TESTFN)
         support.unlink(TESTFN2)

What I also changed is to encode filenames only for writing to the
target file, without damaging ZipInfo. The reason for this is that if
user decides to enumerate infolist after she wrote files to ZipFile,
she would expect ZipInfo.filename to be what she passed to
ZipFile.write/ZipFile.writestr.
More information about the Python-Dev mailing list

RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4