# glob.py
"""
Filename globbing utility. Mostly a copy of `glob` from Python 3.5.

Changes include:
* `yield from` and the PEP 3102 keyword-only `*` marker removed.
* Hidden files are not ignored.
"""
  7. import os
  8. import re
  9. import fnmatch
  10. __all__ = ["glob", "iglob", "escape"]
  11. def glob(pathname, recursive=False):
  12. """Return a list of paths matching a pathname pattern.
  13. The pattern may contain simple shell-style wildcards a la
  14. fnmatch. However, unlike fnmatch, filenames starting with a
  15. dot are special cases that are not matched by '*' and '?'
  16. patterns.
  17. If recursive is true, the pattern '**' will match any files and
  18. zero or more directories and subdirectories.
  19. """
  20. return list(iglob(pathname, recursive=recursive))
  21. def iglob(pathname, recursive=False):
  22. """Return an iterator which yields the paths matching a pathname pattern.
  23. The pattern may contain simple shell-style wildcards a la
  24. fnmatch. However, unlike fnmatch, filenames starting with a
  25. dot are special cases that are not matched by '*' and '?'
  26. patterns.
  27. If recursive is true, the pattern '**' will match any files and
  28. zero or more directories and subdirectories.
  29. """
  30. it = _iglob(pathname, recursive)
  31. if recursive and _isrecursive(pathname):
  32. s = next(it) # skip empty string
  33. assert not s
  34. return it
  35. def _iglob(pathname, recursive):
  36. dirname, basename = os.path.split(pathname)
  37. if not has_magic(pathname):
  38. if basename:
  39. if os.path.lexists(pathname):
  40. yield pathname
  41. else:
  42. # Patterns ending with a slash should match only directories
  43. if os.path.isdir(dirname):
  44. yield pathname
  45. return
  46. if not dirname:
  47. if recursive and _isrecursive(basename):
  48. for x in glob2(dirname, basename):
  49. yield x
  50. else:
  51. for x in glob1(dirname, basename):
  52. yield x
  53. return
  54. # `os.path.split()` returns the argument itself as a dirname if it is a
  55. # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
  56. # contains magic characters (i.e. r'\\?\C:').
  57. if dirname != pathname and has_magic(dirname):
  58. dirs = _iglob(dirname, recursive)
  59. else:
  60. dirs = [dirname]
  61. if has_magic(basename):
  62. if recursive and _isrecursive(basename):
  63. glob_in_dir = glob2
  64. else:
  65. glob_in_dir = glob1
  66. else:
  67. glob_in_dir = glob0
  68. for dirname in dirs:
  69. for name in glob_in_dir(dirname, basename):
  70. yield os.path.join(dirname, name)
  71. # These 2 helper functions non-recursively glob inside a literal directory.
  72. # They return a list of basenames. `glob1` accepts a pattern while `glob0`
  73. # takes a literal basename (so it only has to check for its existence).
  74. def glob1(dirname, pattern):
  75. if not dirname:
  76. if isinstance(pattern, bytes):
  77. dirname = os.curdir.encode('ASCII')
  78. else:
  79. dirname = os.curdir
  80. try:
  81. names = os.listdir(dirname)
  82. except OSError:
  83. return []
  84. return fnmatch.filter(names, pattern)
  85. def glob0(dirname, basename):
  86. if not basename:
  87. # `os.path.split()` returns an empty basename for paths ending with a
  88. # directory separator. 'q*x/' should match only directories.
  89. if os.path.isdir(dirname):
  90. return [basename]
  91. else:
  92. if os.path.lexists(os.path.join(dirname, basename)):
  93. return [basename]
  94. return []
  95. # This helper function recursively yields relative pathnames inside a literal
  96. # directory.
  97. def glob2(dirname, pattern):
  98. assert _isrecursive(pattern)
  99. yield pattern[:0]
  100. for x in _rlistdir(dirname):
  101. yield x
  102. # Recursively yields relative pathnames inside a literal directory.
  103. def _rlistdir(dirname):
  104. if not dirname:
  105. if isinstance(dirname, bytes):
  106. dirname = os.curdir.encode('ASCII')
  107. else:
  108. dirname = os.curdir
  109. try:
  110. names = os.listdir(dirname)
  111. except os.error:
  112. return
  113. for x in names:
  114. yield x
  115. path = os.path.join(dirname, x) if dirname else x
  116. for y in _rlistdir(path):
  117. yield os.path.join(x, y)
  118. magic_check = re.compile('([*?[])')
  119. magic_check_bytes = re.compile(b'([*?[])')
  120. def has_magic(s):
  121. if isinstance(s, bytes):
  122. match = magic_check_bytes.search(s)
  123. else:
  124. match = magic_check.search(s)
  125. return match is not None
  126. def _isrecursive(pattern):
  127. if isinstance(pattern, bytes):
  128. return pattern == b'**'
  129. else:
  130. return pattern == '**'
  131. def escape(pathname):
  132. """Escape all special characters.
  133. """
  134. # Escaping is done by wrapping any of "*?[" between square brackets.
  135. # Metacharacters do not work in the drive part and shouldn't be escaped.
  136. drive, pathname = os.path.splitdrive(pathname)
  137. if isinstance(pathname, bytes):
  138. pathname = magic_check_bytes.sub(br'[\1]', pathname)
  139. else:
  140. pathname = magic_check.sub(r'[\1]', pathname)
  141. return drive + pathname