Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-38144: Add the root_dir and dir_fd parameters in glob.glob(). #16075

Merged
merged 11 commits into from
Jun 18, 2020
16 changes: 14 additions & 2 deletions Doc/library/glob.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ For example, ``'[?]'`` matches the character ``'?'``.
The :mod:`pathlib` module offers high-level path objects.


.. function:: glob(pathname, *, recursive=False)
.. function:: glob(pathname, *, root_dir=None, dir_fd=None, recursive=False)

Return a possibly-empty list of path names that match *pathname*, which must be
a string containing a path specification. *pathname* can be either absolute
Expand All @@ -45,6 +45,15 @@ For example, ``'[?]'`` matches the character ``'?'``.
symlinks are included in the results (as in the shell). Whether or not the
results are sorted depends on the file system.

If *root_dir* is not ``None``, it should be a :term:`path-like object`
specifying the root directory for searching. It has the same effect on
:func:`glob` as changing the current directory before calling it. If
*pathname* is relative, the result will contain paths relative to
*root_dir*.

This function can support :ref:`paths relative to directory descriptors
<dir_fd>` with the *dir_fd* parameter.

.. index::
single: **; in glob-style wildcards

Expand All @@ -62,8 +71,11 @@ For example, ``'[?]'`` matches the character ``'?'``.
.. versionchanged:: 3.5
Support for recursive globs using "``**``".

.. versionchanged:: 3.10
Added the *root_dir* and *dir_fd* parameters.


.. function:: iglob(pathname, *, recursive=False)
.. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False)

Return an :term:`iterator` which yields the same values as :func:`glob`
without actually storing them all simultaneously.
Expand Down
8 changes: 8 additions & 0 deletions Doc/whatsnew/3.10.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ New Modules
Improved Modules
================

glob
----

Added the *root_dir* and *dir_fd* parameters in :func:`~glob.glob` and
:func:`~glob.iglob` which allow specifying the root directory for searching.
(Contributed by Serhiy Storchaka in :issue:`38144`.)


Optimizations
=============

Expand Down
147 changes: 102 additions & 45 deletions Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import os
import re
import fnmatch
import itertools
import stat
import sys

__all__ = ["glob", "iglob", "escape"]

def glob(pathname, *, recursive=False):
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
"""Return a list of paths matching a pathname pattern.

The pattern may contain simple shell-style wildcards a la
Expand All @@ -18,9 +20,9 @@ def glob(pathname, *, recursive=False):
If recursive is true, the pattern '**' will match any files and
zero or more directories and subdirectories.
"""
return list(iglob(pathname, recursive=recursive))
return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd, recursive=recursive))

def iglob(pathname, *, recursive=False):
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
"""Return an iterator which yields the paths matching a pathname pattern.

The pattern may contain simple shell-style wildcards a la
Expand All @@ -31,36 +33,43 @@ def iglob(pathname, *, recursive=False):
If recursive is true, the pattern '**' will match any files and
zero or more directories and subdirectories.
"""
sys.audit("glob.glob", pathname, recursive)
it = _iglob(pathname, recursive, False)
if recursive and _isrecursive(pathname):
s = next(it) # skip empty string
assert not s
if root_dir is not None:
root_dir = os.fspath(root_dir)
else:
root_dir = pathname[:0]
it = _iglob(pathname, root_dir, dir_fd, recursive, False)
if not pathname or recursive and _isrecursive(pathname[:2]):
try:
s = next(it) # skip empty string
if s:
it = itertools.chain((s,), it)
except StopIteration:
pass
return it

def _iglob(pathname, recursive, dironly):
def _iglob(pathname, root_dir, dir_fd, recursive, dironly):
dirname, basename = os.path.split(pathname)
if not has_magic(pathname):
assert not dironly
if basename:
if os.path.lexists(pathname):
if _lexists(_join(root_dir, pathname), dir_fd):
yield pathname
else:
# Patterns ending with a slash should match only directories
if os.path.isdir(dirname):
if _isdir(_join(root_dir, dirname), dir_fd):
yield pathname
return
if not dirname:
if recursive and _isrecursive(basename):
yield from _glob2(dirname, basename, dironly)
yield from _glob2(root_dir, basename, dir_fd, dironly)
else:
yield from _glob1(dirname, basename, dironly)
yield from _glob1(root_dir, basename, dir_fd, dironly)
return
# `os.path.split()` returns the argument itself as a dirname if it is a
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path
# contains magic characters (i.e. r'\\?\C:').
if dirname != pathname and has_magic(dirname):
dirs = _iglob(dirname, recursive, True)
dirs = _iglob(dirname, root_dir, dir_fd, recursive, True)
else:
dirs = [dirname]
if has_magic(basename):
Expand All @@ -71,76 +80,121 @@ def _iglob(pathname, recursive, dironly):
else:
glob_in_dir = _glob0
for dirname in dirs:
for name in glob_in_dir(dirname, basename, dironly):
for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly):
yield os.path.join(dirname, name)

# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).

def _glob1(dirname, pattern, dironly):
names = list(_iterdir(dirname, dironly))
def _glob1(dirname, pattern, dir_fd, dironly):
names = list(_iterdir(dirname, dir_fd, dironly))
if not _ishidden(pattern):
names = (x for x in names if not _ishidden(x))
return fnmatch.filter(names, pattern)

def _glob0(dirname, basename, dironly):
if not basename:
# `os.path.split()` returns an empty basename for paths ending with a
# directory separator. 'q*x/' should match only directories.
if os.path.isdir(dirname):
def _glob0(dirname, basename, dir_fd, dironly):
if basename:
if _lexists(_join(dirname, basename), dir_fd):
return [basename]
else:
if os.path.lexists(os.path.join(dirname, basename)):
# `os.path.split()` returns an empty basename for paths ending with a
# directory separator. 'q*x/' should match only directories.
if _isdir(dirname, dir_fd):
return [basename]
return []

# Following functions are not public but can be used by third-party code.

def glob0(dirname, pattern):
return _glob0(dirname, pattern, False)
return _glob0(dirname, pattern, None, False)

def glob1(dirname, pattern):
return _glob1(dirname, pattern, False)
return _glob1(dirname, pattern, None, False)

# This helper function recursively yields relative pathnames inside a literal
# directory.

def _glob2(dirname, pattern, dironly):
def _glob2(dirname, pattern, dir_fd, dironly):
assert _isrecursive(pattern)
yield pattern[:0]
yield from _rlistdir(dirname, dironly)
yield from _rlistdir(dirname, dir_fd, dironly)

# If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names.
def _iterdir(dirname, dironly):
if not dirname:
if isinstance(dirname, bytes):
dirname = bytes(os.curdir, 'ASCII')
else:
dirname = os.curdir
def _iterdir(dirname, dir_fd, dironly):
try:
with os.scandir(dirname) as it:
for entry in it:
try:
if not dironly or entry.is_dir():
yield entry.name
except OSError:
pass
fd = None
fsencode = None
if dir_fd is not None:
if dirname:
fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
else:
arg = dir_fd
if isinstance(dirname, bytes):
fsencode = os.fsencode
elif dirname:
arg = dirname
elif isinstance(dirname, bytes):
arg = bytes(os.curdir, 'ASCII')
else:
arg = os.curdir
try:
with os.scandir(arg) as it:
for entry in it:
try:
if not dironly or entry.is_dir():
if fsencode is not None:
yield fsencode(entry.name)
else:
yield entry.name
except OSError:
pass
finally:
if fd is not None:
os.close(fd)
except OSError:
return

# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dironly):
names = list(_iterdir(dirname, dironly))
def _rlistdir(dirname, dir_fd, dironly):
names = list(_iterdir(dirname, dir_fd, dironly))
for x in names:
if not _ishidden(x):
yield x
path = os.path.join(dirname, x) if dirname else x
for y in _rlistdir(path, dironly):
yield os.path.join(x, y)
path = _join(dirname, x) if dirname else x
for y in _rlistdir(path, dir_fd, dironly):
yield _join(x, y)


def _lexists(pathname, dir_fd):
    """Return True if *pathname* refers to an existing path.

    Same as os.path.lexists(), but with dir_fd: when *dir_fd* is None the
    check is delegated to os.path.lexists(); otherwise *pathname* is
    passed to os.lstat() together with that directory descriptor.
    """
    if dir_fd is None:
        return os.path.lexists(pathname)
    try:
        os.lstat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        # Any failure to lstat() the path is reported as "does not exist"
        # rather than propagated to the caller.
        return False
    else:
        return True

def _isdir(pathname, dir_fd):
    """Return True if *pathname* refers to a directory.

    Same as os.path.isdir(), but with dir_fd: when *dir_fd* is None the
    check is delegated to os.path.isdir(); otherwise *pathname* is passed
    to os.stat() together with that directory descriptor and the
    resulting mode is tested with stat.S_ISDIR().
    """
    if dir_fd is None:
        return os.path.isdir(pathname)
    try:
        st = os.stat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        # Any failure to stat() the path is reported as "not a directory"
        # rather than propagated to the caller.
        return False
    else:
        return stat.S_ISDIR(st.st_mode)

def _join(dirname, basename):
# Fast path: dirname or basename is often empty, so skip os.path.join()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this comment is necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not? It explains why the special case was added.

Or maybe you can suggest better wording?

if not dirname or not basename:
return dirname or basename
return os.path.join(dirname, basename)

magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')

Expand Down Expand Up @@ -171,3 +225,6 @@ def escape(pathname):
else:
pathname = magic_check.sub(r'[\1]', pathname)
return drive + pathname


_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
50 changes: 50 additions & 0 deletions Lib/test/test_glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@


class GlobTests(unittest.TestCase):
dir_fd = None

def norm(self, *parts):
return os.path.normpath(os.path.join(self.tempdir, *parts))
Expand Down Expand Up @@ -38,8 +39,14 @@ def setUp(self):
os.symlink(self.norm('broken'), self.norm('sym1'))
os.symlink('broken', self.norm('sym2'))
os.symlink(os.path.join('a', 'bcd'), self.norm('sym3'))
if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd:
self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY)
else:
self.dir_fd = None

def tearDown(self):
if self.dir_fd is not None:
os.close(self.dir_fd)
shutil.rmtree(self.tempdir)

def glob(self, *parts, **kwargs):
Expand All @@ -53,6 +60,41 @@ def glob(self, *parts, **kwargs):
bres = [os.fsencode(x) for x in res]
self.assertCountEqual(glob.glob(os.fsencode(p), **kwargs), bres)
self.assertCountEqual(glob.iglob(os.fsencode(p), **kwargs), bres)

with change_cwd(self.tempdir):
res2 = glob.glob(pattern, **kwargs)
for x in res2:
self.assertFalse(os.path.isabs(x), x)
if pattern == '**' or pattern == '**' + os.sep:
expected = res[1:]
else:
expected = res
self.assertCountEqual([os.path.join(self.tempdir, x) for x in res2],
expected)
self.assertCountEqual(glob.iglob(pattern, **kwargs), res2)
bpattern = os.fsencode(pattern)
bres2 = [os.fsencode(x) for x in res2]
self.assertCountEqual(glob.glob(bpattern, **kwargs), bres2)
self.assertCountEqual(glob.iglob(bpattern, **kwargs), bres2)

self.assertCountEqual(glob.glob(pattern, root_dir=self.tempdir, **kwargs), res2)
serhiy-storchaka marked this conversation as resolved.
Show resolved Hide resolved
self.assertCountEqual(glob.iglob(pattern, root_dir=self.tempdir, **kwargs), res2)
btempdir = os.fsencode(self.tempdir)
self.assertCountEqual(
glob.glob(bpattern, root_dir=btempdir, **kwargs), bres2)
self.assertCountEqual(
glob.iglob(bpattern, root_dir=btempdir, **kwargs), bres2)

if self.dir_fd is not None:
self.assertCountEqual(
glob.glob(pattern, dir_fd=self.dir_fd, **kwargs), res2)
self.assertCountEqual(
glob.iglob(pattern, dir_fd=self.dir_fd, **kwargs), res2)
self.assertCountEqual(
glob.glob(bpattern, dir_fd=self.dir_fd, **kwargs), bres2)
self.assertCountEqual(
glob.iglob(bpattern, dir_fd=self.dir_fd, **kwargs), bres2)

return res

def assertSequencesEqual_noorder(self, l1, l2):
Expand All @@ -78,6 +120,14 @@ def test_glob_literal(self):
res = glob.glob(os.path.join(os.fsencode(os.curdir), b'*'))
self.assertEqual({type(r) for r in res}, {bytes})

def test_glob_empty_pattern(self):
self.assertEqual(glob.glob(''), [])
self.assertEqual(glob.glob(b''), [])
self.assertEqual(glob.glob('', root_dir=self.tempdir), [])
self.assertEqual(glob.glob(b'', root_dir=os.fsencode(self.tempdir)), [])
self.assertEqual(glob.glob('', dir_fd=self.dir_fd), [])
self.assertEqual(glob.glob(b'', dir_fd=self.dir_fd), [])

def test_glob_one_directory(self):
eq = self.assertSequencesEqual_noorder
eq(self.glob('a*'), map(self.norm, ['a', 'aab', 'aaa']))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added the *root_dir* and *dir_fd* parameters in :func:`glob.glob` and :func:`glob.iglob`.