# Copyright (c) 2013, Mahmoud Hashemi
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# * The names of the contributors may not be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""The Python Garbage Collector (`GC`_) doesn't usually get too much
attention, probably because:
- Python's `reference counting`_ effectively handles the vast majority of
unused objects
- People are slowly learning to avoid implementing `object.__del__()`_
- The collection itself strikes a good balance between simplicity and
power (`tunable generation sizes`_)
- The collector itself is fast and rarely the cause of long pauses
associated with GC in other runtimes
Even so, for many applications, the time will come when the developer
will need to track down:
- Circular references
- Misbehaving objects (locks, ``__del__()``)
- Memory leaks
- Or just ways to shave off a couple percent of execution time
Thanks to the :mod:`gc` module, the GC is a well-instrumented entry
point for exactly these tasks, and ``gcutils`` aims to facilitate it
further.
.. _GC: https://docs.python.org/2/glossary.html#term-garbage-collection
.. _reference counting: https://docs.python.org/2/glossary.html#term-reference-count
.. _object.__del__(): https://docs.python.org/2/reference/datamodel.html#object.__del__
.. _tunable generation sizes: https://docs.python.org/2/library/gc.html#gc.set_threshold
"""
# TODO: type survey
import gc
import sys
__all__ = ['get_all', 'GCToggler', 'toggle_gc', 'toggle_gc_postcollect']
def get_all(type_obj, include_subtypes=True):
    """Get a list containing all instances of a given type.  This will
    work for the vast majority of types out there.

    >>> class Ratking(object): pass
    >>> wiki, hak, sport = Ratking(), Ratking(), Ratking()
    >>> len(get_all(Ratking))
    3

    However, there are some exceptions. For example, ``get_all(bool)``
    returns an empty list because ``True`` and ``False`` are
    themselves built-in and not tracked.

    >>> get_all(bool)
    []

    Still, it's not hard to see how this functionality can be used to
    find all instances of a leaking type and track them down further
    using :func:`gc.get_referrers` and :func:`gc.get_referents`.

    ``get_all()`` is optimized such that getting instances of
    user-created types is quite fast. Setting *include_subtypes* to
    ``False`` will further increase performance in cases where
    instances of subtypes aren't required.

    Args:
        type_obj (type): The type whose live instances are wanted.
        include_subtypes (bool): When true (the default), instances of
            subtypes of *type_obj* are returned as well; when false,
            only objects whose exact type is *type_obj*.

    Returns:
        list: The matching objects found through the :mod:`gc` module.

    Raises:
        TypeError: If *type_obj* is not a type.

    .. note::

      There are no guarantees about the state of objects returned by
      ``get_all()``, especially in concurrent environments. For
      instance, it is possible for an object to be in the middle of
      executing its ``__init__()`` and be only partially constructed.

    .. note::

      When the fast path is taken, subtypes are discovered through
      ``type.__subclasses__()``, so instances of *virtual* subclasses
      (e.g. registered through :mod:`abc`) are not expected to be
      found on that path.
    """
    # TODO: old-style classes
    if not isinstance(type_obj, type):
        # Wrap in a 1-tuple so that a tuple argument is reported verbatim
        # rather than being unpacked by the ``%`` operator.
        raise TypeError('expected a type, not %r' % (type_obj,))

    # Python 2.6 and below lack gc.is_tracked() and don't get the speedup.
    is_tracked = getattr(gc, 'is_tracked', None)
    use_referrers = is_tracked is not None and is_tracked(type_obj)

    # Types whose referrers are searched on the fast path.  An instance
    # refers to its *own* type, so instances of a subtype only show up
    # among that subtype's referrers, never among those of ``type_obj``.
    targets = [type_obj]
    if use_referrers and include_subtypes:
        seen = set([id(type_obj)])
        pending = [type_obj]
        while pending and use_referrers:
            # ``type.__subclasses__(t)`` (rather than ``t.__subclasses__()``)
            # also works when ``t`` is itself a metaclass.
            for subtype in type.__subclasses__(pending.pop()):
                if id(subtype) in seen:
                    continue  # reached before via another base (diamond)
                if not is_tracked(subtype):
                    # Can't enumerate this subtype's referrers reliably;
                    # give up on the fast path and scan everything.
                    use_referrers = False
                    break
                seen.add(id(subtype))
                targets.append(subtype)
                pending.append(subtype)

    if use_referrers:
        to_check = gc.get_referrers(*targets)
    else:
        to_check = gc.get_objects()

    if include_subtypes:
        return [x for x in to_check if isinstance(x, type_obj)]
    return [x for x in to_check if type(x) is type_obj]
# True when running under PyPy, detected via its built-in ``__pypy__`` module.
_IS_PYPY = '__pypy__' in sys.builtin_module_names
if _IS_PYPY:
    # pypy's gc is just different, y'all
    del get_all
    # Keep ``__all__`` consistent with the names that actually exist;
    # otherwise ``from <this module> import *`` raises AttributeError
    # for the name that was just deleted.
    if 'get_all' in __all__:
        __all__.remove('get_all')
class GCToggler:
    """The ``GCToggler`` is a context-manager that allows one to safely
    take more control of your garbage collection schedule. Anecdotal
    experience says certain object-creation-heavy tasks see speedups
    of around 10% by simply doing one explicit collection at the very
    end, especially if most of the objects will stay resident.

    Two GCTogglers are already present in the ``gcutils`` module:

    - :data:`toggle_gc` simply turns off GC at context entrance, and
      re-enables at exit
    - :data:`toggle_gc_postcollect` does the same, but triggers an
      explicit collection after re-enabling.

    >>> with toggle_gc:
    ...     x = [object() for i in range(1000)]

    The collector is only re-enabled at exit if it was enabled when the
    matching ``with`` block was entered, so nesting togglers (or using
    one while GC is already switched off) leaves the surrounding state
    untouched.

    Between those two instances, the ``GCToggler`` type probably won't
    be used much directly, but is documented for inheritance purposes.

    Args:
        postcollect (bool): When true, run :func:`gc.collect` every time
            a ``with`` block managed by this toggler exits.
    """
    def __init__(self, postcollect=False):
        self.postcollect = postcollect
        # One saved ``gc.isenabled()`` result per currently-active ``with``
        # block; a stack because the same instance may be entered again
        # before it has been exited.
        self._prior_states = []

    def __enter__(self):
        """Remember whether GC is currently enabled, then disable it."""
        self._prior_states.append(gc.isenabled())
        gc.disable()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the GC state saved on entry and optionally collect.

        Returns ``None``, so exceptions raised inside the block propagate.
        """
        # An ``__exit__`` without a matching ``__enter__`` keeps the
        # historical behavior of simply re-enabling the collector.
        if self._prior_states:
            was_enabled = self._prior_states.pop()
        else:
            was_enabled = True
        if was_enabled:
            gc.enable()
        if self.postcollect:
            # Explicit collection works whether or not automatic GC is on.
            gc.collect()
# Ready-made, shared togglers so callers don't need to instantiate their own.
toggle_gc = GCToggler()
"""A context manager for disabling GC for a code block. See
:class:`GCToggler` for more details."""

toggle_gc_postcollect = GCToggler(postcollect=True)
"""A context manager for disabling GC for a code block, and running an
explicit collection when the block exits. See :class:`GCToggler` for
more details."""