Source code for pynguin.analyses.typesystem

#  This file is part of Pynguin.
#
#  SPDX-FileCopyrightText: 2019–2026 Pynguin Contributors
#
#  SPDX-License-Identifier: MIT
#
"""Provides analyses for a module's type information."""

from __future__ import annotations

import functools
import inspect
import logging
import re
import types
import typing
from abc import ABC, abstractmethod
from collections import Counter, defaultdict
from dataclasses import dataclass, field
from itertools import starmap
from typing import (  # type: ignore[attr-defined]
    Any,
    Final,
    ForwardRef,
    Generic,
    TypeVar,
    _BaseGenericAlias,  # noqa: PLC2701
    _eval_type,  # noqa: PLC2701
    cast,
    get_origin,
)

import networkx as nx
from networkx.drawing.nx_pydot import to_pydot
from typing_inspect import is_union_type

import pynguin.configuration as config
import pynguin.utils.typetracing as tt
from pynguin.analyses.string_subtypes import infer_regex_from_methods
from pynguin.analyses.type_inference import HintInference
from pynguin.utils import randomness
from pynguin.utils.orderedset import OrderedSet
from pynguin.utils.randomness import weighted_choice
from pynguin.utils.type_utils import (
    COLLECTIONS,
    PRIMITIVES,
    get_method_for_signature,
)

if typing.TYPE_CHECKING:
    from collections.abc import Callable, Mapping, Sequence
    from typing import ClassVar

    from pynguin.analyses.module import TypeGuessingStats
    from pynguin.analyses.type_inference import InferenceProvider

_LOGGER = logging.getLogger(__name__)


# The following classes are inspired by
# https://github.com/python/mypy/blob/master/mypy/types.py and most likely incomplete.
# The plan is to gradually expand this type representation.


T = TypeVar("T")


[docs] class ProperType(ABC): """Base class for all types. Might have to add another layer, like mypy's Type?. All subclasses of this class are immutable. """
[docs] @abstractmethod def accept(self, visitor: TypeVisitor[T]) -> T: """Accept a type visitor. Args: visitor: the visitor """
def __str__(self) -> str: return self.accept(TypeStringVisitor()) def __repr__(self) -> str: return self.accept(TypeReprVisitor()) def __lt__(self, other): return str(self) < str(other)
[docs] class AnyType(ProperType): """The Any Type."""
[docs] def accept(self, visitor: TypeVisitor[T]) -> T: # noqa: D102 return visitor.visit_any_type(self)
def __hash__(self): return hash(AnyType) def __eq__(self, other): return isinstance(other, AnyType)
[docs] class NoneType(ProperType): """The None type."""
[docs] def accept(self, visitor: TypeVisitor[T]) -> T: # noqa: D102 return visitor.visit_none_type(self)
def __hash__(self): return hash(NoneType) def __eq__(self, other): return isinstance(other, NoneType)
[docs] class Instance(ProperType): """An instance type of form C[T1, ..., Tn]. C is a class. Args can be empty. """ def __init__( # noqa: D107 self, typ: TypeInfo, args: tuple[ProperType, ...] | None = None ): assert typ.raw_type is not tuple, "Use TupleType instead!" self.type = typ if args is None: args = () self.args: Final[tuple[ProperType, ...]] = tuple(args) # Cached hash value self._hash: int | None = None
[docs] def accept(self, visitor: TypeVisitor[T]) -> T: # noqa: D102 return visitor.visit_instance(self)
def __hash__(self): if self._hash is None: self._hash = hash((self.type, self.args)) return self._hash def __eq__(self, other): return isinstance(other, Instance) and self.type == other.type and self.args == other.args
[docs] class TupleType(ProperType): """Tuple type Tuple[T1, ..., Tn]. Note that tuple is a special case and intentionally not `Instance(TypeInfo(tuple))` because tuple is varargs generic. """ def __init__( # noqa: D107 self, args: tuple[ProperType, ...], *, unknown_size: bool = False ): self.args: Final[tuple[ProperType, ...]] = args self.unknown_size: Final[bool] = unknown_size # Cached hash value self._hash: int | None = None
[docs] def accept(self, visitor: TypeVisitor[T]) -> T: # noqa: D102 return visitor.visit_tuple_type(self)
def __hash__(self): if self._hash is None: self._hash = hash((self.args, self.unknown_size)) return self._hash def __eq__(self, other): return ( isinstance(other, TupleType) and self.args == other.args and self.unknown_size == other.unknown_size )
[docs] class UnionType(ProperType): """The union type Union[T1, ..., Tn] (at least one type argument).""" def __init__(self, items: tuple[ProperType, ...]): # noqa: D107 self.items: Final[tuple[ProperType, ...]] = items # TODO(fk) think about flattening Unions, also order should not matter. assert len(self.items) > 0 # Cached hash value self._hash: int | None = None
[docs] def accept(self, visitor: TypeVisitor[T]) -> T: # noqa: D102 return visitor.visit_union_type(self)
def __hash__(self): if self._hash is None: self._hash = hash(self.items) return self._hash def __eq__(self, other): return isinstance(other, UnionType) and self.items == other.items
[docs] class StringSubtype(ProperType): """A subtype of str determined by its regex. TODO(lk): Add support to all visitors. """ def __init__(self, regex: re.Pattern): # noqa: D107 self.regex = regex self._hash: int | None = None def __eq__(self, other): return isinstance(other, StringSubtype) and other.regex == self.regex def __hash__(self): return hash(self.regex) def __str__(self): return f"StringSubtype({self.regex})"
[docs] def accept(self, visitor: TypeVisitor[T]) -> T: # noqa: D102 return visitor.visit_string_subtype(self)
[docs] class NumericString(StringSubtype): """A string that only contains numerical characters and an optional decimal point. Examples: "123", "123.45", "0.001" """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^\d+(\.\d+)?$"))
[docs] class EmailString(StringSubtype): """A string that follows the general pattern of an email address. Example: "name@domain.edu". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^[^@]+@[^@]+\.[^@]+$"))
[docs] class HexadecimalString(StringSubtype): """A string that represents a hexadecimal number. Examples: "0x1A3F", "1a3f", "0XABCDEF". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^(0x|0X)?[0-9a-fA-F]+$"))
[docs] class ISOColorString(StringSubtype): """A string that represents a hexadecimal color code. Examples: "#FFFFFF", "#000000", "#FF5733". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^#([0-9a-fA-F]{6}|[0-9a-fA-F]{3})$"))
[docs] class UUIDString(StringSubtype): """A string that represents a UUID. Examples: "123e4567-e89b-12d3-a456-426614174000". """ def __init__(self): # noqa: D107 super().__init__( re.compile( r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" ) )
[docs] class ISODateString(StringSubtype): """A string that represents a date in ISO 8601 format. Examples: "2023-10-05", "1999-12-31". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^\d{4}-\d{2}-\d{2}$"))
[docs] class ISOTimeString(StringSubtype): """A string that represents a time in ISO 8601 format. Examples: "14:30:00", "23:59:59". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^\d{2}:\d{2}:\d{2}$"))
[docs] class CSVString(StringSubtype): """A string that represents comma-separated values. Examples: "value1,value2,value3", "itemA,itemB,itemC". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^([^,]+,)*[^,]+$"))
[docs] class URLString(StringSubtype): """A string that represents a URL. Examples: "http://example.com", "https://www.domain.org/path?query=param". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^(https?|ftp)://[^\s/$.?#].[^\s]*$"))
[docs] class IPv4String(StringSubtype): """A string that represents an IPv4 address. Examples: "192.168.0.0", "74.5.123.31". """ def __init__(self): # noqa: D107 super().__init__( re.compile( r"^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\." r"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\." r"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\." r"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$" ) )
[docs] class IPv6String(StringSubtype): """A string that represents an IPv6 address. Examples: "2001:0db8:85a3:0000:0000:8a2e:0370:7334", "fe80::1ff:fe23:4567:890a". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}$"))
[docs] class PhoneNumberString(StringSubtype): """A string that represents a phone number. Examples: "+1-800-555-1234", "(123) 456-7890". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^\+?[\d\s\-\(\)]+$"))
[docs] class SHA256String(StringSubtype): """A string that represents a SHA-256 hash. Examples: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855". """ def __init__(self): # noqa: D107 super().__init__(re.compile(r"^[a-fA-F0-9]{64}$"))
[docs] class Unsupported(ProperType): """Marks an unsupported type in the type system. Artificial type which represents a type that is currently not supported by our type abstraction. This is purely used for statistic purposes and should not be encountered during regular use. """ def __eq__(self, other): return isinstance(other, Unsupported) def __hash__(self): return hash(Unsupported)
[docs] def accept(self, visitor: TypeVisitor[T]) -> T: # noqa: D102 return visitor.visit_unsupported_type(self)
# Static to instances to avoid repeated construction. ANY = AnyType() NONE_TYPE = NoneType() UNSUPPORTED = Unsupported()
[docs] class TypeVisitor(Generic[T]): """A type visitor. Note that the parameter of the visit_* methods is called 'left', because it makes the implementations of _SubTypeVisitor and _MaybeSubTypeVisitor more clear and Python does not like changing the names of parameters in subclasses, thus we renamed them in this class. """
[docs] @abstractmethod def visit_any_type(self, left: AnyType) -> T: """Visit the Any type. Args: left: the Any type Returns: result of the visit """
[docs] @abstractmethod def visit_none_type(self, left: NoneType) -> T: """Visit the None type. Args: left: the None type Returns: result of the visit """
[docs] @abstractmethod def visit_instance(self, left: Instance) -> T: """Visit an instance. Args: left: instance Returns: result of the visit """
[docs] @abstractmethod def visit_tuple_type(self, left: TupleType) -> T: """Visit a tuple type. Args: left: tuple Returns: result of the visit """
[docs] @abstractmethod def visit_union_type(self, left: UnionType) -> T: """Visit a union. Args: left: union Returns: result of the visit """
[docs] @abstractmethod def visit_unsupported_type(self, left: Unsupported) -> T: """Visit unsupported type. Args: left: unsupported Returns: result of the visit """
[docs] def visit_string_subtype(self, left: StringSubtype) -> T: """Visit a string subtype. Args: left: string subtype Returns: Result of the visit """ return None # type: ignore[return-value]
class _PartialTypeMatch(TypeVisitor[ProperType | None]): """A type visitor to check for base type matches.""" def __init__(self, right: ProperType): self.right = right def visit_any_type(self, left: AnyType) -> ProperType | None: # Any is not guessed/recorded return None def visit_none_type(self, left: NoneType) -> ProperType | None: if isinstance(self.right, NoneType): return NONE_TYPE return None def visit_instance(self, left: Instance) -> ProperType | None: if isinstance(self.right, Instance) and left.type == self.right.type: return Instance(left.type) return None def visit_tuple_type(self, left: TupleType) -> ProperType | None: if isinstance(self.right, TupleType): return TupleType(()) return None def visit_union_type(self, left: UnionType) -> ProperType | None: matches: tuple[ProperType, ...] = tuple( elem for elem in (_is_partial_type_match(left_elem, self.right) for left_elem in left.items) if elem ) if matches: return UnionType(matches) return None def visit_unsupported_type(self, left: Unsupported) -> ProperType | None: # Cannot compare. return None def _is_partial_type_match(left: ProperType, right: ProperType) -> ProperType | None: """Is left a partial type match of right? This is only useful for statistics purposes, i.e., do we have a fuzzy type match? Args: left: The guessed type right: The ground truth Returns: The partial match, if Any. """ if isinstance(right, UnionType): matches: tuple[ProperType, ...] = tuple( elem for elem in (_is_partial_type_match(left, right_elem) for right_elem in right.items) if elem is not None ) if matches: flattened: set[ProperType] = set() for match in matches: if isinstance(match, UnionType): flattened.update(match.items) else: flattened.add(match) return UnionType(tuple(sorted(flattened))) return None return left.accept(_PartialTypeMatch(right))
[docs] class TypeStringVisitor(TypeVisitor[str]): """A simple visitor to convert a proper type to a string."""
[docs] def visit_any_type(self, left: AnyType) -> str: # noqa: D102 return "Any"
[docs] def visit_none_type(self, left: NoneType) -> str: # noqa: D102 return "None"
[docs] def visit_instance(self, left: Instance) -> str: # noqa: D102 rep = left.type.name if left.type.module == "builtins" else left.type.full_name if len(left.args) > 0: rep += "[" + self._sequence_str(left.args) + "]" return rep
[docs] def visit_tuple_type(self, left: TupleType) -> str: # noqa: D102 rep = "tuple" if len(left.args) > 0: rep += "[" + self._sequence_str(left.args) + "]" return rep
[docs] def visit_union_type(self, left: UnionType) -> str: # noqa: D102 if len(left.items) == 1: return left.items[0].accept(self) return self._sequence_str(left.items, sep=" | ")
def _sequence_str(self, typs: Sequence[ProperType], sep=", ") -> str: return sep.join(t.accept(self) for t in typs)
[docs] def visit_unsupported_type(self, left: Unsupported) -> str: # noqa: D102 return "<?>"
[docs] class TypeReprVisitor(TypeVisitor[str]): """A simple visitor to create a repr from a proper type."""
[docs] def visit_any_type(self, left: AnyType) -> str: # noqa: D102 return "AnyType()"
[docs] def visit_none_type(self, left: NoneType) -> str: # noqa: D102 return "NoneType()"
[docs] def visit_instance(self, left: Instance) -> str: # noqa: D102 rep = f"Instance({left.type!r}" if len(left.args) > 0: rep += "(" + self._sequence_str(left.args) + ")" return rep + ")"
[docs] def visit_tuple_type(self, left: TupleType) -> str: # noqa: D102 return f"TupleType({self._sequence_str(left.args)})"
[docs] def visit_union_type(self, left: UnionType) -> str: # noqa: D102 return f"UnionType({self._sequence_str(left.items)})"
def _sequence_str(self, typs: Sequence[ProperType]) -> str: return ", ".join(t.accept(self) for t in typs)
[docs] def visit_unsupported_type(self, left: Unsupported) -> str: # noqa: D102 return "Unsupported()"
class _SubtypeVisitor(TypeVisitor[bool]): """A visitor to check the subtyping relationship between two types. Checks whether left is a subtype of right. There is no need to check 'right' for AnyType, as this is done outside. """ def __init__( self, graph: TypeSystem, right: ProperType, sub_type_check: Callable[[ProperType, ProperType], bool], ): """Create new visitor. Args: graph: The inheritance graph. right: The right type. sub_type_check: The subtype check to use """ self.graph = graph self.right = right self.sub_type_check = sub_type_check def visit_any_type(self, left: AnyType) -> bool: # Any wins always return True def visit_none_type(self, left: NoneType) -> bool: # None cannot be subtyped # TODO(fk) handle protocols, e.g., hashable. return isinstance(self.right, NoneType) def visit_instance(self, left: Instance) -> bool: if isinstance(self.right, Instance): if not self.graph.is_subclass(left.type, self.right.type): return False if ( left.type.num_hardcoded_generic_parameters == self.right.type.num_hardcoded_generic_parameters and left.type.num_hardcoded_generic_parameters is not None ): # TODO(fk) handle generics properly :( # We only check hard coded generics for now and treat them as invariant, # i.e., set[T1] <: set[T2] <=> T1 <: T2 and T2 <: T1 return all( self.sub_type_check(left_elem, right_elem) and self.sub_type_check(right_elem, left_elem) for left_elem, right_elem in zip(left.args, self.right.args, strict=True) ) return True return False def visit_tuple_type(self, left: TupleType) -> bool: if isinstance(self.right, TupleType): if len(left.args) != len(self.right.args): # TODO(fk) Handle unknown size. return False return all(starmap(self.sub_type_check, zip(left.args, self.right.args, strict=True))) return False def visit_union_type(self, left: UnionType) -> bool: return all(self.sub_type_check(left_elem, self.right) for left_elem in left.items) def visit_unsupported_type(self, left: Unsupported) -> bool: raise NotImplementedError("This type shall not be used during runtime") class _SubtypeDistanceVisitor(TypeVisitor[int | None]): """A visitor to compute the subtype distance between two types. The subtype distance is the length of the shortest path in the inheritance graph between the two types. The AnyType is treated as a sub- and supertype of everything, but with a high distance. For all not supported types, None is returned indicating that the elements are not connected. """ def __init__( self, graph: TypeSystem, subtype: ProperType, any_distance: int = config.configuration.generator_selection.generator_any_distance, ): """Create a new visitor. Args: graph: The inheritance graph. subtype: The subtype to calculate the distance to. any_distance: The distance for the AnyType. """ self.graph = graph self.subtype = subtype self.any_distance = any_distance def visit_any_type(self, supertype: AnyType) -> int: return self.any_distance def visit_none_type(self, supertype: NoneType) -> None: return None def visit_instance(self, supertype: Instance) -> int | None: """Calculate the distance between two instances. Uses the distance in the inheritance graph between the two types. For types with arguments, e.g. list or dict, the sum of the distances of the arguments is returned. For the UnionType, the minimum distance is to any of the arguments is returned. For the AnyType, the distance is always the any_distance. Args: supertype: The supertype to calculate the distance to. Returns: The distance between the two types or None if they are not connected. """ if isinstance(self.subtype, Instance): if supertype.args and self.subtype.args: distances = list( map(self.graph.subtype_distance, supertype.args, self.subtype.args) ) if any(dist is None for dist in distances): return None return sum(distances) # type: ignore[arg-type] return self.graph.get_shortest_path_length(supertype.type, self.subtype.type) if isinstance(self.subtype, UnionType): distances = [ self.graph.subtype_distance(supertype, elem) for elem in self.subtype.items ] valid_distances = [dist for dist in distances if dist is not None] if valid_distances: return min(valid_distances) return None if isinstance(self.subtype, AnyType): return self.any_distance return None def visit_tuple_type(self, supertype: TupleType) -> int | None: """Calculate the distance between two tuple types. The length of the arguments must match. If so, the result is the sum of the distances of the arguments. Args: supertype: The supertype to calculate the distance to. Returns: The distance between the two types or None if they are not connected. """ if isinstance(self.subtype, TupleType) and len(supertype.args) == len(self.subtype.args): distances = list(map(self.graph.subtype_distance, supertype.args, self.subtype.args)) if any(dist is None for dist in distances): return None return sum(distances) # type: ignore[arg-type] return None def visit_union_type(self, supertype: UnionType) -> int | None: """Calculate the distance between two union types. The minimum distance between any of the arguments is returned. Args: supertype: The supertype to calculate the distance to. Returns: The distance between the two types or None if they are not connected. """ distances = [self.graph.subtype_distance(elem, self.subtype) for elem in supertype.items] valid_distances = [dist for dist in distances if dist is not None] if valid_distances: return min(valid_distances) return None def visit_unsupported_type(self, supertype: Unsupported) -> int | None: raise NotImplementedError("This type shall not be used during runtime") class _MaybeSubtypeVisitor(_SubtypeVisitor): """A weaker subtype check, which only checks if left may be a subtype of right. For example, tuple[str | int | bytes, str | int | bytes] is not a subtype of tuple[int, int], but the actual return value may be. """ def visit_union_type(self, left: UnionType) -> bool: return any(self.sub_type_check(left_elem, self.right) for left_elem in left.items) def visit_unsupported_type(self, left: Unsupported) -> bool: raise NotImplementedError("This type shall not be used during runtime") class _CollectionTypeVisitor(TypeVisitor[bool]): # No tuple because it is a separate type. Collections: ClassVar[set[type]] = {dict, list, set} def visit_any_type(self, left: AnyType) -> bool: return False def visit_none_type(self, left: NoneType) -> bool: return False def visit_instance(self, left: Instance) -> bool: return left.type.raw_type in _CollectionTypeVisitor.Collections def visit_tuple_type(self, left: TupleType) -> bool: return True def visit_union_type(self, left: UnionType) -> bool: return False def visit_unsupported_type(self, left: Unsupported) -> bool: raise NotImplementedError("This type shall not be used during runtime") is_collection_type = _CollectionTypeVisitor() class _PrimitiveTypeVisitor(TypeVisitor[bool]): Primitives: ClassVar[set[type]] = {int, str, bool, float, complex, bytes, type} def visit_any_type(self, left: AnyType) -> bool: return False def visit_none_type(self, left: NoneType) -> bool: return False def visit_instance(self, left: Instance) -> bool: return left.type.raw_type in _PrimitiveTypeVisitor.Primitives def visit_tuple_type(self, left: TupleType) -> bool: return False def visit_union_type(self, left: UnionType) -> bool: return False def visit_unsupported_type(self, left: Unsupported) -> bool: raise NotImplementedError("This type shall not be used during runtime") is_primitive_type = _PrimitiveTypeVisitor()
[docs] class TypeInfo: """A small wrapper around type, i.e., classes. Corresponds 1:1 to a class. """ def __init__(self, raw_type: type | types.UnionType): """Create type info from the given type. Don't use this constructor directly (unless for testing purposes), instead ask the inheritance graph to give you a type info for the given raw type. Naming in python is somehow misleading, 'type' actually only represents classes, but not any more complex types. Args: raw_type: the raw (class) type """ self.raw_type = raw_type self.name, self.qualname, self.module = TypeInfo._extract_name_qualname_module(raw_type) self.full_name = TypeInfo.to_full_name(raw_type) self.hash = hash(self.full_name) self.is_abstract = inspect.isabstract(raw_type) # TODO(fk) store more information on attributes self.instance_attributes: OrderedSet[str] = OrderedSet() self.attributes: OrderedSet[str] = OrderedSet() # TODO(fk) properly implement generics! # For now we just store the number of generic parameters for set, dict and list. self.num_hardcoded_generic_parameters: int | None = ( 2 if raw_type is dict else 1 if raw_type in {set, list} else None ) @staticmethod def _extract_name_qualname_module(raw_type: type | types.UnionType) -> tuple[str, str, str]: """Extract the name, qualname and module from the given type. While type has a __name__, __qualname__ and __module__ attribute, UnionType does not. This caused a crash which is resolved by special handling of UnionType. As fallback, we use the type of the given type, which worked for the UnionType. """ if isinstance(raw_type, types.UnionType): name = "UnionType" qualname = "UnionType" module = "types" return name, qualname, module name = TypeInfo.get_dunder_value_from_type(raw_type, "name") qualname = TypeInfo.get_dunder_value_from_type(raw_type, "qualname") module = TypeInfo.get_dunder_value_from_type(raw_type, "module") return name, qualname, module
[docs] @staticmethod def to_full_name(typ: type | types.UnionType) -> str: # noqa: D417 """Get the full name of the given type. While `type` has a `__name__`, `__qualname__` and `__module__` attribute, `UnionType` does not. This caused a crash which is resolved by special handling of `UnionType`. Args: `typ`: The `type` for which we want a full name. Returns: The fully qualified name """ if isinstance(typ, types.UnionType): return "types.UnionType" module = TypeInfo.get_dunder_value_from_type(typ, "module") qualname = TypeInfo.get_dunder_value_from_type(typ, "qualname") return f"{module}.{qualname}"
[docs] @staticmethod def get_dunder_value_from_type(typ: type, name: str) -> str: """Get the dunder value with the given name from the given type. If the given type has no dunder attribute with the given name, we fall back to using the type of the given typ (== a value in this case). This worked for the UnionType. Args: typ: The type from which to get the attribute. name: The name of the dunder attribute to get. Returns: The value of the dunder attribute. """ dunder_name = f"__{name}__" if hasattr(typ, dunder_name): return getattr(typ, dunder_name) _LOGGER.error( "%s has no attribute __%s__. This method must not be called with instances of types.", typ, name, ) return getattr(type(typ), dunder_name)
def __eq__(self, other) -> bool: return isinstance(other, TypeInfo) and other.full_name == self.full_name def __hash__(self): return self.hash def __repr__(self): return f"TypeInfo({self.full_name})"
[docs] class NamedDefaultDict(dict[str, tt.UsageTraceNode]): # noqa: FURB189 """A default dictionary that automatically creates nodes for keys. Default dict which automatically creates a UsageTraceNode for each requested and non-existing key. """ def __missing__(self, key): # Create node for missing attribute res = self[key] = tt.UsageTraceNode(key) return res
# If one of these methods was called on a proxy, we can use the argument type # to make guesses. # __mul__ and __rmul__ are not reliable, as they don't necessarily have to indicate # the type, for example, [1,2] * 3 is well-defined between a list and an int. _ARGUMENT_ATTRIBUTES = OrderedSet([ "__eq__", "__ne__", "__lt__", "__le__", "__gt__", "__ge__", "__add__", "__radd__", "__sub__", "__rsub__", "__truediv__", "__rtruediv__", "__floordiv__", "__rfloordiv__", ]) # If we suspect the type to be a string and one of these methods was called on a proxy, # we can use the argument values to try and infer a string subtype. _STRING_SUBTYPE_ATTRIBUTES = OrderedSet([ "startswith", "endswith", "split", "rsplit", "splitlines", "partition", "rpartition", "find", "rfind", "index", "rindex", "replace", "format", "join", "strip", "lstrip", "rstrip", "zfill", "center", "ljust", "rjust", "removeprefix", "removesuffix", "translate", "count", ]) STRING_SUBTYPES = OrderedSet([ NumericString, EmailString, HexadecimalString, ISOColorString, UUIDString, ISODateString, ISOTimeString, CSVString, URLString, IPv4String, IPv6String, PhoneNumberString, SHA256String, ]) # We can guess the element type by looking at the knowledge from these _LIST_ELEMENT_ATTRIBUTES = OrderedSet(("__iter__", "__getitem__")) _DICT_KEY_ATTRIBUTES = OrderedSet(("__iter__",)) _DICT_VALUE_ATTRIBUTES = OrderedSet(("__getitem__",)) _SET_ELEMENT_ATTRIBUTES = OrderedSet(("__iter__",)) _TUPLE_ELEMENT_ATTRIBUTES = OrderedSet(("__iter__", "__getitem__")) # We can guess generic type(s) from the argument type(s) of these methods: _LIST_ELEMENT_FROM_ARGUMENT_TYPES = OrderedSet(("__contains__", "__delitem__")) _SET_ELEMENT_FROM_ARGUMENT_TYPES = OrderedSet(("__contains__", "__delitem__")) _DICT_KEY_FROM_ARGUMENT_TYPES = OrderedSet(( "__contains__", "__delitem__", "__getitem__", "__setitem__", )) _DICT_VALUE_FROM_ARGUMENT_TYPES = OrderedSet(("__setitem__",)) _TUPLE_ELEMENT_FROM_ARGUMENT_TYPES = OrderedSet(("__contains__",)) # Similar to above, but these are not dunder methods but are called, # e.g., for 'append', we need to search for 'append.__call__(...)' _LIST_ELEMENT_FROM_ARGUMENT_TYPES_PATH: OrderedSet[tuple[str, ...]] = OrderedSet([ ("append", "__call__"), ("remove", "__call__"), ]) _SET_ELEMENT_FROM_ARGUMENT_TYPES_PATH: OrderedSet[tuple[str, ...]] = OrderedSet([ ("add", "__call__"), ("remove", "__call__"), ("discard", "__call__"), ]) # Nothing for tuple and dict. _EMPTY_SET: OrderedSet[tuple[str, ...]] = OrderedSet()
[docs] @dataclass(eq=False, repr=False) class InferredSignature: """Encapsulates the types inferred for a method.""" # Signature inferred from inspect, only useful to get non-type related information # on parameters signature: inspect.Signature # The return type original_return_type: ProperType # A dict mapping every parameter name to its type original_parameters: dict[str, ProperType] # Proxy knowledge learned from executions usage_trace: dict[str, tt.UsageTraceNode] = field( default_factory=NamedDefaultDict, init=False, ) # Reference to the used type system. type_system: TypeSystem # Return type might be updated, which is stored here. return_type: ProperType = field(init=False) # The currently guessed parameter types. Guessing will never result in Any, as # that is not a useful guess. current_guessed_parameters: dict[str, list[ProperType]] = field( init=False, default_factory=dict ) # Parameter types of each parameter, including unsupported types, # i.e., types that we currently cannot understand/parse. Purely used for statistics # purposes! parameters_for_statistics: dict[str, ProperType] = field(default_factory=dict) return_type_for_statistics: ProperType = ANY def __post_init__(self): self.return_type = self.original_return_type def __str__(self): return str(self.signature)
[docs] def get_parameter_types( self, signature_memo: dict[InferredSignature, dict[str, ProperType]] ) -> dict[str, ProperType]: """Get a possible type signature for the parameters. This method may choose a random type signature, or return the original one or create one based on the observed knowledge. Args: signature_memo: A memo that stores already chosen signatures, so that we don't choose another signature in the same run. This is required for certain operations in the test factory to be consistent. Returns: A dict of chosen parameter types for each parameter. """ if (sig := signature_memo.get(self)) is not None: # We already chose a signature return sig res: dict[str, ProperType] = {} test_conf = config.configuration.test_creation for param_name, orig_type in self.original_parameters.items(): if len(self.usage_trace[param_name]) > 0: # If we have information from proxies, update guess. self._update_guess( param_name, self._guess_parameter_type( self.usage_trace[param_name], self.signature.parameters[param_name].kind, ), ) # Choose from: # - Reusing developer annotated types # - Guessed types from proxies # - NoneType # - AnyType, i.e., disregard type choices: list[ProperType] = [NONE_TYPE, ANY] weights: list[float] = [test_conf.none_weight, test_conf.any_weight] if not isinstance(orig_type, AnyType): # Only add the original type to the choices, if it is not Any. choices.append(orig_type) weights.append(test_conf.original_type_weight) if (guessed := self.current_guessed_parameters.get(param_name)) is not None: # We have traced types choices.append(UnionType(tuple(sorted(guessed)))) weights.append(test_conf.type_tracing_weight) chosen = randomness.choices(choices, weights)[0] if ( randomness.next_float() < config.configuration.test_creation.wrap_var_param_type_probability ): # Wrap var-positional or var-keyword parameters in list/dict, # with a certain probability, as there might also be other data # structures suitable for being passed by * and **, e.g., # generators, tuples, etc. chosen = self.type_system.wrap_var_param_type( chosen, self.signature.parameters[param_name].kind, ) res[param_name] = chosen signature_memo[self] = res return res
def _update_guess(self, name: str, guessed: ProperType | None): if guessed is None: return if (old := self.current_guessed_parameters.get(name)) is None: self.current_guessed_parameters[name] = [guessed] else: if guessed in old: return if len(old) >= config.configuration.test_creation.type_tracing_kept_guesses: # Drop first guess old.pop(0) # append current old.append(guessed) def _guess_parameter_type(self, knowledge: tt.UsageTraceNode, kind) -> ProperType | None: """Guess a type for a parameter. Args: knowledge: The name of the parameter. kind: the kind of parameter. Returns: A guessed type for the given parameter name, or None, if no educated guess can be made. """ match kind: case inspect.Parameter.VAR_KEYWORD: # Case for **kwargs parameter # We know that it is always dict[str, ?]. # We can guess the unknown type by looking at the knowledge of # __getitem__ of the proxy. if (get_item_knowledge := knowledge.children.get("__getitem__")) is not None: return self._guess_parameter_type_from(get_item_knowledge) case inspect.Parameter.VAR_POSITIONAL: # Case for *args parameter # We know that it is always list[?] # Similar to above. if (iter_knowledge := knowledge.children.get("__iter__")) is not None: return self._guess_parameter_type_from(iter_knowledge) case _: return self._guess_parameter_type_from(knowledge) return None def _from_type_check(self, knowledge: tt.UsageTraceNode) -> ProperType | None: # Type checks is not empty here. return self._choose_type_or_negate( OrderedSet([self.type_system.to_type_info(randomness.choice(knowledge.type_checks))]) ) def _from_attr_table(self, knowledge: tt.UsageTraceNode) -> ProperType | None: random_attribute = randomness.choice(list(knowledge.children)) arg_types = knowledge.children[random_attribute].children["__call__"].arg_types[0] def _string_subtype() -> ProperType | None: return self._from_str_values(knowledge) def _argument_type() -> ProperType | None: selected_arg_type = randomness.choice(knowledge.children[random_attribute].arg_types[0]) return self._choose_type_or_negate( OrderedSet([self.type_system.to_type_info(selected_arg_type)]) ) def _attribute_table() -> ProperType | None: return self._choose_type_or_negate(self.type_system.find_by_attribute(random_attribute)) weights = { _string_subtype: config.configuration.type_inference.type_tracing_subtype_weight, _argument_type: config.configuration.type_inference.type_tracing_argument_type_weight, _attribute_table: config.configuration.type_inference.type_tracing_attribute_weight, } available_strategies = {} # Check if string subtype inference is applicable if ( config.configuration.type_inference.subtype_inference == config.SubtypeInferenceStrategy.STRING and len(arg_types) > 0 ): candidate_arg_type = randomness.choice(arg_types) if candidate_arg_type is str and random_attribute in _STRING_SUBTYPE_ATTRIBUTES: available_strategies[_string_subtype] = weights[_string_subtype] # Check if argument-type inference is applicable if ( random_attribute in _ARGUMENT_ATTRIBUTES and knowledge.children[random_attribute].arg_types[0] ): available_strategies[_argument_type] = weights[_argument_type] # Attribute-table inference is always available available_strategies[_attribute_table] = weights[_attribute_table] # Chose and execute strategy chosen_strategy = weighted_choice(available_strategies) return chosen_strategy() @staticmethod def _from_str_values(knowledge: tt.UsageTraceNode) -> StringSubtype | None: """Try to infer a string subtype from the given knowledge.""" # Get all arg values from the knowledge called_str_methods: Mapping[str, OrderedSet[str]] = defaultdict(OrderedSet) for child in knowledge.children: if child in _STRING_SUBTYPE_ATTRIBUTES: for arg_values in knowledge.children[child].children["__call__"].arg_values[0]: if isinstance(arg_values, str): called_str_methods[child].add(arg_values) if not called_str_methods: return None regex = infer_regex_from_methods(called_str_methods) return StringSubtype(regex) def _guess_parameter_type_from( self, knowledge: tt.UsageTraceNode, recursion_depth: int = 0 ) -> ProperType | None: guess_from: list[Callable[[tt.UsageTraceNode], ProperType | None]] = [] if knowledge.type_checks: guess_from.append(self._from_type_check) if knowledge.children: guess_from.append(self._from_attr_table) if not guess_from: return None guessed_type: ProperType | None = randomness.choice(guess_from)(knowledge) if recursion_depth <= 1 and guessed_type and guessed_type.accept(is_collection_type): guessed_type = self._guess_generic_type_parameters_for_builtins( guessed_type, knowledge, recursion_depth ) return guessed_type def _guess_generic_type_parameters_for_builtins( self, guessed_type: ProperType, knowledge: tt.UsageTraceNode, recursion_depth: int, ): # If it is a builtin collection, we may be able to make further guesses on # the generic types. if isinstance(guessed_type, Instance): args = guessed_type.args match guessed_type.type.full_name: case "builtins.list": guessed_element_type = self._guess_generic_arguments( knowledge, recursion_depth, _LIST_ELEMENT_ATTRIBUTES, _LIST_ELEMENT_FROM_ARGUMENT_TYPES, _LIST_ELEMENT_FROM_ARGUMENT_TYPES_PATH, argument_idx=0, ) args = (guessed_element_type or guessed_type.args[0],) case "builtins.set": guessed_element_type = self._guess_generic_arguments( knowledge, recursion_depth, _SET_ELEMENT_ATTRIBUTES, _SET_ELEMENT_FROM_ARGUMENT_TYPES, _SET_ELEMENT_FROM_ARGUMENT_TYPES_PATH, argument_idx=0, ) args = (guessed_element_type or guessed_type.args[0],) case "builtins.dict": guessed_key_type = self._guess_generic_arguments( knowledge, recursion_depth, _DICT_KEY_ATTRIBUTES, _DICT_KEY_FROM_ARGUMENT_TYPES, _EMPTY_SET, argument_idx=0, ) guessed_value_type = self._guess_generic_arguments( knowledge, recursion_depth, _DICT_VALUE_ATTRIBUTES, _DICT_VALUE_FROM_ARGUMENT_TYPES, _EMPTY_SET, argument_idx=1, ) args = ( guessed_key_type or guessed_type.args[0], guessed_value_type or guessed_type.args[1], ) guessed_type = Instance(guessed_type.type, args) elif isinstance(guessed_type, TupleType): # Guess random size of tuple. num_elements = randomness.next_int( 1, config.configuration.test_creation.collection_size ) elements = [] for _ in range(num_elements): guessed_element_type = self._guess_generic_arguments( knowledge, recursion_depth, _TUPLE_ELEMENT_ATTRIBUTES, _TUPLE_ELEMENT_FROM_ARGUMENT_TYPES, _EMPTY_SET, argument_idx=0, ) if guessed_element_type: elements.append(guessed_element_type) else: elements.append(ANY) guessed_type = TupleType(tuple(elements)) return guessed_type def _choose_type_or_negate(self, positive_types: OrderedSet[TypeInfo]) -> ProperType | None: if not positive_types: return None if randomness.next_float() < config.configuration.test_creation.negate_type: negated_choices = self.type_system.get_type_outside_of(positive_types) if len(negated_choices) > 0: return self.type_system.make_instance(randomness.choice(negated_choices)) return self.type_system.make_instance(randomness.choice(positive_types)) def _guess_generic_arguments( # noqa: PLR0917 self, knowledge: tt.UsageTraceNode, recursion_depth: int, element_attributes: OrderedSet[str], argument_attributes: OrderedSet[str], argument_attribute_paths: OrderedSet[tuple[str, ...]], argument_idx: int, ) -> ProperType | None: guess_from: list[ Callable[ [], ProperType | None, ] ] = [] if elem_attributes := element_attributes.intersection(knowledge.children.keys()): guess_from.append( functools.partial( self._guess_parameter_type_from, knowledge.children[randomness.choice(elem_attributes)], recursion_depth + 1, ) ) if arg_attributes := argument_attributes.intersection(knowledge.children.keys()): guess_from.append( functools.partial( self._guess_from_argument_types, arg_attributes, knowledge, argument_idx, ) ) if paths := [ path for path in argument_attribute_paths if knowledge.find_path(path) is not None ]: guess_from.append( functools.partial(self._guess_from_argument_types_from_path, paths, knowledge) ) # Add Any as guess, i.e., do not make argument more specific guess_from.append(lambda: ANY) return randomness.choice(guess_from)() def _guess_from_argument_types( self, arg_attrs: OrderedSet[str], knowledge: tt.UsageTraceNode, arg_idx: int = 0, ) -> ProperType | None: arg_types = knowledge.children[randomness.choice(arg_attrs)].arg_types[arg_idx] if arg_types: return self._choose_type_or_negate( OrderedSet([self.type_system.to_type_info(randomness.choice(arg_types))]) ) return None def _guess_from_argument_types_from_path( self, paths: list[tuple[str, ...]], knowledge: tt.UsageTraceNode, ) -> ProperType | None: path = randomness.choice(paths) path_end = knowledge.find_path(path) assert path_end is not None # Select type from last element in path, i.e., '__call__' # This way we can, for example, guess the generic type by looking at the # argument types of `append.__call__`. arg_types = path_end.children[path[-1]].arg_types[0] if arg_types: return self._choose_type_or_negate( OrderedSet([self.type_system.to_type_info(randomness.choice(arg_types))]) ) return None
[docs] def log_stats_and_guess_signature( self, is_constructor: bool, # noqa: FBT001 callable_full_name: str, stats: TypeGuessingStats, ) -> None: """Logs some statistics and creates a guessed signature. Parameters annotated with Any could not be guessed. Parameters: callable_full_name: The full, unique name of the callable. is_constructor: does this signature to a constructor? stats: stats object to log to. """ sig_info = stats.signature_infos[callable_full_name] sig_info.annotated_parameter_types = { k: str(v) for k, v in self.parameters_for_statistics.items() } if not is_constructor: # Constructors don't need a return type, so no need to log it. sig_info.annotated_return_type = str(self.return_type_for_statistics) else: stats.number_of_constructors += 1 parameter_types: dict[str, list[str]] = {} # The pairs for which we need to compute partial matches. compute_partial_matches_for: list[tuple[ProperType, ProperType]] = [] for param_name, param in self.signature.parameters.items(): if param_name not in self.original_parameters: # Only check params where we expect a parameter, i.e., not self. continue top_n_guesses: list[ProperType] = [] if len(self.usage_trace[param_name]) > 0: counter: Counter[ProperType] = Counter() for _ in range(100): guess = self._guess_parameter_type( self.usage_trace[param_name], param.kind, ) if guess is not None: counter[guess] += 1 for typ, _ in counter.most_common( config.configuration.statistics_output.type_guess_top_n ): top_n_guesses.append(typ) for item in top_n_guesses: compute_partial_matches_for.append( # noqa: PERF401 (item, self.parameters_for_statistics[param_name]) ) parameter_types[param_name] = [str(t) for t in top_n_guesses] # Also need to compute for return type(s). compute_partial_matches_for.append(( self.return_type, self.return_type_for_statistics, )) # Need to compute which types are base type matches of others. # Otherwise, we need to parse the string again in the evaluation... self._compute_partial_matches(compute_partial_matches_for, sig_info) return_type = str(self.return_type) if not is_constructor and self.return_type != self.original_return_type: sig_info.recorded_return_type = str(return_type) sig_info.guessed_parameter_types = parameter_types
@staticmethod def _compute_partial_matches(compute_partial_matches_for, sig_info): for left, right in compute_partial_matches_for: if (match := _is_partial_type_match(left, right)) is not None: sig_info.partial_type_matches[f"({left!s}, {right!s})"] = str(match)
[docs] class TypeSystem: # noqa: PLR0904 """Implements Pynguin's internal type system. Provides a simple inheritance graph relating various classes using their subclass relationships. Note that parents point to their children. This is also the central system to store/handle type information. """ def __init__(self): # noqa: D107 self._graph: nx.DiGraph[TypeInfo] = nx.DiGraph() # Maps all known types from their full name to their type info. self._types: dict[str, TypeInfo] = {} # Maps attributes to type which have that attribute self._attribute_map: dict[str, OrderedSet[TypeInfo]] = defaultdict(OrderedSet) # These types are intrinsic for Pynguin, i.e., we can generate them ourselves # without needing a generator. We store them here, so we don't have to generate # them all the time. self.primitive_proper_types = [self.convert_type_hint(prim) for prim in PRIMITIVES] self.collection_proper_types = [self.convert_type_hint(coll) for coll in COLLECTIONS] # Pre-compute numeric tower numeric = [complex, float, int, bool] self.numeric_tower: dict[Instance, list[Instance]] = cast( "dict[Instance, list[Instance]]", { self.convert_type_hint(typ): [self.convert_type_hint(tp) for tp in numeric[idx:]] for idx, typ in enumerate(numeric) }, ) for subtype in STRING_SUBTYPES: type_info = TypeInfo(subtype) self.add_subclass_edge(super_class=self.to_type_info(str), sub_class=type_info)
[docs] def enable_numeric_tower(self): """Enable the numeric tower on this type system.""" # Enable numeric tower int <: float <: complex. # https://peps.python.org/pep-0484/#the-numeric-tower bool_info = self.to_type_info(bool) int_info = self.to_type_info(int) float_info = self.to_type_info(float) complex_info = self.to_type_info(complex) self.add_subclass_edge(super_class=int_info, sub_class=bool_info) self.add_subclass_edge(super_class=float_info, sub_class=int_info) self.add_subclass_edge(super_class=complex_info, sub_class=float_info)
[docs] def add_subclass_edge(self, *, super_class: TypeInfo, sub_class: TypeInfo) -> None: """Add a subclass edge between two types. Args: super_class: superclass sub_class: subclass """ self._graph.add_edge(super_class, sub_class)
[docs] @functools.lru_cache(maxsize=1024) def get_subclasses(self, klass: TypeInfo) -> OrderedSet[TypeInfo]: """Provides all descendants of the given type. Includes klass. Args: klass: The class whose subtypes we want to query. Returns: All subclasses including klass """ if klass not in self._graph: return OrderedSet([klass]) result: OrderedSet[TypeInfo] = OrderedSet(nx.descendants(self._graph, klass)) result.add(klass) return result
[docs] @functools.lru_cache(maxsize=1024) def get_superclasses(self, klass: TypeInfo) -> OrderedSet[TypeInfo]: """Provides all ancestors of the given class. Args: klass: The class whose supertypes we want to query. Returns: All superclasses including klass """ if klass not in self._graph: return OrderedSet([klass]) result: OrderedSet[TypeInfo] = OrderedSet(nx.ancestors(self._graph, klass)) result.add(klass) return result
[docs] def get_type_outside_of(self, klasses: OrderedSet[TypeInfo]) -> OrderedSet[TypeInfo]: """Find a type that does not belong to the given types or any subclasses. Args: klasses: The classes to exclude Returns: A set of klasses that don't belong the given ones. """ results = OrderedSet(self._types.values()) for info in klasses: results.difference_update(self.get_subclasses(info)) return results
[docs] @functools.lru_cache(maxsize=16384) def is_subclass(self, left: TypeInfo, right: TypeInfo) -> bool: """Is 'left' a subclass of 'right'? Args: left: left type info right: right type info Returns: True, if there is a subclassing path from left to right. """ return nx.has_path(self._graph, right, left)
[docs] @functools.lru_cache(maxsize=16384) def is_subtype(self, left: ProperType, right: ProperType) -> bool: """Is 'left' a subtype of 'right'? This check is more than incomplete, but it takes into account that anything is a subtype of AnyType. See https://peps.python.org/pep-0483/ and https://peps.python.org/pep-0484/ for more details Args: left: The left type right: The right type Returns: True, if left is a subtype of right. """ if isinstance(right, AnyType): # trivial case return True if isinstance(right, UnionType) and not isinstance(left, UnionType): # Case that would be duplicated for each type, so we put it here. return any(self.is_subtype(left, right_elem) for right_elem in right.items) return left.accept(_SubtypeVisitor(self, right, self.is_subtype))
[docs] @functools.lru_cache(maxsize=16384) def is_maybe_subtype(self, left: ProperType, right: ProperType) -> bool: """Is 'left' maybe a subtype of 'right'? This is a more lenient check than is_subtype. Consider a function that returns tuple[str | int | bytes, str | int | bytes]. Strictly speaking, we cannot use such a value as an argument for a function that requires an argument of type tuple[int, int]. However, it may be possible that the returned value is tuple[int, int], in which case it does work. This check only differs from is_subtype in how it handles Unions. Instead of requiring every type to be a subtype, it is sufficient that one type of the Union is a subtype. Args: left: The left type right: The right type Returns: True, if left may be a subtype of right. """ if isinstance(right, AnyType): # trivial case return True if isinstance(right, UnionType) and not isinstance(left, UnionType): # Case that would be duplicated for each type, so we put it here. return any(self.is_maybe_subtype(left, right_elem) for right_elem in right.items) return left.accept(_MaybeSubtypeVisitor(self, right, self.is_maybe_subtype))
@property def dot(self) -> str: """Create dot representation of this graph. Returns: A dot string. """ dot = to_pydot(self._graph) return dot.to_string()
[docs] def to_type_info(self, typ: type | types.UnionType) -> TypeInfo: # noqa: D417 """Find or create `TypeInfo` for the given `type`. Args: `typ`: The raw `type` we want to convert. Returns: A `TypeInfo` object. """ # TODO(fk) what to do when we encounter a new type? found = self._types.get(TypeInfo.to_full_name(typ)) if found is not None: return found info = TypeInfo(typ) self._types[info.full_name] = info self._graph.add_node(info) return info
[docs] def find_type_info(self, full_name: str) -> TypeInfo | None: """Find typeinfo for the given name. Args: full_name: The name to search for. Returns: Type info, if any. """ return self._types.get(full_name)
[docs] def find_by_attribute(self, attr: str) -> OrderedSet[TypeInfo]: """Search for all types that have the given attribute. Args: attr: the attribute to search for. Returns: All types (or supertypes thereof) who have the given attribute. """ return self._attribute_map[attr]
[docs] def get_all_types(self) -> list[TypeInfo]: """Provides a list of all known types. Returns: A list of all known types. """ return list(self._types.values())
[docs] def get_shortest_path_length(self, start: TypeInfo, end: TypeInfo) -> int | None: """Get the shortest path length between two types. I tried adding @functools.lru_cache to this function does not improve performance, because we cache subtype_distance anyway. I tried using a _shortest_path_length_cache[(start, end)] along with using shortest_path_length(graph, source), which returns all nodes reachable from start and its length. However, probably the overhead from caching and having many different starting nodes make this slower than just using shortest_path_length with a target node (which can also leverage the quicker bidirectional_shortest_path internally). I also tried to come up with an own variant of nx.single_shortest_path_length that would stop at the target node, thinking the overhead maybe comes form deeper traversal, but this did not achieve better performance either. Thus, I suggest we stick to the simple version. Args: start: The start type end: The end type Returns: The shortest path length between the two types or None if no path exists. """ try: return int(nx.shortest_path_length(self._graph, start, end)) except nx.NetworkXNoPath: return None
[docs] def push_attributes_down(self) -> None: """Pushes attributes down in hierarchy. We don't want to see attributes multiple times, e.g., in subclasses, so only the first class in the hierarchy which adds the attribute should have it listed as an attribute, i.e., when searching for a class with that attribute we only want to retrieve the top-most class(es) in the hierarchy which define it, and not every (sub)class that inherited it. """ reach_in_sets: dict[TypeInfo, set[str]] = defaultdict(set) reach_out_sets: dict[TypeInfo, set[str]] = defaultdict(set) # While object sits at the top, it is not particularly useful, so we delete # some of it attributes, as they are only stubs. For example, when searching for # an object that supports comparison, choosing object does not make sense, # because it will raise a NotImplementedError. object_info = self.find_type_info("builtins.object") assert object_info is not None object_info.attributes.difference_update({ "__lt__", "__le__", "__gt__", "__ge__", }) # Use fix point iteration with reach-in/out to push elements down. work_list = list(self._graph.nodes) while len(work_list) > 0: current = work_list.pop() old_val = set(reach_out_sets[current]) for pred in self._graph.predecessors(current): reach_in_sets[current].update(reach_out_sets[pred]) current.attributes.difference_update(reach_in_sets[current]) reach_out_sets[current] = set(reach_in_sets[current]) reach_out_sets[current].update(current.attributes) if old_val != reach_out_sets[current]: work_list.extend(self._graph.successors(current)) for type_info in self._graph.nodes: for attribute in type_info.attributes: self._attribute_map[attribute].add(type_info)
[docs] def wrap_var_param_type(self, typ: ProperType, param_kind) -> ProperType: """Wrap parameter types. Wrap the parameter type of ``*args`` and ``**kwargs`` in List[...] or Dict[str, ...], respectively. Args: typ: The type to be wrapped. param_kind: the kind of parameter. Returns: The wrapped type, or the original type, if no wrapping is required. """ if param_kind == inspect.Parameter.VAR_POSITIONAL: return Instance(self.to_type_info(list), (typ,)) if param_kind == inspect.Parameter.VAR_KEYWORD: return Instance(self.to_type_info(dict), (self.convert_type_hint(str), typ)) return typ
[docs] def infer_type_info( self, method: Callable, type_inference_provider: InferenceProvider, ) -> InferredSignature: """Infers the type information for a callable. Args: method: The callable we try to infer type information for type_inference_provider: The provider for type inference Returns: The inference result """ return self.infer_signature(method, type_inference_provider.provide)
[docs] @staticmethod def type_hints_provider(method: Callable) -> dict[str, Any]: """Provides PEP484-style type information, if available. Args: method: The method for which we want type hints. Returns: A dict mapping parameter names to type hints. """ try: hints = typing.get_type_hints(method) # Sadly there is no guarantee that resolving the type hints actually works. # If the developers annotated something with an erroneous type hint we fall # back to no type hints, i.e., use Any. # Using Self from `from __future__ import annotations` also results in Any. # The import used in the type hint could also be conditional on # typing.TYPE_CHECKING, e.g., to avoid circular imports, in which case this # also fails. except (AttributeError, NameError, TypeError) as exc: _LOGGER.debug("Could not retrieve type hints for %s", method) _LOGGER.debug(exc) hints = {} return hints
[docs] def infer_signature( self, method: Callable, type_hint_provider: Callable[[Callable], dict], ) -> InferredSignature: """Infers the method signature using the given type hint provider. Args: method: The callable type_hint_provider: A method that provides type hints for the given method. Returns: The inference result """ method_for_signature = get_method_for_signature(method) try: method_signature = inspect.signature(method_for_signature) except ValueError: method_signature = inspect.Signature( parameters=[ inspect.Parameter( name="args", kind=inspect.Parameter.VAR_POSITIONAL, annotation=inspect.Signature.empty, ), inspect.Parameter( name="kwargs", kind=inspect.Parameter.VAR_KEYWORD, annotation=inspect.Signature.empty, ), ], return_annotation=inspect.Signature.empty, ) hints = type_hint_provider(method) parameters: dict[str, ProperType] = {} # Always use type hints for statistics, regardless of configured inference. hints_provider_for_statistics = HintInference() hints_for_statistics: dict = hints_provider_for_statistics.provide(method) parameters_for_statistics: dict[str, ProperType] = {} for param_name in method_signature.parameters: if param_name == "self": # TODO(fk) does not necessarily work, can be named anything. # There is also cls for @classmethod. continue parameters[param_name] = self.convert_type_hint(hints.get(param_name)) parameters_for_statistics[param_name] = self.convert_type_hint( hints_for_statistics.get(param_name), unsupported=UNSUPPORTED ) return_type: ProperType = self.convert_type_hint(hints.get("return")) return_type_for_statistics: ProperType = self.convert_type_hint( hints_for_statistics.get("return"), unsupported=UNSUPPORTED ) return InferredSignature( signature=method_signature, original_parameters=parameters, original_return_type=return_type, type_system=self, parameters_for_statistics=parameters_for_statistics, return_type_for_statistics=return_type_for_statistics, )
_FIND_DOT_SEPARATED_IDENTIFIERS = re.compile(r"[.a-zA-Z0-9_]+\.[a-zA-Z0-9_]+")
[docs] def try_to_load_type(self, candidate: str, globs) -> ProperType: """Try to load the given type. Args: candidate: The type to load globs: The globals that should be used for loading. Returns: The loaded type or Any. """ glob: dict[str, Any] = {} # Make sure typing constructs are available exec("from typing import *", glob) # noqa: S102 # Make globals from module available glob.update(globs) # Import any prefixes # TODO(fk) properly implement this way to find potential imports: for potential_type in self._FIND_DOT_SEPARATED_IDENTIFIERS.finditer(candidate): # try to import everything left of last dot potential_import = potential_type.group(0).rpartition(".")[0] _LOGGER.info("Try to import %s", potential_import) try: exec("import " + potential_import, glob) # noqa: S102 except Exception as err: # noqa: BLE001 _LOGGER.debug(err) # If a type cannot be built from this info, there is not much we can do. try: # (Ab)use typing module ref = ForwardRef(candidate) return self.convert_type_hint(_eval_type(ref, glob, glob)) except Exception: # noqa: BLE001 # Give up? return ANY
[docs] def convert_type_hint(self, hint: Any, unsupported: ProperType = ANY) -> ProperType: """Converts a type hint to a proper type. Python's builtin functionality makes handling types during runtime really hard, because 1) this is not intended to be used at runtime and 2) there are a lot of different notations, due to the constantly evolving type hint system. We also cannot easily use mypy's type abstraction because it is 1) strongly encapsulated and not part of mypy's public API and 2) is designed to be used for static type checking. This method tries to translate type hints into our own type abstraction in order to make handling types less painful. This conversion is naive when compared to what sophisticated type checkers like mypy do, but it is hopefully sufficient for our purposes. This method only handles a very small subset of the types that we may encounter in the wild, but at least it allows use to better reason about types. This should be extended in the future to handle more cases. Args: hint: The type hint unsupported: The type to use when encountering an unsupported type construct Returns: A proper type. """ # We must handle a lot of special cases, so try to give an example for each one. if hint is Any or hint is None: # typing.Any or empty return ANY if hint is type(None): # None return NONE_TYPE if hint is tuple: # tuple # TODO(fk) Tuple without size. Should use tuple[Any, ...] ? # But ... (ellipsis) is not a type. return TupleType((ANY,), unknown_size=True) if get_origin(hint) is tuple: # Type is `tuple[int, str]` or `typing.Tuple[int, str]` or `typing.Tuple` args = self.__convert_args_if_exists(hint, unsupported=unsupported) if not args: return TupleType((ANY,), unknown_size=True) return TupleType(args) if is_union_type(hint) or isinstance(hint, types.UnionType): # Type is `int | str` or `typing.Union[int, str]` # TODO(fk) don't make a union including Any. return UnionType( tuple(sorted(self.__convert_args_if_exists(hint, unsupported=unsupported))) ) if isinstance(hint, _BaseGenericAlias | types.GenericAlias): # `list[int, str]` or `List[int, str]` or `Dict[int, str]` or `set[str]` result = Instance( self.to_type_info(hint.__origin__), self.__convert_args_if_exists(hint, unsupported=unsupported), ) # TODO(fk) remove this one day. # Hardcoded support generic dict, list and set. return self._fixup_known_generics(result) if isinstance(hint, type): # `int` or `str` or `MyClass` return self._fixup_known_generics(Instance(self.to_type_info(hint))) # TODO(fk) log unknown hints to so we can better understand what # we should add next # Remove this or log to statistics? _LOGGER.debug("Unknown type hint: %s", hint) # Should raise an error in the future. return unsupported
def __convert_args_if_exists( self, hint: Any, unsupported: ProperType ) -> tuple[ProperType, ...]: if hasattr(hint, "__args__"): return tuple(self.convert_type_hint(t, unsupported=unsupported) for t in hint.__args__) return ()
[docs] def make_instance(self, typ: TypeInfo) -> Instance | TupleType | NoneType: """Create an instance from the given type. Args: typ: The type info. Returns: An instance or TupleType """ if typ.full_name == "builtins.tuple": return TupleType((ANY,), unknown_size=True) if typ.full_name == "builtins.NoneType": return NONE_TYPE result = Instance( typ, ) return self._fixup_known_generics(result)
@staticmethod def _fixup_known_generics(result: Instance) -> Instance: if result.type.num_hardcoded_generic_parameters is not None: args = tuple(result.args) if len(result.args) < result.type.num_hardcoded_generic_parameters: # Fill with AnyType if to small args += (ANY,) * (result.type.num_hardcoded_generic_parameters - len(args)) elif len(result.args) > result.type.num_hardcoded_generic_parameters: # Remove excessive args. args = args[: result.type.num_hardcoded_generic_parameters] return Instance(result.type, args) return result
[docs] @functools.lru_cache(maxsize=16384) def subtype_distance(self, supertype: ProperType, subtype: ProperType) -> int | None: """Computes the number of subclassing steps from the supertype to the subtype. The number of subclassing steps is the shortest path length in the inheritance graph from the supertype to the subtype. The length from a type to itself is 0. If there is no path from the supertype to the subtype, i.e. if the supertype is not a supertype of the subtype, the method returns None. Similar to is_maybe_subtype, for unions it is sufficient that one type of the Union is a subtype and further the shortest path between all arguments of the Union is computed. See _SubtypeDistanceVisitor for special cases. Examples: Assume (Super > Sub > SubSub). Super, Sub -> 1 Sub, Super -> None Super, Super -> 0 Super, SubSub -> 2 Args: supertype: The more general type. subtype: The more specific type. Returns: The number of subclassing steps from left to right, or None if no path exists. """ return supertype.accept(_SubtypeDistanceVisitor(self, subtype))