packagelister.packagelister

  1import ast
  2import importlib.metadata
  3import sys
  4from dataclasses import dataclass
  5
  6from pathier import Pathier, Pathish
  7from printbuddies import ProgBar
  8from typing_extensions import Self
  9
 10# figured it's more efficient to have this on hand than calling the function everytime I need the mapping
 11packages_distributions = importlib.metadata.packages_distributions()
 12# A list of distributions for this Python install
 13distributions = set(
 14    [
 15        name
 16        for distributions in packages_distributions.values()
 17        for name in distributions
 18    ]
 19)
 20
 21
 22def is_builtin(package_name: str) -> bool:
 23    """Returns whether `package_name` is a standard library module or not."""
 24    return package_name in sys.stdlib_module_names
 25
 26
 27@dataclass
 28class Package:
 29    """Dataclass representing an imported package.
 30
 31    #### Fields:
 32    * `name: str`
 33    * `distribution_name: str` - the name used to `pip install`, sometimes this differs from `name`
 34    * `version: str`
 35    * `builtin: bool` - whether this is a standard library package or not"""
 36
 37    name: str
 38    distribution_name: str
 39    version: str
 40    builtin: bool
 41
 42    def get_formatted_requirement(self, version_specifier: str):
 43        """Returns a string of the form `{self.distribution_name}{version_specifier}{self.version}`.
 44        e.g for this package: `"packagelister>=2.0.0"`"""
 45        return f"{self.distribution_name}{version_specifier}{self.version}"
 46
 47    @classmethod
 48    def from_name(cls, package_name: str) -> Self:
 49        """Returns a `Package` instance from the package name.
 50
 51        Will attempt to determine the other class fields."""
 52        distributions = packages_distributions.get(package_name)
 53        if distributions:
 54            distribution_name = distributions[0]
 55            version = importlib.metadata.version(distribution_name)
 56        else:
 57            distribution_name = ""
 58            version = ""
 59        return cls(package_name, distribution_name, version, is_builtin(package_name))
 60
 61    @classmethod
 62    def from_distribution_name(cls, distribution_name: str) -> Self:
 63        """Returns a `Package` instance from the distribution name.
 64
 65        Returned instance will have an empty `name` field.
 66
 67        Raises `ValueError` if `distribution_name` isn't found in `importlib.metadata.packages_distributions()`.
 68        """
 69        if distribution_name not in distributions:
 70            raise ValueError(
 71                f"`{distribution_name}` not found in Python's installed distributions."
 72            )
 73        version = importlib.metadata.version(distribution_name)
 74        return cls("", distribution_name, version, False)
 75
 76
 77class PackageList(list[Package]):
 78    """A subclass of `list` to add convenience methods when working with a list of `packagelister.Package` objects."""
 79
 80    @property
 81    def names(self) -> list[str]:
 82        """Returns a list of `Package.name` strings."""
 83        return [package.name for package in self]
 84
 85    @property
 86    def distribution_names(self) -> list[str | None]:
 87        """Returns a list of `Package.distribution_name` strings for third party packages in this list."""
 88        return [package.distribution_name for package in self.third_party]
 89
 90    @property
 91    def third_party(self) -> Self:
 92        """Returns a `PackageList` instance for the third party packages in this list."""
 93        return self.__class__(
 94            [
 95                package
 96                for package in self
 97                if not package.builtin and package.distribution_name
 98            ]
 99        )
100
101    @property
102    def builtin(self) -> Self:
103        """Returns a `PackageList` instance for the standard library packages in this list."""
104        return self.__class__([package for package in self if package.builtin])
105
106
@dataclass
class File:
    """Dataclass representing a scanned file and its list of imported packages.

    #### Fields:
    * `path: Pathier` - Pathier object representing the path to this file
    * `packages: packagelister.PackageList` - List of Package objects imported by this file
    """

    path: Pathier
    packages: PackageList
118
119
@dataclass
class Project:
    """Dataclass representing a directory that's had its files scanned for imports.

    #### Fields:
    * `files: list[packagelister.File]`"""

    files: list[File]

    @property
    def packages(self) -> PackageList:
        """Returns a `packagelister.PackageList` object for this instance with no duplicates.

        The list is sorted by `Package.name`."""
        # `Package` is a non-frozen dataclass (unhashable), so duplicates are
        # dropped with an equality scan rather than a set.
        unique: list[Package] = []
        for file in self.files:
            for package in file.packages:
                if package not in unique:
                    unique.append(package)
        return PackageList(sorted(unique, key=lambda p: p.name))

    @property
    def requirements(self) -> PackageList:
        """Returns a `packagelister.PackageList` object of third party packages used by this project."""
        return self.packages.third_party

    def get_formatted_requirements(
        self, version_specifier: str | None = None
    ) -> list[str]:
        """Returns a list of formatted requirements (third party packages) using `version_specifier` (`==`,`>=`, `<=`, etc.).

        If no `version_specifier` is given, the returned list will just be package names.
        """
        requirements = self.requirements
        if version_specifier:
            return [
                requirement.get_formatted_requirement(version_specifier)
                for requirement in requirements
            ]
        return [
            requirement.distribution_name or requirement.name
            for requirement in requirements
        ]

    def get_files_by_package(self) -> dict[str, list[Pathier]]:
        """Returns a dictionary where the keys are package names and the values are lists of files that import the package."""
        # Collect each file's imported names once instead of once per package.
        names_by_path = [(file.path, set(file.packages.names)) for file in self.files]
        files_by_package: dict[str, list[Pathier]] = {}
        for package in self.packages:
            name = package.name
            for path, names in names_by_path:
                if name in names:
                    files_by_package.setdefault(name, []).append(path)
        return files_by_package
170
171
def get_package_names_from_source(source: str) -> list[str]:
    """Scan `source` and extract the names of imported packages/modules.

    Only the top level name of each import is kept (`import a.b.c` gives `a`).
    Every name of a multi-name statement (`import a, b`) is reported.
    Relative imports (`from . import x`, `from .mod import y`) are skipped
    since they point back into the scanned project rather than at a package.

    Returns a sorted list with no duplicates.

    Raises `SyntaxError` if `source` can't be parsed."""
    tree = ast.parse(source)
    packages: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            modules = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom):
            # A non-zero `level` marks a relative import; `module` is `None` for `from . import x`.
            modules = [node.module] if node.module and not node.level else []
        else:
            continue
        for module in modules:
            packages.add(module.partition(".")[0])
    return sorted(packages)
188
189
def scan_file(file: Pathish) -> File:
    """Scan `file` for imports and return a `packagelister.File` instance.

    `file` is read as UTF-8 text."""
    if not isinstance(file, Pathier):
        file = Pathier(file)
    source = file.read_text(encoding="utf-8")
    names = get_package_names_from_source(source)
    # don't want to pick up modules in the scanned directory
    path_parts = file.parts
    used_packages = PackageList(
        Package.from_name(name) for name in names if name not in path_parts
    )
    return File(file, used_packages)
204
205
def scan_dir(path: Pathish, quiet: bool = False) -> Project:
    """Recursively scan `*.py` files in `path` for imports and return a `packagelister.Project` instance.

    Set `quiet` to `True` to prevent printing."""
    if not isinstance(path, Pathier):
        path = Pathier(path)
    files = list(path.rglob("*.py"))
    if quiet:
        return Project([scan_file(file) for file in files])
    num_files = len(files)
    print(f"Scanning {num_files} files in {path} for imports...")
    with ProgBar(num_files, width_ratio=0.3) as bar:
        project = Project(
            [bar.display(return_object=scan_file(file)) for file in files]
        )
    return project
def is_builtin(package_name: str) -> bool:
23def is_builtin(package_name: str) -> bool:
24    """Returns whether `package_name` is a standard library module or not."""
25    return package_name in sys.stdlib_module_names

Returns whether package_name is a standard library module or not.

@dataclass
class Package:
28@dataclass
29class Package:
30    """Dataclass representing an imported package.
31
32    #### Fields:
33    * `name: str`
34    * `distribution_name: str` - the name used to `pip install`, sometimes this differs from `name`
35    * `version: str`
36    * `builtin: bool` - whether this is a standard library package or not"""
37
38    name: str
39    distribution_name: str
40    version: str
41    builtin: bool
42
43    def get_formatted_requirement(self, version_specifier: str):
44        """Returns a string of the form `{self.distribution_name}{version_specifier}{self.version}`.
45        e.g for this package: `"packagelister>=2.0.0"`"""
46        return f"{self.distribution_name}{version_specifier}{self.version}"
47
48    @classmethod
49    def from_name(cls, package_name: str) -> Self:
50        """Returns a `Package` instance from the package name.
51
52        Will attempt to determine the other class fields."""
53        distributions = packages_distributions.get(package_name)
54        if distributions:
55            distribution_name = distributions[0]
56            version = importlib.metadata.version(distribution_name)
57        else:
58            distribution_name = ""
59            version = ""
60        return cls(package_name, distribution_name, version, is_builtin(package_name))
61
62    @classmethod
63    def from_distribution_name(cls, distribution_name: str) -> Self:
64        """Returns a `Package` instance from the distribution name.
65
66        Returned instance will have an empty `name` field.
67
68        Raises `ValueError` if `distribution_name` isn't found in `importlib.metadata.packages_distributions()`.
69        """
70        if distribution_name not in distributions:
71            raise ValueError(
72                f"`{distribution_name}` not found in Python's installed distributions."
73            )
74        version = importlib.metadata.version(distribution_name)
75        return cls("", distribution_name, version, False)

Dataclass representing an imported package.

Fields:

  • name: str
  • distribution_name: str - the name used to pip install, sometimes this differs from name
  • version: str
  • builtin: bool - whether this is a standard library package or not
Package(name: str, distribution_name: str, version: str, builtin: bool)
def get_formatted_requirement(self, version_specifier: str) -> str:
    """Returns a string of the form `{self.distribution_name}{version_specifier}{self.version}`.
    e.g. for this package: `"packagelister>=2.0.0"`"""
    return f"{self.distribution_name}{version_specifier}{self.version}"

Returns a string of the form {self.distribution_name}{version_specifier}{self.version}, e.g. for this package: "packagelister>=2.0.0"

@classmethod
def from_name(cls, package_name: str) -> Self:
    """Returns a `Package` instance from the package name.

    Will attempt to determine the other class fields.
    `distribution_name` and `version` are empty strings when no distribution
    is mapped to `package_name`."""
    candidates = packages_distributions.get(package_name)
    if candidates:
        # Several distributions may be mapped to one import name; the first one listed is used.
        distribution_name = candidates[0]
        version = importlib.metadata.version(distribution_name)
    else:
        distribution_name = ""
        version = ""
    return cls(package_name, distribution_name, version, is_builtin(package_name))

Returns a Package instance from the package name.

Will attempt to determine the other class fields.

@classmethod
def from_distribution_name(cls, distribution_name: str) -> Self:
    """Returns a `Package` instance from the distribution name.

    Returned instance will have an empty `name` field and `builtin` set to `False`.

    Raises `ValueError` if `distribution_name` isn't found in `importlib.metadata.packages_distributions()`.
    """
    if distribution_name not in distributions:
        raise ValueError(
            f"`{distribution_name}` not found in Python's installed distributions."
        )
    version = importlib.metadata.version(distribution_name)
    return cls("", distribution_name, version, False)

Returns a Package instance from the distribution name.

Returned instance will have an empty name field.

Raises ValueError if distribution_name isn't found in importlib.metadata.packages_distributions().

class PackageList(list[Package]):
    """A subclass of `list` to add convenience methods when working with a list of `packagelister.Package` objects."""

    @property
    def names(self) -> list[str]:
        """Returns a list of `Package.name` strings."""
        return [package.name for package in self]

    @property
    def distribution_names(self) -> list[str]:
        """Returns a list of `Package.distribution_name` strings for third party packages in this list."""
        return [package.distribution_name for package in self.third_party]

    @property
    def third_party(self) -> Self:
        """Returns a `PackageList` instance for the third party packages in this list.

        A package is kept when it isn't flagged `builtin` and has a non-empty `distribution_name`.
        """
        return self.__class__(
            package
            for package in self
            if not package.builtin and package.distribution_name
        )

    @property
    def builtin(self) -> Self:
        """Returns a `PackageList` instance for the standard library packages in this list."""
        return self.__class__(package for package in self if package.builtin)

A subclass of list to add convenience methods when working with a list of packagelister.Package objects.

names: list[str]

Returns a list of Package.name strings.

distribution_names: list[str | None]

Returns a list of Package.distribution_name strings for third party packages in this list.

third_party: Self

Returns a PackageList instance for the third party packages in this list.

builtin: Self

Returns a PackageList instance for the standard library packages in this list.

Inherited Members
builtins.list
list
clear
copy
append
insert
extend
pop
remove
index
count
reverse
sort
@dataclass
class File:
    """Dataclass representing a scanned file and its list of imported packages.

    #### Fields:
    * `path: Pathier` - Pathier object representing the path to this file
    * `packages: packagelister.PackageList` - List of Package objects imported by this file
    """

    path: Pathier
    packages: PackageList

Dataclass representing a scanned file and its list of imported packages.

Fields:

  • path: Pathier - Pathier object representing the path to this file
  • packages: packagelister.PackageList - List of Package objects imported by this file
File( path: pathier.pathier.Pathier, packages: packagelister.packagelister.PackageList)
@dataclass
class Project:
    """Dataclass representing a directory that's had its files scanned for imports.

    #### Fields:
    * `files: list[packagelister.File]`"""

    files: list[File]

    @property
    def packages(self) -> PackageList:
        """Returns a `packagelister.PackageList` object for this instance with no duplicates.

        The list is sorted by `Package.name`."""
        # `Package` is a non-frozen dataclass (unhashable), so duplicates are
        # dropped with an equality scan rather than a set.
        unique: list[Package] = []
        for file in self.files:
            for package in file.packages:
                if package not in unique:
                    unique.append(package)
        return PackageList(sorted(unique, key=lambda p: p.name))

    @property
    def requirements(self) -> PackageList:
        """Returns a `packagelister.PackageList` object of third party packages used by this project."""
        return self.packages.third_party

    def get_formatted_requirements(
        self, version_specifier: str | None = None
    ) -> list[str]:
        """Returns a list of formatted requirements (third party packages) using `version_specifier` (`==`,`>=`, `<=`, etc.).

        If no `version_specifier` is given, the returned list will just be package names.
        """
        requirements = self.requirements
        if version_specifier:
            return [
                requirement.get_formatted_requirement(version_specifier)
                for requirement in requirements
            ]
        return [
            requirement.distribution_name or requirement.name
            for requirement in requirements
        ]

    def get_files_by_package(self) -> dict[str, list[Pathier]]:
        """Returns a dictionary where the keys are package names and the values are lists of files that import the package."""
        # Collect each file's imported names once instead of once per package.
        names_by_path = [(file.path, set(file.packages.names)) for file in self.files]
        files_by_package: dict[str, list[Pathier]] = {}
        for package in self.packages:
            name = package.name
            for path, names in names_by_path:
                if name in names:
                    files_by_package.setdefault(name, []).append(path)
        return files_by_package

Dataclass representing a directory that's had its files scanned for imports.

Fields:

  • files: list[packagelister.File]
Project(files: list[packagelister.packagelister.File])

Returns a packagelister.PackageList object for this instance with no duplicates.

Returns a packagelister.PackageList object of third party packages used by this project.

def get_formatted_requirements(self, version_specifier: str | None = None) -> list[str]:
145    def get_formatted_requirements(
146        self, version_specifier: str | None = None
147    ) -> list[str]:
148        """Returns a list of formatted requirements (third party packages) using `version_specifier` (`==`,`>=`, `<=`, etc.).
149
150        If no `version_specifier` is given, the returned list will just be package names.
151        """
152        return [
153            requirement.get_formatted_requirement(version_specifier)
154            if version_specifier
155            else requirement.distribution_name or requirement.name
156            for requirement in self.requirements
157        ]

Returns a list of formatted requirements (third party packages) using version_specifier (==,>=, <=, etc.).

If no version_specifier is given, the returned list will just be package names.

def get_files_by_package(self) -> dict[str, list[Pathier]]:
    """Returns a dictionary where the keys are package names and the values are lists of files that import the package."""
    # Collect each file's imported names once instead of once per package.
    names_by_path = [(file.path, set(file.packages.names)) for file in self.files]
    files_by_package: dict[str, list[Pathier]] = {}
    for package in self.packages:
        name = package.name
        for path, names in names_by_path:
            if name in names:
                files_by_package.setdefault(name, []).append(path)
    return files_by_package

Returns a dictionary where the keys are package names and the values are lists of files that import the package.

def get_package_names_from_source(source: str) -> list[str]:
    """Scan `source` and extract the names of imported packages/modules.

    Only the top level name of each import is kept (`import a.b.c` gives `a`).
    Every name of a multi-name statement (`import a, b`) is reported.
    Relative imports (`from . import x`, `from .mod import y`) are skipped
    since they point back into the scanned project rather than at a package.

    Returns a sorted list with no duplicates.

    Raises `SyntaxError` if `source` can't be parsed."""
    tree = ast.parse(source)
    packages: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            modules = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom):
            # A non-zero `level` marks a relative import; `module` is `None` for `from . import x`.
            modules = [node.module] if node.module and not node.level else []
        else:
            continue
        for module in modules:
            packages.add(module.partition(".")[0])
    return sorted(packages)

Scan source and extract the names of imported packages/modules.

def scan_file(file: Pathish) -> File:
    """Scan `file` for imports and return a `packagelister.File` instance.

    `file` is read as UTF-8 text."""
    if not isinstance(file, Pathier):
        file = Pathier(file)
    source = file.read_text(encoding="utf-8")
    names = get_package_names_from_source(source)
    # don't want to pick up modules in the scanned directory
    path_parts = file.parts
    used_packages = PackageList(
        Package.from_name(name) for name in names if name not in path_parts
    )
    return File(file, used_packages)

Scan file for imports and return a packagelister.File instance.

def scan_dir(path: Pathish, quiet: bool = False) -> Project:
    """Recursively scan `*.py` files in `path` for imports and return a `packagelister.Project` instance.

    Set `quiet` to `True` to prevent printing."""
    if not isinstance(path, Pathier):
        path = Pathier(path)
    files = list(path.rglob("*.py"))
    if quiet:
        return Project([scan_file(file) for file in files])
    num_files = len(files)
    print(f"Scanning {num_files} files in {path} for imports...")
    with ProgBar(num_files, width_ratio=0.3) as bar:
        project = Project(
            [bar.display(return_object=scan_file(file)) for file in files]
        )
    return project

Recursively scan *.py files in path for imports and return a packagelister.Project instance.

Set quiet to True to prevent printing.