packagelister.packagelister
import ast
import importlib.metadata
import sys
from dataclasses import dataclass

from pathier import Pathier, Pathish
from printbuddies import ProgBar
from typing_extensions import Self

# Built once at import time: keeping the mapping on hand avoids calling
# the function every time it is needed.
packages_distributions = importlib.metadata.packages_distributions()
# The set of distribution names found in this Python install
distributions = {
    name for names in packages_distributions.values() for name in names
}


def is_builtin(package_name: str) -> bool:
    """Returns `True` if `package_name` is listed in `sys.stdlib_module_names`, otherwise `False`."""
    stdlib_names = sys.stdlib_module_names
    return package_name in stdlib_names


@dataclass
class Package:
    """Dataclass describing a single imported package.

    #### Fields:
    * `name: str` - the name used to `import` the package
    * `distribution_name: str` - the name used to `pip install`, which can differ from `name`
    * `version: str` - the installed version of the distribution
    * `builtin: bool` - whether this is a standard library package or not"""

    name: str
    distribution_name: str
    version: str
    builtin: bool

    def get_formatted_requirement(self, version_specifier: str):
        """Returns `distribution_name`, `version_specifier` and `version` joined into one string.

        e.g. for this package with `">="`: `"packagelister>=2.0.0"`"""
        return "{}{}{}".format(
            self.distribution_name, version_specifier, self.version
        )

    @classmethod
    def from_name(cls, package_name: str) -> Self:
        """Builds a `Package` from the name used to import it.

        The distribution name and version are looked up from the installed distributions.
        When no distribution provides `package_name`, both are left as empty strings."""
        providers = packages_distributions.get(package_name)
        if not providers:
            return cls(package_name, "", "", is_builtin(package_name))
        # When several distributions provide the package, the first one listed is used.
        distribution_name = providers[0]
        version = importlib.metadata.version(distribution_name)
        return cls(package_name, distribution_name, version, is_builtin(package_name))

    @classmethod
    def from_distribution_name(cls, distribution_name: str) -> Self:
        """Builds a `Package` from the name used to `pip install` it.

        The returned instance has an empty `name` field and `builtin` set to `False`.

        Raises `ValueError` if `distribution_name` isn't found in `importlib.metadata.packages_distributions()`.
        """
        if distribution_name in distributions:
            installed_version = importlib.metadata.version(distribution_name)
            return cls("", distribution_name, installed_version, False)
        raise ValueError(
            f"`{distribution_name}` not found in Python's installed distributions."
        )


class PackageList(list[Package]):
    """A `list` subclass with convenience accessors for a collection of `packagelister.Package` objects."""

    @property
    def names(self) -> list[str]:
        """Returns the `Package.name` of every package in this list."""
        return [entry.name for entry in self]

    @property
    def distribution_names(self) -> list[str | None]:
        """Returns the `Package.distribution_name` of every third party package in this list."""
        return [entry.distribution_name for entry in self.third_party]

    @property
    def third_party(self) -> Self:
        """Returns a new list of the same class holding only the third party packages.

        A package counts as third party when it is not builtin and has a distribution name."""
        third_party_packages = [
            entry for entry in self if entry.distribution_name and not entry.builtin
        ]
        return self.__class__(third_party_packages)

    @property
    def builtin(self) -> Self:
        """Returns a new list of the same class holding only the standard library packages."""
        builtin_packages = [entry for entry in self if entry.builtin]
        return self.__class__(builtin_packages)

@dataclass
class File:
    """Dataclass representing a scanned file and its list of imported packages.

    #### Fields:
    * `path: Pathier` - Pathier object representing the path to this file
    * `packages: packagelister.PackageList` - List of Package objects imported by this file
    """

    path: Pathier
    packages: PackageList


@dataclass
class Project:
    """Dataclass representing a directory that's had its files scanned for imports.

    #### Fields:
    * `files: list[packagelister.File]`"""

    files: list[File]

    @property
    def packages(self) -> PackageList:
        """Returns a `packagelister.PackageList` of every package imported by this project's files.

        Duplicates are removed and the result is sorted by `Package.name`."""
        unique_packages = []
        for file in self.files:
            for package in file.packages:
                if package not in unique_packages:
                    unique_packages.append(package)
        return PackageList(sorted(unique_packages, key=lambda p: p.name))

    @property
    def requirements(self) -> PackageList:
        """Returns a `packagelister.PackageList` object of third party packages used by this project."""
        return self.packages.third_party

    def get_formatted_requirements(
        self, version_specifier: str | None = None
    ) -> list[str]:
        """Returns a list of formatted requirements (third party packages) using `version_specifier` (`==`,`>=`, `<=`, etc.).

        If no `version_specifier` is given, each entry is just the distribution name,
        falling back to the import name when the distribution name is empty.
        """
        requirements = self.requirements
        if version_specifier:
            return [
                requirement.get_formatted_requirement(version_specifier)
                for requirement in requirements
            ]
        return [
            requirement.distribution_name or requirement.name
            for requirement in requirements
        ]

    def get_files_by_package(self) -> dict[str, list[Pathier]]:
        """Returns a dictionary where the keys are package names and the values are lists of files that import the package."""
        files_by_package = {}
        for package in self.packages:
            name = package.name
            for file in self.files:
                if name in file.packages.names:
                    files_by_package.setdefault(name, []).append(file.path)
        return files_by_package


def get_package_names_from_source(source: str) -> list[str]:
    """Scan `source` and extract the names of imported packages/modules.

    Only the top level name of each import is kept (`a.b.c` -> `a`).
    Every name in a multi-name statement (`import a, b`) is included.
    Relative imports (`from . import x`, `from .x import y`) are skipped
    since they can only refer to modules local to the scanned code.

    Returns a sorted list with no duplicates.

    Raises `SyntaxError` if `source` can't be parsed."""
    tree = ast.parse(source)
    packages: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            modules = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom):
            # `level` > 0 means a relative import; `module` is None for `from . import x`
            if node.level or not node.module:
                continue
            modules = [node.module]
        else:
            continue
        packages.update(module.partition(".")[0] for module in modules)
    return sorted(packages)


def scan_file(file: Pathish) -> File:
    """Scan `file` for imports and return a `packagelister.File` instance.

    The file is read as UTF-8."""
    file = file if isinstance(file, Pathier) else Pathier(file)
    source = file.read_text(encoding="utf-8")
    path_parts = file.parts
    used_packages = PackageList(
        [
            Package.from_name(package)
            for package in get_package_names_from_source(source)
            # don't want to pick up modules in the scanned directory
            if package not in path_parts
        ]
    )
    return File(file, used_packages)


def scan_dir(path: Pathish, quiet: bool = False) -> Project:
    """Recursively scan `*.py` files in `path` for imports and return a `packagelister.Project` instance.

    Set `quiet` to `True` to prevent printing."""
    path = path if isinstance(path, Pathier) else Pathier(path)
    files = list(path.rglob("*.py"))
    if quiet:
        return Project([scan_file(file) for file in files])
    num_files = len(files)
    print(f"Scanning {num_files} files in {path} for imports...")
    with ProgBar(num_files, width_ratio=0.3) as bar:
        project = Project(
            [bar.display(return_object=scan_file(file)) for file in files]
        )
    return project
23def is_builtin(package_name: str) -> bool: 24 """Returns whether `package_name` is a standard library module or not.""" 25 return package_name in sys.stdlib_module_names
Returns whether package_name
is a standard library module or not.
@dataclass
class Package:
    """Dataclass describing a single imported package.

    #### Fields:
    * `name: str` - the name used to `import` the package
    * `distribution_name: str` - the name used to `pip install`, which can differ from `name`
    * `version: str` - the installed version of the distribution
    * `builtin: bool` - whether this is a standard library package or not"""

    name: str
    distribution_name: str
    version: str
    builtin: bool

    def get_formatted_requirement(self, version_specifier: str):
        """Returns `distribution_name`, `version_specifier` and `version` joined into one string.

        e.g. for this package with `">="`: `"packagelister>=2.0.0"`"""
        return "{}{}{}".format(
            self.distribution_name, version_specifier, self.version
        )

    @classmethod
    def from_name(cls, package_name: str) -> Self:
        """Builds a `Package` from the name used to import it.

        The distribution name and version are looked up from the installed distributions.
        When no distribution provides `package_name`, both are left as empty strings."""
        providers = packages_distributions.get(package_name)
        if not providers:
            return cls(package_name, "", "", is_builtin(package_name))
        # When several distributions provide the package, the first one listed is used.
        distribution_name = providers[0]
        version = importlib.metadata.version(distribution_name)
        return cls(package_name, distribution_name, version, is_builtin(package_name))

    @classmethod
    def from_distribution_name(cls, distribution_name: str) -> Self:
        """Builds a `Package` from the name used to `pip install` it.

        The returned instance has an empty `name` field and `builtin` set to `False`.

        Raises `ValueError` if `distribution_name` isn't found in `importlib.metadata.packages_distributions()`.
        """
        if distribution_name in distributions:
            installed_version = importlib.metadata.version(distribution_name)
            return cls("", distribution_name, installed_version, False)
        raise ValueError(
            f"`{distribution_name}` not found in Python's installed distributions."
        )
Dataclass representing an imported package.
Fields:
name: str
distribution_name: str
- the name used to pip install
, sometimes this differs from name
version: str
builtin: bool
- whether this is a standard library package or not
def get_formatted_requirement(self, version_specifier: str):
    """Returns `distribution_name`, `version_specifier` and `version` joined into one string.

    e.g. for this package with `">="`: `"packagelister>=2.0.0"`"""
    return "{}{}{}".format(
        self.distribution_name, version_specifier, self.version
    )
Returns a string of the form {self.distribution_name}{version_specifier}{self.version}
.
e.g for this package: "packagelister>=2.0.0"
@classmethod
def from_name(cls, package_name: str) -> Self:
    """Builds a `Package` from the name used to import it.

    The distribution name and version are looked up from the installed distributions.
    When no distribution provides `package_name`, both are left as empty strings."""
    providers = packages_distributions.get(package_name)
    if not providers:
        return cls(package_name, "", "", is_builtin(package_name))
    # When several distributions provide the package, the first one listed is used.
    distribution_name = providers[0]
    version = importlib.metadata.version(distribution_name)
    return cls(package_name, distribution_name, version, is_builtin(package_name))
Returns a Package
instance from the package name.
Will attempt to determine the other class fields.
@classmethod
def from_distribution_name(cls, distribution_name: str) -> Self:
    """Builds a `Package` from the name used to `pip install` it.

    The returned instance has an empty `name` field and `builtin` set to `False`.

    Raises `ValueError` if `distribution_name` isn't found in `importlib.metadata.packages_distributions()`.
    """
    if distribution_name in distributions:
        installed_version = importlib.metadata.version(distribution_name)
        return cls("", distribution_name, installed_version, False)
    raise ValueError(
        f"`{distribution_name}` not found in Python's installed distributions."
    )
Returns a Package
instance from the distribution name.
Returned instance will have an empty name
field.
Raises ValueError
if distribution_name
isn't found in importlib.metadata.packages_distributions()
.
class PackageList(list[Package]):
    """A `list` subclass with convenience accessors for a collection of `packagelister.Package` objects."""

    @property
    def names(self) -> list[str]:
        """Returns the `Package.name` of every package in this list."""
        return [entry.name for entry in self]

    @property
    def distribution_names(self) -> list[str | None]:
        """Returns the `Package.distribution_name` of every third party package in this list."""
        return [entry.distribution_name for entry in self.third_party]

    @property
    def third_party(self) -> Self:
        """Returns a new list of the same class holding only the third party packages.

        A package counts as third party when it is not builtin and has a distribution name."""
        third_party_packages = [
            entry for entry in self if entry.distribution_name and not entry.builtin
        ]
        return self.__class__(third_party_packages)

    @property
    def builtin(self) -> Self:
        """Returns a new list of the same class holding only the standard library packages."""
        builtin_packages = [entry for entry in self if entry.builtin]
        return self.__class__(builtin_packages)
A subclass of list
to add convenience methods when working with a list of packagelister.Package
objects.
Returns a list of Package.distribution_name
strings for third party packages in this list.
Inherited Members
- builtins.list
- list
- clear
- copy
- append
- insert
- extend
- pop
- remove
- index
- count
- reverse
- sort
@dataclass
class File:
    """Dataclass pairing a scanned file with the packages it imports.

    #### Fields:
    * `path: Pathier` - the path to the scanned file
    * `packages: packagelister.PackageList` - the `Package` objects imported by the file
    """

    path: Pathier
    packages: PackageList
Dataclass representing a scanned file and its list of imported packages.
Fields:
path: Pathier
- Pathier object representing the path to this file
packages: packagelister.PackageList
- List of Package objects imported by this file
@dataclass
class Project:
    """Dataclass holding the scan results for a directory's files.

    #### Fields:
    * `files: list[packagelister.File]` - one entry per scanned file"""

    files: list[File]

    @property
    def packages(self) -> PackageList:
        """Returns a `packagelister.PackageList` of every package imported by this project's files.

        Duplicates are removed and the result is sorted by `Package.name`."""
        unique_packages = []
        for scanned_file in self.files:
            unique_packages.extend(
                package
                for package in scanned_file.packages
                if package not in unique_packages
            )
        return PackageList(sorted(unique_packages, key=lambda p: p.name))

    @property
    def requirements(self) -> PackageList:
        """Returns the third party subset of `packages` as a `packagelister.PackageList`."""
        all_packages = self.packages
        return all_packages.third_party

    def get_formatted_requirements(
        self, version_specifier: str | None = None
    ) -> list[str]:
        """Returns one requirement string per third party package.

        With a `version_specifier` (`==`,`>=`, `<=`, etc.), each entry is the package's formatted requirement.

        Without one, each entry is just the distribution name,
        falling back to the import name when the distribution name is empty.
        """
        requirements = self.requirements
        if version_specifier:
            return [
                requirement.get_formatted_requirement(version_specifier)
                for requirement in requirements
            ]
        return [
            requirement.distribution_name or requirement.name
            for requirement in requirements
        ]

    def get_files_by_package(self) -> dict[str, list[Pathier]]:
        """Returns a dictionary mapping each package name to the paths of the files that import it."""
        importers = {}
        for package in self.packages:
            name = package.name
            for scanned_file in self.files:
                if name in scanned_file.packages.names:
                    importers.setdefault(name, []).append(scanned_file.path)
        return importers
Dataclass representing a directory that's had its files scanned for imports.
Fields:
files: list[packagelister.File]
Returns a packagelister.PackageList
object for this instance with no duplicates.
Returns a packagelister.PackageList
object of third party packages used by this project.
def get_formatted_requirements(
    self, version_specifier: str | None = None
) -> list[str]:
    """Returns one requirement string per third party package.

    With a `version_specifier` (`==`,`>=`, `<=`, etc.), each entry is the package's formatted requirement.

    Without one, each entry is just the distribution name,
    falling back to the import name when the distribution name is empty.
    """
    requirements = self.requirements
    if version_specifier:
        return [
            requirement.get_formatted_requirement(version_specifier)
            for requirement in requirements
        ]
    return [
        requirement.distribution_name or requirement.name
        for requirement in requirements
    ]
Returns a list of formatted requirements (third party packages) using version_specifier
(==
,>=
, <=
, etc.).
If no version_specifier
is given, the returned list will just be package names.
def get_files_by_package(self) -> dict[str, list[Pathier]]:
    """Returns a dictionary mapping each package name to the paths of the files that import it."""
    importers = {}
    for package in self.packages:
        name = package.name
        for scanned_file in self.files:
            if name in scanned_file.packages.names:
                importers.setdefault(name, []).append(scanned_file.path)
    return importers
Returns a dictionary where the keys are package names and the values are lists of files that import the package.
def get_package_names_from_source(source: str) -> list[str]:
    """Scan `source` and extract the names of imported packages/modules.

    Only the top level name of each import is kept (`a.b.c` -> `a`).
    Every name in a multi-name statement (`import a, b`) is included.
    Relative imports (`from . import x`, `from .x import y`) are skipped
    since they can only refer to modules local to the scanned code.

    Returns a sorted list with no duplicates.

    Raises `SyntaxError` if `source` can't be parsed."""
    tree = ast.parse(source)
    packages: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            modules = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom):
            # `level` > 0 means a relative import; `module` is None for `from . import x`
            if node.level or not node.module:
                continue
            modules = [node.module]
        else:
            continue
        packages.update(module.partition(".")[0] for module in modules)
    return sorted(packages)
Scan source
and extract the names of imported packages/modules.
def scan_file(file: Pathish) -> File:
    """Scan `file` for imports and return a `packagelister.File` instance.

    The file is read as UTF-8."""
    # `isinstance` rather than an exact type comparison so `Pathier` subclasses are accepted as-is
    file = file if isinstance(file, Pathier) else Pathier(file)
    source = file.read_text(encoding="utf-8")
    path_parts = file.parts
    used_packages = PackageList(
        [
            Package.from_name(package)
            for package in get_package_names_from_source(source)
            # don't want to pick up modules in the scanned directory
            if package not in path_parts
        ]
    )
    return File(file, used_packages)
Scan file
for imports and return a packagelister.File
instance.
def scan_dir(path: Pathish, quiet: bool = False) -> Project:
    """Recursively scan `*.py` files in `path` for imports and return a `packagelister.Project` instance.

    Set `quiet` to `True` to prevent printing."""
    # `isinstance` rather than an exact type comparison so `Pathier` subclasses are accepted as-is
    path = path if isinstance(path, Pathier) else Pathier(path)
    files = list(path.rglob("*.py"))
    if quiet:
        return Project([scan_file(file) for file in files])
    num_files = len(files)
    print(f"Scanning {num_files} files in {path} for imports...")
    with ProgBar(num_files, width_ratio=0.3) as bar:
        project = Project(
            [bar.display(return_object=scan_file(file)) for file in files]
        )
    return project
Recursively scan *.py
files in path
for imports and return a packagelister.Project
instance.
Set quiet
to True
to prevent printing.