reckless: Clone github sources when API access fails

Because the github REST API is rate limited, this allows cloning a github
repo and searching the local clone rather than searching via the REST API.
If a source has already been cloned, it is fetched and the default branch
is checked out.

Fixes a failure reported by @farscapian

Changelog-Fixed: Reckless no longer fails on github API ratelimit.
This commit is contained in:
Alex Myers 2024-02-17 10:15:11 -06:00 committed by Christian Decker
parent ba9ec412c7
commit bfb29aaef0

View file

@ -17,6 +17,7 @@ import types
from typing import Union from typing import Union
from urllib.parse import urlparse from urllib.parse import urlparse
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError
import venv import venv
@ -144,7 +145,8 @@ class InstInfo:
target = SourceDir(self.source_loc, srctype=self.srctype) target = SourceDir(self.source_loc, srctype=self.srctype)
# Set recursion for how many directories deep we should search # Set recursion for how many directories deep we should search
depth = 0 depth = 0
if self.srctype in [Source.DIRECTORY, Source.LOCAL_REPO]: if self.srctype in [Source.DIRECTORY, Source.LOCAL_REPO,
Source.GIT_LOCAL_CLONE]:
depth = 5 depth = 5
elif self.srctype == Source.GITHUB_REPO: elif self.srctype == Source.GITHUB_REPO:
depth = 1 depth = 1
@ -193,7 +195,28 @@ class InstInfo:
return success return success
return None return None
result = search_dir(self, target, False, depth) try:
result = search_dir(self, target, False, depth)
# Using the rest API of github.com may result in a
# "Error 403: rate limit exceeded" or other access issues.
# Fall back to cloning and searching the local copy instead.
except HTTPError:
result = None
if self.srctype == Source.GITHUB_REPO:
# clone source to reckless dir
target = copy_remote_git_source(self)
if not target:
logging.warning(f"could not clone github source {self}")
return False
logging.debug(f"falling back to cloning remote repo {self}")
# Update to reflect use of a local clone
self.source_loc = target.location
self.srctype = target.srctype
result = search_dir(self, target, False, 5)
if not result:
return False
if result: if result:
if result != target: if result != target:
if result.relative: if result.relative:
@ -235,6 +258,8 @@ class Source(Enum):
GITHUB_REPO = 3 GITHUB_REPO = 3
OTHER_URL = 4 OTHER_URL = 4
UNKNOWN = 5 UNKNOWN = 5
# Cloned from remote source before searching (rather than github API)
GIT_LOCAL_CLONE = 6
@classmethod @classmethod
def get_type(cls, source: str): def get_type(cls, source: str):
@ -253,6 +278,16 @@ class Source(Enum):
return cls(4) return cls(4)
return cls(5) return cls(5)
@classmethod
def get_github_user_repo(cls, source: str) -> tuple:
    """Extract a github username and repository name from a source string.

    The search for ``github.com/`` is case-insensitive, and both returned
    names are lowercased because the whole source string is lowercased
    before it is split.

    Args:
        source: a URL or path expected to contain
            ``github.com/<user>/<repo>``.

    Returns:
        A ``(user, repo)`` tuple of strings, or ``(None, None)`` when
        ``source`` does not contain ``github.com/`` followed by at least
        two path components.
    """
    # partition() returns an empty separator when the marker is absent.
    _, marker, trailing = source.lower().partition('github.com/')
    if not marker:
        return None, None
    # Path.parts splits on '/'; a trailing slash does not produce an
    # empty component.
    parts = Path(trailing).parts
    if len(parts) < 2:
        return None, None
    return parts[0], parts[1]
class SourceDir(): class SourceDir():
"""Structure to search source contents.""" """Structure to search source contents."""
@ -277,7 +312,7 @@ class SourceDir():
# logging.debug(f"populating {self.srctype} {self.location}") # logging.debug(f"populating {self.srctype} {self.location}")
if self.srctype == Source.DIRECTORY: if self.srctype == Source.DIRECTORY:
self.contents = populate_local_dir(self.location) self.contents = populate_local_dir(self.location)
elif self.srctype == Source.LOCAL_REPO: elif self.srctype in [Source.LOCAL_REPO, Source.GIT_LOCAL_CLONE]:
self.contents = populate_local_repo(self.location) self.contents = populate_local_repo(self.location)
elif self.srctype == Source.GITHUB_REPO: elif self.srctype == Source.GITHUB_REPO:
self.contents = populate_github_repo(self.location) self.contents = populate_github_repo(self.location)
@ -435,6 +470,11 @@ def source_element_from_repo_api(member: dict):
def populate_github_repo(url: str) -> list: def populate_github_repo(url: str) -> list:
"""populate one level of a github repository via REST API"""
# Forces search to clone remote repos (for blackbox testing)
if GITHUB_API_FALLBACK:
with tempfile.NamedTemporaryFile() as tmp:
raise HTTPError(url, 403, 'simulated ratelimit', {}, tmp)
# FIXME: This probably contains leftover cruft. # FIXME: This probably contains leftover cruft.
repo = url.split('/') repo = url.split('/')
while '' in repo: while '' in repo:
@ -478,6 +518,28 @@ def populate_github_repo(url: str) -> list:
return contents return contents
def copy_remote_git_source(github_source: InstInfo):
    """Clone, or fetch and check out, a local copy of a remote git repo.

    The local copy is kept at RECKLESS_DIR/.remote_sources/<user>/<repo>,
    where user and repo are extracted from ``github_source.source_loc``.
    If that path already exists it is refreshed with ``_git_update``;
    otherwise the repository is cloned there with ``_git_clone``.

    Args:
        github_source: the source whose ``source_loc`` names a github repo.

    Returns:
        A SourceDir of type Source.GIT_LOCAL_CLONE pointing at the local
        copy, or None if the user/repo could not be determined, the
        directory could not be created, or the clone/update failed.
    """
    user, repo = Source.get_github_user_repo(github_source.source_loc)
    if not user or not repo:
        logging.warning('could not extract github user and repo '
                        f'name for {github_source.source_loc}')
        return None
    local_path = RECKLESS_DIR / '.remote_sources' / user
    create_dir(RECKLESS_DIR / '.remote_sources')
    if not create_dir(local_path):
        logging.warning(f'could not provision dir {local_path} to '
                        f'clone remote source {github_source.source_loc}')
        return None
    local_path = local_path / repo
    if local_path.exists():
        # Fetch the latest.  This must not be wrapped in an assert:
        # asserts are stripped under `python -O`, which would skip the
        # update entirely.
        if not _git_update(github_source, local_path):
            logging.warning(f'could not update local copy {local_path} '
                            f'of {github_source.source_loc}')
            return None
    elif not _git_clone(github_source, local_path):
        # Without this check a failed clone would hand back a SourceDir
        # for a directory that was never populated.
        logging.warning(f'could not clone {github_source.source_loc} '
                        f'to {local_path}')
        return None
    return SourceDir(local_path, srctype=Source.GIT_LOCAL_CLONE)
class Config(): class Config():
"""A generic class for procuring, reading and editing config files""" """A generic class for procuring, reading and editing config files"""
def obtain_config(self, def obtain_config(self,
@ -803,7 +865,8 @@ def _git_clone(src: InstInfo, dest: Union[PosixPath, str]) -> bool:
if src.srctype == Source.GITHUB_REPO: if src.srctype == Source.GITHUB_REPO:
assert 'github.com' in src.source_loc assert 'github.com' in src.source_loc
source = f"{GITHUB_COM}" + src.source_loc.split("github.com")[-1] source = f"{GITHUB_COM}" + src.source_loc.split("github.com")[-1]
elif src.srctype in [Source.LOCAL_REPO, Source.OTHER_URL]: elif src.srctype in [Source.LOCAL_REPO, Source.OTHER_URL,
Source.GIT_LOCAL_CLONE]:
source = src.source_loc source = src.source_loc
else: else:
return False return False
@ -819,6 +882,46 @@ def _git_clone(src: InstInfo, dest: Union[PosixPath, str]) -> bool:
return True return True
def _git_update(github_source: InstInfo, local_copy: PosixPath):
    """Refresh an existing local clone and check out its default branch.

    Runs, in order, inside ``local_copy``:
      1. ``git remote set-url origin <github_source.source_loc>``
      2. ``git fetch origin --recurse-submodules=on-demand``
      3. ``git symbolic-ref refs/remotes/origin/HEAD --short``
      4. ``git checkout <default branch reported by step 3>``

    Args:
        github_source: the source whose ``source_loc`` is set as origin.
        local_copy: path of the existing clone to update.

    Returns:
        True if every git command succeeded, False as soon as one fails
        (the failing command and its stderr are logged at debug level).

    NOTE(review): each command is run with timeout=60; assuming ``run`` is
    subprocess.run, a timeout raises subprocess.TimeoutExpired rather than
    returning False - confirm whether callers should handle that.
    """
    def git(*args):
        """Run one git subcommand in local_copy and log it if it fails."""
        proc = run(['git', *args],
                   cwd=str(local_copy), stdout=PIPE, stderr=PIPE, text=True,
                   check=False, timeout=60)
        if proc.returncode != 0:
            cmd = ' '.join(args)
            logging.debug(f'git {cmd} failed in {local_copy}: '
                          f'{proc.stderr.strip()}')
        return proc

    # Ensure this is the correct source
    if git('remote', 'set-url', 'origin',
           github_source.source_loc).returncode != 0:
        return False

    # Fetch the latest from the remote
    if git('fetch', 'origin',
           '--recurse-submodules=on-demand').returncode != 0:
        return False

    # Find default branch
    head = git('symbolic-ref', 'refs/remotes/origin/HEAD', '--short')
    if head.returncode != 0:
        return False
    branches = head.stdout.splitlines()
    if not branches:
        # A zero exit status with no output would otherwise raise
        # IndexError below.
        logging.debug('could not determine default branch for '
                      f'{github_source.source_loc}')
        return False
    default_branch = branches[0]
    if default_branch != 'origin/master':
        logging.debug(f'UNUSUAL: fetched default branch {default_branch} for '
                      f'{github_source.source_loc}')

    # Checkout default branch
    return git('checkout', default_branch).returncode == 0
def get_temp_reckless_dir() -> PosixPath: def get_temp_reckless_dir() -> PosixPath:
random_dir = 'reckless-{}'.format(str(hash(os.times()))[-9:]) random_dir = 'reckless-{}'.format(str(hash(os.times()))[-9:])
new_path = Path(tempfile.gettempdir()) / random_dir new_path = Path(tempfile.gettempdir()) / random_dir
@ -850,7 +953,7 @@ def _checkout_commit(orig_src: InstInfo,
cloned_path: PosixPath): cloned_path: PosixPath):
# Check out and verify commit/tag if source was a repository # Check out and verify commit/tag if source was a repository
if orig_src.srctype in [Source.LOCAL_REPO, Source.GITHUB_REPO, if orig_src.srctype in [Source.LOCAL_REPO, Source.GITHUB_REPO,
Source.OTHER_URL]: Source.OTHER_URL, Source.GIT_LOCAL_CLONE]:
if orig_src.commit: if orig_src.commit:
logging.debug(f"Checking out {orig_src.commit}") logging.debug(f"Checking out {orig_src.commit}")
checkout = Popen(['git', 'checkout', orig_src.commit], checkout = Popen(['git', 'checkout', orig_src.commit],
@ -912,7 +1015,7 @@ def _install_plugin(src: InstInfo) -> Union[InstInfo, None]:
create_dir(clone_path) create_dir(clone_path)
shutil.copytree(src.source_loc, plugin_path) shutil.copytree(src.source_loc, plugin_path)
elif src.srctype in [Source.LOCAL_REPO, Source.GITHUB_REPO, elif src.srctype in [Source.LOCAL_REPO, Source.GITHUB_REPO,
Source.OTHER_URL]: Source.OTHER_URL, Source.GIT_LOCAL_CLONE]:
# clone git repository to /tmp/reckless-... # clone git repository to /tmp/reckless-...
if not _git_clone(src, plugin_path): if not _git_clone(src, plugin_path):
return None return None
@ -1401,6 +1504,10 @@ if __name__ == '__main__':
GITHUB_COM = os.environ['REDIR_GITHUB'] GITHUB_COM = os.environ['REDIR_GITHUB']
logging.root.setLevel(args.loglevel) logging.root.setLevel(args.loglevel)
GITHUB_API_FALLBACK = False
if 'GITHUB_API_FALLBACK' in os.environ:
GITHUB_API_FALLBACK = os.environ['GITHUB_API_FALLBACK']
if 'targets' in args: if 'targets' in args:
# FIXME: Catch missing argument # FIXME: Catch missing argument
if args.func.__name__ == 'help_alias': if args.func.__name__ == 'help_alias':