commit d0ca73d8c6ee217b89d6a8911778b9d06f9a6daa Author: Alexander "PapaTutuWawa Date: Sun Aug 20 19:54:52 2023 +0200 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3107f4b --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +build/ +config.toml +*.sqlite +dist/ +*.egg-info +tmp/ +venv/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b841600 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2023 Alexander "PapaTutuWawa" + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..cc5755a --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# pubcached + +A caching front-end for the pub.dev package repository. + +## License + +See `./LICENSE`. 
import falcon
import falcon.asgi
import requests
from loguru import logger
import uvicorn
import aiofiles
import toml

import json
import os
import sqlite3
import time
from optparse import OptionParser
import sys


class Config:
    """Typed read-only view over the TOML configuration file."""

    def __init__(self, path):
        # Raw parsed TOML; main() also reads 'host'/'port' from it directly.
        self.config = toml.load(path)

    @property
    def db_path(self):
        """Path of the SQLite cache database (required key)."""
        return self.config["db_path"]

    @property
    def server_url(self):
        """Base URL under which this server is reachable; used when
        rewriting pub.dev archive URLs to point at this cache."""
        return self.config.get("server_url", "127.0.0.1:8000")

    @property
    def package_path(self):
        """Directory in which downloaded package tarballs are stored
        (required key)."""
        return self.config["package_path"]

    @property
    def api_ttl(self):
        """Time-to-live of cached API responses in seconds (default: 1 day)."""
        return self.config.get("api_ttl", 1 * 24 * 60 * 60)


class Database:
    """Thin wrapper around the SQLite database backing both the
    API-response cache and the package-tarball cache."""

    def __init__(self, path: str):
        self.__db = sqlite3.connect(path)

    def initialize(self):
        """Create the cache tables. Only call on a fresh database file."""
        cur = self.__db.cursor()
        cur.execute('''
        CREATE TABLE ApiCache (
            package TEXT NOT NULL PRIMARY KEY,
            payload TEXT NOT NULL,
            time INTEGER NOT NULL,
            success INTEGER NOT NULL
        )
        ''')
        cur.execute('''
        CREATE TABLE PackageCache (
            package TEXT NOT NULL,
            version TEXT NOT NULL,
            time INTEGER NOT NULL,
            path TEXT NOT NULL,
            PRIMARY KEY (package, version)
        )
        ''')

    def get_api_cache(self, package: str):
        """Return the cached pub.dev API response for @package, or None
        if the package was never requested."""
        cur = self.__db.cursor()
        row = cur.execute(
            'SELECT package, payload, time, success FROM ApiCache WHERE package = ?',
            (package,),
        ).fetchone()
        if row is None:
            return None
        # success is stored as 0/1 in the INTEGER column.
        return ApiCacheEntry(row[0], row[1], row[2], row[3] == 1)

    def persist_api_cache(self, item):
        """Insert or update the cached API response for item.package."""
        cur = self.__db.cursor()
        cur.execute(
            'INSERT OR REPLACE INTO ApiCache VALUES (?, ?, ?, ?)',
            (item.package, item.payload, item.request_time, 1 if item.success else 0),
        )
        self.__db.commit()

    def get_package_cache(self, package: str, version: str):
        """Return the cached tarball metadata for @package at @version,
        or None if that version was never downloaded."""
        cur = self.__db.cursor()
        row = cur.execute(
            'SELECT package, version, time, path FROM PackageCache WHERE package = ? AND version = ?',
            (package, version),
        ).fetchone()
        if row is None:
            return None
        return PackageCacheEntry(row[0], row[1], row[2], row[3])

    def persist_package_cache(self, item):
        """Insert or update the tarball metadata for item.package.

        Uses INSERT OR REPLACE: when the tarball was pruned from disk
        but its row still exists, the handler re-downloads it, and the
        second persist must not fail with a PRIMARY KEY violation.
        """
        cur = self.__db.cursor()
        cur.execute(
            'INSERT OR REPLACE INTO PackageCache VALUES (?, ?, ?, ?)',
            (item.package, item.version, item.request_time, item.path),
        )
        self.__db.commit()


def patch_pubdev_api_response(resp: dict, package: str, config: Config):
    """Rewrite all archive URLs in a pub.dev API response so that clients
    download tarballs through this caching server instead of directly
    from pub.dev. Mutates @resp in place and returns it."""
    base = f'{config.server_url}/api/archives/{package}'

    # Patch the latest version
    if 'latest' in resp:
        resp['latest']['archive_url'] = f"{base}/{resp['latest']['version']}"

    # Patch all other versions
    for release in resp['versions']:
        release['archive_url'] = f"{base}/{release['version']}"

    return resp


class PackageCacheEntry:
    """Row of the PackageCache table: a downloaded tarball on disk."""

    def __init__(self, package, version, request_time, path):
        self.package = package            # package name
        self.version = version            # version string, e.g. "1.2.3"
        self.request_time = request_time  # download time (unix seconds)
        self.path = path                  # tarball location on disk


class ApiCacheEntry:
    """Row of the ApiCache table: a cached pub.dev API response."""

    def __init__(self, package, payload, request_time, success):
        self.package = package            # package name
        self.payload = payload            # JSON response body as a string
        self.request_time = request_time  # fetch time (unix seconds)
        self.success = success            # whether pub.dev answered 200

    def is_valid(self, ttl):
        """Return True while this entry is younger than @ttl seconds."""
        return time.time() <= self.request_time + ttl


class PubApiCacheResource:
    """Serves /api/packages/{package}: the pub.dev package listing,
    cached for Config.api_ttl seconds (failures are cached too)."""

    def __init__(self, db: Database, config: Config):
        self.__db = db
        self.__config = config

    @logger.catch
    async def on_get(self, req, resp, package):
        cache = self.__db.get_api_cache(package)
        if not cache or not cache.is_valid(self.__config.api_ttl):
            logger.info(f'API response for {package} not in cache...')
            # NOTE(review): requests is synchronous, so the event loop is
            # blocked while the upstream request is in flight.
            r = requests.get(
                f'https://pub.dev/api/packages/{package}',
                headers={
                    'Accept': 'application/vnd.pub.v2+json',
                }
            )
            if r.status_code == 200:
                payload = patch_pubdev_api_response(r.json(), package, self.__config)
            else:
                # Cache error payloads as well so an unknown package does
                # not hit pub.dev on every request.
                payload = r.json()
            cache = ApiCacheEntry(package, json.dumps(payload), time.time(), r.status_code == 200)
            self.__db.persist_api_cache(cache)

        logger.debug(f'Payload: {cache.payload}')

        resp.append_header('Content-Type', 'application/vnd.pub.v2+json')
        resp.text = cache.payload
        resp.status = falcon.HTTP_200 if cache.success else falcon.HTTP_404


class PubPackageCacheResource:
    """Serves /api/archives/{package}/{version}: the package tarball,
    downloaded once from pub.dev and then streamed from disk."""

    def __init__(self, db: Database, config: Config):
        self.__db = db
        self.__config = config

    @logger.catch
    async def on_get(self, req, resp, package, version):
        cache = self.__db.get_package_cache(package, version)
        # Re-download when the row is missing or the file was pruned.
        if not cache or not os.path.exists(cache.path):
            logger.info(f'{package}:{version} not in cache. Querying...')
            package_path = os.path.join(self.__config.package_path, package)
            # exist_ok avoids a race when two requests arrive at once.
            os.makedirs(package_path, exist_ok=True)

            path = os.path.join(package_path, f'{version}.tar.gz')
            with requests.get(f'https://pub.dartlang.org/packages/{package}/versions/{version}.tar.gz', stream=True) as r:
                r.raise_for_status()

                with open(path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            cache = PackageCacheEntry(
                package,
                version,
                time.time(),
                path,
            )
            self.__db.persist_package_cache(cache)

        resp.status = falcon.HTTP_200
        resp.content_type = 'application/octet-stream'
        # Falcon takes ownership of the stream and closes it after sending.
        resp.stream = await aiofiles.open(cache.path, 'rb')
        resp.content_length = os.path.getsize(cache.path)


def main():
    """Entry point: parse CLI options, open (and if needed initialize)
    the database, then run the ASGI app under uvicorn."""
    parser = OptionParser()
    parser.add_option(
        "-d", "--debug", dest="debug", help="Enable debug logging", action="store_true"
    )
    parser.add_option(
        "-c",
        "--config",
        dest="config",
        help="Location of the config.toml",
        default="/etc/pubcached/config.toml",
    )
    (options, _args) = parser.parse_args()

    # Configure verbosity
    logger.remove()
    logger.add(
        sys.stdout,
        level="DEBUG" if options.debug else "INFO",
    )

    config = Config(options.config)
    # Create the schema only when the database file does not exist yet.
    should_initialize = not os.path.exists(config.db_path)
    db = Database(config.db_path)
    if should_initialize:
        db.initialize()

    logger.info(f'API calls have a TTL of {config.api_ttl} seconds')

    if not os.path.exists(config.package_path):
        logger.info('Creating packages directory...')
        os.makedirs(config.package_path)

    app = falcon.asgi.App()
    app.add_route('/api/packages/{package}', PubApiCacheResource(db, config))
    app.add_route('/api/archives/{package}/{version}', PubPackageCacheResource(db, config))

    uvicorn.run(app, host=config.config.get('host', '127.0.0.1'), port=config.config.get('port', 8000))


if __name__ == '__main__':
    main()
from setuptools import setup, find_packages

# Packaging metadata for pubcached; installs the `pubcached` console script.
setup(
    name="pubcached",
    version="0.1",
    description="Caching server for pub.dev packages",
    author='Alexander "PapaTutuWawa"',
    author_email="papatutuwawa [at] polynom.me",
    # Minimum runtime dependencies.
    install_requires=[
        "aiofiles>=23.1.0",
        "requests>=2.29.0",
        "falcon>=3.1.1",
        "loguru>=0.7.0",
        "toml>=0.10.2",
        "uvicorn>=0.20.0",
    ],
    packages=find_packages(),
    license="MIT",
    zip_safe=True,
    # `pubcached` on the command line runs pubcached.pubcached:main().
    entry_points={
        "console_scripts": [
            "pubcached = pubcached.pubcached:main",
        ],
    },
)