# Reconstructed from the diff of commit 54d902aba1546c062c5a088f5f996cbb3e439a7e
# (Author: Michael Chen, Date: Tue Nov 22 16:26:31 2022 +0100,
# "Revision 25th of October"), which adds createreadmes.py as a new file.
"""Generate the README files of the dataflow-diagram dataset.

The script reads ``dataset/dataset.json``, completes every entry with
repository and owner metadata from the GitHub REST API (responses are cached
inside the entry under ``data`` and ``owner``), writes one ``README.md`` per
model directory plus the repository-level ``README.md``, and finally stores
the enriched dataset back to ``dataset/dataset.json``.
"""
import json
import os
from pathlib import Path
from typing import Any

dataset_path = Path('dataset')
dataset_info = dataset_path / 'dataset.json'

# SECURITY: the token used to be hard-coded here. A credential that has been
# committed must be treated as leaked and revoked. It is now taken from the
# environment; when it is unset, requests are sent without authentication.
token = os.environ.get("GITHUB_TOKEN", "")

# Seconds to wait for the GitHub API before giving up instead of hanging.
REQUEST_TIMEOUT = 30

update_dataset = False  # not read anywhere in this file


def open_dataset() -> dict[str, Any]:
    """Load and return the dataset description from ``dataset_info``."""
    with open(dataset_info, 'r', encoding="utf-8") as f:
        return json.load(f)


def save_dataset(dataset: dict[str, Any]):
    """Write ``dataset`` to ``dataset_info`` as JSON indented by 4 spaces."""
    with open(dataset_info, 'w', encoding="utf-8") as f:
        json.dump(dataset, f, indent=4)


def get_json(uri: str):
    """GET ``uri`` and return the decoded JSON body.

    The request carries a bearer ``Authorization`` header when ``token`` is
    non-empty. The URI and the response object are printed as progress output.

    Raises:
        RuntimeError: if the response status is not OK. The message contains
            the ``message`` field of the JSON error body when there is one,
            otherwise the raw response text.
    """
    # Imported here so that the helpers which need no network access (and
    # runs that are served entirely from cached metadata) work without the
    # third-party dependency being installed.
    import requests

    print(uri)
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    resp = requests.get(url=uri, headers=headers, timeout=REQUEST_TIMEOUT)
    print(resp)
    if not resp.ok:
        try:
            error = resp.json()['message']
        except (ValueError, KeyError, TypeError):
            # Body is not JSON, or not a JSON object with a 'message' key.
            error = resp.text
        raise RuntimeError(f"Invalid response: {error}")
    return resp.json()


def get_repo(slug: str):
    """Return the GitHub API description of the repository ``owner/name``."""
    return get_json(f"https://api.github.com/repos/{slug}")


def get_user(name: str):
    """Return the GitHub API description of the user ``name``."""
    return get_json(f"https://api.github.com/users/{name}")


def get_file(slug: str, path: str):
    """Return the GitHub API contents entry for ``path`` in repo ``slug``."""
    return get_json(f"https://api.github.com/repos/{slug}/contents/{path}")


def plural(amount: int, name: str, plural: str = 's'):
    """Return ``"<amount> <name>"`` with ``plural`` appended unless amount is 1.

    The previous ``plural[:amount^1]`` slice cut multi-character suffixes
    short for ``amount == 0`` (e.g. ``"0 boxe"``); the suffix is now either
    used in full or omitted.
    """
    suffix = '' if amount == 1 else plural
    return f"{amount} {name}{suffix}"


def get_name(slug: str):
    """Return the part of ``slug`` after its first ``/`` (all of it if none)."""
    return slug[slug.find('/') + 1:]


def _ensure_repo_metadata(info: dict[str, Any]) -> None:
    """Fill ``info`` with repository/owner metadata, fetching what is missing.

    Cached API responses under ``info['data']`` and ``info['owner']`` are
    reused; otherwise they are fetched and stored there. Afterwards the keys
    ``stars``, ``forks``, ``owner_name`` and ``owner_slug`` are set.

    Raises:
        Exception: if the repository data has no owner URL or the owner has
            no name.
    """
    slug: str = info['slug']

    data = info.get('data')
    if not data:
        data = get_repo(slug)
        info['data'] = data

    owner_url = data.get('owner', {}).get('url')
    if not owner_url:
        raise Exception(f'No owner in repo {slug}!')

    owner = info.get('owner')
    if not owner:
        owner = get_json(owner_url)
        info['owner'] = owner

    owner_name = owner.get('name')
    if not owner_name:
        raise Exception(f'No owner name in repo {slug}!')

    # Read everything first so a missing key leaves the summary keys untouched.
    stars = data['stargazers_count']
    forks = data['forks']
    owner_slug = owner['login']

    info['stars'] = stars
    info['forks'] = forks
    info['owner_name'] = owner_name
    info['owner_slug'] = owner_slug


def _render_model_readme(model_id: str, info: dict[str, Any]) -> str:
    """Return the Markdown text of the README for one model.

    Expects ``info`` to already hold the keys set by
    ``_ensure_repo_metadata`` in addition to ``slug``, ``l``, ``tech``,
    ``t``, ``s``, ``e``, ``i`` and ``a``.
    """
    slug = info['slug']
    tech_list = "\n".join(f"- {tech}" for tech in info['tech'])
    # NOTE(review): the links below are relative paths starting with
    # `{model_id}`, while this README is itself written into
    # `dataset/{model_id}/` - confirm the intended base directory.
    return f"""# {slug}

## Repository Information

Repository: [GitHub](https://github.com/{slug})

Owner: [{info['owner_name']}](https://github.com/{info['owner_slug']})

The repository has {plural(info['stars'], 'star')} and was forked {plural(info['forks'], 'time')}. The codebase consists of {plural(info['l'], 'line')} of code and makes use of the following technologies:

{tech_list}

## Data Flow Diagram

### Statistics

The Application consists of a total of {plural(info['t'], 'element')}:

Element | Count
-- | --
Services | {info['s']}
External Entities | {info['e']}
Information Flows | {info['i']}
Annotations | {info['a']}
Total Items | {info['t']}

### Diagram

The below diagram is generated from the corresponding [model file]({model_id}.py).

Formats:
- [PlantUML Model]({model_id}/{model_id}.txt)
- [SVG Vector Image]({model_id}/{model_id}.svg)
- [PNG Raster Image]({model_id}/{model_id}.png)

![Data Flow Diagram]({model_id}/{model_id}.svg)"""


def write_model_readmes(dataset: dict[str, Any]):
    """Write ``dataset/<model_id>/README.md`` for every dataset entry.

    Each entry is first completed with GitHub metadata (mutating ``dataset``
    in place), so after this call every entry has ``stars``, ``forks``,
    ``owner_name`` and ``owner_slug``.
    """
    for model_id, info in dataset.items():
        _ensure_repo_metadata(info)
        readme = dataset_path / model_id / 'README.md'
        print(f"Writing readme file {readme}")
        with open(readme, 'w', encoding="utf-8") as f:
            f.write(_render_model_readme(model_id, info))


def write_root_readme(dataset: dict[str, Any]):
    """Write the top-level ``README.md`` with one table row per model.

    Reads ``slug``, ``l``, ``stars``, ``forks``, ``t`` and ``tech`` from each
    entry, so ``stars``/``forks`` must already be present (they are set by
    ``write_model_readmes``).
    """
    print("Writing main readme file")
    with open('README.md', 'w', encoding="utf-8") as f:
        # The model count is derived from the dataset instead of hard-coded.
        f.write(f"""# Dataset of Dataflow Diagrams

This repository contains {len(dataset)} manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato].
Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.

## Models

Name | Source | LoC | Stars | Forks | DFD Items | Technologies
-- | -- | -- | -- | -- | -- | --
""")
        for model_id, info in dataset.items():
            f.write(
                f"[{info['slug']}](dataset/{model_id}/README.md)"
                f" | [GitHub](https://github.com/{info['slug']})"
                f" | {info['l']} | {info['stars']} | {info['forks']}"
                f" | {info['t']} | {len(info['tech'])}\n"
            )


def main():
    """Regenerate all README files and persist the enriched dataset."""
    dataset = open_dataset()
    # The model READMEs must come first: that step populates the 'stars' and
    # 'forks' keys which the root README table reads.
    write_model_readmes(dataset)
    write_root_readme(dataset)
    save_dataset(dataset)


if __name__ == '__main__':
    main()