From 54d902aba1546c062c5a088f5f996cbb3e439a7e Mon Sep 17 00:00:00 2001
From: Michael Chen <git@cnml.de>
Date: Tue, 22 Nov 2022 16:26:31 +0100
Subject: [PATCH] Revision 25th of October

---
 createreadmes.py | 134 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 createreadmes.py

diff --git a/createreadmes.py b/createreadmes.py
new file mode 100644
index 0000000..37d98a0
--- /dev/null
+++ b/createreadmes.py
@@ -0,0 +1,134 @@
+import json
+import os
+from pathlib import Path
+from typing import Any
+import requests
+dataset_path = Path('dataset')  # root folder; README generation uses one sub-directory per model id
+dataset_info = dataset_path / 'dataset.json'  # JSON index that is loaded, enriched and written back
+token = os.environ.get('GITHUB_TOKEN', '')  # credentials come from the environment; never commit an access token
+
+def open_dataset() -> dict[str, Any]:
+    """Return the parsed JSON content of ``dataset_info``, decoded as UTF-8 regardless of the OS locale."""
+    return json.loads(dataset_info.read_text(encoding='utf-8'))
+
+def save_dataset(dataset: dict[str, Any]):
+    """Overwrite ``dataset_info`` with ``dataset`` as 4-space-indented JSON; serialised first so a failure cannot truncate the file."""
+    dataset_info.write_text(json.dumps(dataset, indent=4), encoding='utf-8')
+
+def get_json(uri: str):  # GET `uri` with the bearer `token`; return the decoded JSON body, raise Exception on an error status
+    print(uri)
+    resp = requests.get(url=uri, headers={"Authorization": f"Bearer {token}"}, timeout=30)  # requests never times out by default
+    print(resp)
+    if resp.ok:
+        return resp.json()
+    try:  # prefer the 'message' field of a JSON error body; fall back to the raw text
+        error = resp.json()['message']
+    except (ValueError, KeyError, TypeError):  # not JSON, or JSON without a 'message' entry
+        error = resp.text
+    raise Exception(f"Invalid response: {error}")
+
+def get_repo(slug: str):  # JSON returned by get_json for the GitHub API "repos" endpoint of `slug`
+    return get_json("https://api.github.com/repos/{}".format(slug))
+    
+def get_user(name: str):  # JSON returned by get_json for the GitHub API "users" endpoint of `name`
+    return get_json("https://api.github.com/users/{}".format(name))
+
+def get_file(slug: str, path: str):  # JSON returned by get_json for the "contents" endpoint of `path` in repo `slug`
+    return get_json("https://api.github.com/repos/{}/contents/{}".format(slug, path))
+    
+def plural(amount: int, name: str, plural: str = 's'):  # "<amount> <name>" plus the whole `plural` suffix unless amount == 1
+    return f"{amount} {name}{'' if amount == 1 else plural}"
+
+update_dataset = False  # NOTE(review): never read anywhere in this file — confirm it is still needed
+
+def get_name(slug: str):  # the part of `slug` after its first '/'; the whole slug when it contains no '/'
+    return slug.split('/', 1)[-1]
+
+def write_model_readmes(dataset: dict[str, Any]):
+    """Write ``README.md`` into ``dataset_path/<model_id>`` for every entry of ``dataset``.
+
+    Entries are updated in place: a missing ``data`` (repository) or ``owner``
+    record is fetched and stored on the entry, then ``stars``, ``forks``,
+    ``owner_name`` and ``owner_slug`` are set from those records.
+    Raises ``Exception`` when the repository has no owner URL or the owner no name.
+    """
+    for model_id, info in dataset.items():
+        readme = dataset_path / model_id / 'README.md'
+        slug: str = info['slug']
+        data = info.get('data')
+        if not data:  # no stored repository record yet: fetch it and keep it on the entry
+            data = info['data'] = get_repo(slug)
+        owner_url = data.get('owner', {}).get('url')
+        if not owner_url:
+            raise Exception(f'No owner in repo {slug}!')
+        owner = info.get('owner')
+        if not owner:  # same fetch-and-keep step for the owner record
+            owner = info['owner'] = get_json(owner_url)
+        owner_name = owner.get('name')
+        if not owner_name:
+            raise Exception(f'No owner name in repo {slug}!')
+        stars, forks, owner_slug = data['stargazers_count'], data['forks'], owner['login']
+        info.update(stars=stars, forks=forks, owner_name=owner_name, owner_slug=owner_slug)
+        # The template below also reads the entry's 'l', 'tech', 't', 's', 'e', 'i' and 'a' keys.
+        print(f"Writing readme file {readme}")
+        with open(readme, 'w', encoding="utf-8") as f:
+            f.write(f"""# {slug}
+
+## Repository Information
+
+Repository: [GitHub](https://github.com/{slug})
+
+Owner: [{owner_name}](https://github.com/{owner_slug})
+
+The repository has {plural(stars, 'star')} and was forked {plural(forks, 'time')}. The codebase consists of {plural(info['l'], 'line')} of code and makes use of the following technologies:
+
+{chr(10).join(f'- {tech}' for tech in info['tech'])}
+
+## Data Flow Diagram
+
+### Statistics
+
+The Application consists of a total of {plural(info['t'], 'element')}:
+
+Element | Count
+-- | --
+Services | {info['s']}
+External Entities | {info['e']}
+Information Flows | {info['i']}
+Annotations | {info['a']}
+Total Items | {info['t']}
+
+### Diagram
+
+The below diagram is generated from the corresponding [model file]({model_id}.py).
+
+Formats:
+- [PlantUML Model]({model_id}/{model_id}.txt)
+- [SVG Vector Image]({model_id}/{model_id}.svg)
+- [PNG Raster Image]({model_id}/{model_id}.png)
+
+![Data Flow Diagram]({model_id}/{model_id}.svg)""")
+
+def write_root_readme(dataset: dict[str, Any]):
+    """Write the top-level ``README.md``: intro text plus one table row per ``dataset`` entry (reads its ``slug``, ``l``, ``stars``, ``forks``, ``t`` and ``tech`` keys)."""
+    print("Writing main readme file")
+    with open('README.md', 'w', encoding="utf-8") as f:
+        f.write(f"""# Dataset of Dataflow Diagrams
+
+This repository consists of {len(dataset)} manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.
+
+## Models
+
+Name | Source | LoC | Stars | Forks | DFD Items | Technologies
+-- | -- | -- | -- | -- | -- | --
+""")
+        f.writelines(f"[{info['slug']}](dataset/{model_id}/README.md) | [GitHub](https://github.com/{info['slug']}) | {info['l']} | {info['stars']} | {info['forks']} | {info['t']} | {len(info['tech'])}\n" for model_id, info in dataset.items())
+
+def main():  # Regenerate every README from the dataset index, then write the enriched index back
+    dataset = open_dataset()
+    write_model_readmes(dataset)  # must run first: it sets the 'stars'/'forks' values the root table reads
+    write_root_readme(dataset)
+    save_dataset(dataset)
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
\ No newline at end of file