From b6389f747fee7e8fcc0cc71d8fbacfdaa5b6a602 Mon Sep 17 00:00:00 2001
From: Michael Chen <git@cnml.de>
Date: Tue, 22 Nov 2022 16:30:07 +0100
Subject: [PATCH] Revision 9th of November

---
 createreadmes.py | 98 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 73 insertions(+), 25 deletions(-)

diff --git a/createreadmes.py b/createreadmes.py
index b9be0e3..4e20ed3 100644
--- a/createreadmes.py
+++ b/createreadmes.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 import json
+import itertools
 import yaml
 import jsonschema
 from typing import Any
@@ -11,22 +12,16 @@ except ModuleNotFoundError:
     yachalk_imported = False
 
 dataset_path = Path('dataset')
-dataset_info = dataset_path / 'dataset.json'
+output_path = Path('pages')
+dataset_info = dataset_path / Path('dataset.json')
 token = "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"
 
-def error(msg: str):
-    if yachalk_imported:
-        msg = chalk.red(msg)
-    else:
-        msg = "Error: {}".format(msg) 
-    print(msg)
+def error(msg: str) -> Exception:  # print msg as an error, then hand it back wrapped in an Exception
+    print(f"Error: {msg}" if not yachalk_imported else chalk.red(msg))  # red when yachalk is available
+    return Exception(msg)
 
 def warning(msg: str):
-    if yachalk_imported:
-        msg = chalk.yellow(msg)
-    else:
-        msg = "Warning: {}".format(msg) 
-    print(msg)
+    print(f"Warning: {msg}" if not yachalk_imported else chalk.yellow(msg))  # yellow when yachalk is available
 
 def open_dataset() -> dict[str, Any]:
     with open(dataset_info, 'r') as f:
@@ -81,6 +76,7 @@ properties:
     type: string
     enum:
       - disregarded
+      - observed
       - not applicable
       - unknown
   argument:
@@ -114,10 +110,13 @@ update_dataset = False
 def get_name(slug: str):
     return slug[slug.find('/')+1:]
 
+def get_tag_slug(tag: str) -> str:  # lower-case the tag and turn every space into '_', e.g. "Spring Boot" -> "spring_boot"
+    return '_'.join(tag.lower().split(' '))
+
 def write_model_readmes(dataset: dict[str, Any]):
     for model_id, info in dataset.items():
-        dir = dataset_path / model_id
-        readme = dir / 'README.md'
+        dir = output_path / 'dataset'
+        readme = dir / f'{model_id}.md'
         slug: str = info['slug']
         data = info.get('data')
         if not data:
@@ -149,8 +148,15 @@ def write_model_readmes(dataset: dict[str, Any]):
             warning("Security rules file not found at {}".format(security_rules_file))
             security_rules = {}
         print(f"Writing readme file {readme}")
+        dir.mkdir(exist_ok=True)
         with open(readme, 'w', encoding="utf-8") as f:
-            f.write(f"""# {slug}
+            f.write(f"""---
+title: {slug}
+keywords: model TODO
+tags: [{', '.join(get_tag_slug(tech) for tech in info['tech'])}]
+sidebar: datasetdoc_sidebar
+permalink: {model_id}.html
+---
 
 ## Repository Information
 
@@ -178,21 +184,30 @@ Total Items | {info['t']}
 
 ### Diagram
 
-The below diagram is generated from the corresponding [model file]({model_id}.py).
+The below diagram is generated from the corresponding [model file](/dataset/{model_id}/{model_id}.py).
 
 Formats:
-- [PlantUML Model]({model_id}/{model_id}.txt)
-- [SVG Vector Image]({model_id}/{model_id}.svg)
-- [PNG Raster Image]({model_id}/{model_id}.png)
+- [PlantUML Model](/dataset/{model_id}/{model_id}/{model_id}.txt)
+- [SVG Vector Image](/dataset/{model_id}/{model_id}/{model_id}.svg)
+- [PNG Raster Image](/dataset/{model_id}/{model_id}/{model_id}.png)
 
-![Data Flow Diagram]({model_id}/{model_id}.svg)""")
+![Data Flow Diagram](/dataset/{model_id}/{model_id}/{model_id}.svg)""")
 
 def write_root_readme(dataset: dict[str, Any]):
     print(f"Writing main readme file")
-    with open('README.md', 'w', encoding="utf-8") as f:
-        f.write(f"""# Dataset of Dataflow Diagrams
+    with open('index.md', 'w', encoding="utf-8") as f:
+        f.write(f"""---
+title: code2DFD Dataset
+keywords: sample homepage
+tags: [getting_started]
+sidebar: datasetdoc_sidebar
+permalink: index.html
+summary: Dataset of dataflow diagrams of microservice applications.
+---
 
-This repository contains of 17 manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.
+# Dataset of Dataflow Diagrams
+
+This repository contains of {len(dataset)} manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.
 
 ## Models
 
@@ -200,7 +215,7 @@ This repository contains of 17 manually created dataflow diagrams (DFDs) of micr
 
 Name | Source | LoC | Stars | Forks | DFD Items | Technologies
 -- | -- | -- | -- | -- | -- | --
-{chr(10).join(f"[{info['slug']}](dataset/{model_id}/README.md) | [GitHub](https://github.com/{info['slug']}) | {info['l']} | {info['stars']} | {info['forks']} | {info['t']} | {len(info['tech'])}" for model_id, info in dataset.items())}
+{chr(10).join(f"[{info['slug']}]({model_id}.html) | [GitHub](https://github.com/{info['slug']}) | {info['l']} | {info['stars']} | {info['forks']} | {info['t']} | {len(info['tech'])}" for model_id, info in dataset.items())}
 
 <div class="datatable-end"></div>
 
@@ -213,11 +228,57 @@ Do culpa deserunt est excepteur amet. Non pariatur ea elit ad eiusmod veniam exe
 Veniam culpa nostrud id laborum deserunt consectetur consectetur voluptate. Sint aute cupidatat velit irure elit laboris anim labore esse labore. Quis ullamco ut consequat amet. Enim sit laboris deserunt veniam duis aliqua irure proident.
 """)
 
+def write_tag_readme(dataset: dict[str, Any]):
+    """Register every technology as an allowed tag and write one page per tag.
+
+    Only the ``'tech'`` entry of each model in ``dataset`` is read.  The slugs
+    of all technologies are merged into the ``allowed-tags`` list of the
+    existing ``_data/tags.yml``, and a ``tag_<slug>.md`` page is written to
+    ``output_path / 'tags'`` for each technology.
+    """
+    tag_dir = output_path / 'tags'
+    # Sorted so the pages are written and logged in the same order on every run.
+    known_tech = sorted({tech for model in dataset.values() for tech in model['tech']})
+    print("Writing tag data file")
+    with open('_data/tags.yml', 'r+', encoding="utf-8") as f:
+        # An empty file loads as None and the key may be absent: both mean "no tags yet".
+        tags = yaml.safe_load(f) or {}
+        existing = tags.get('allowed-tags') or []
+        tags['allowed-tags'] = sorted(set(itertools.chain(existing, map(get_tag_slug, known_tech))))
+        # Rewrite in place: rewind, dump, then cut off what is left of a longer old file.
+        f.seek(0)
+        yaml.dump(tags, f)
+        f.truncate()
+
+    # Nothing above creates the tag directory, so make sure it exists before writing into it.
+    tag_dir.mkdir(parents=True, exist_ok=True)
+    for tech in known_tech:
+        slug = get_tag_slug(tech)
+        info_file = tag_dir / f'tag_{slug}.md'
+        print(f"Writing tag file for {tech}")
+        with open(info_file, 'w', encoding="utf-8") as f:
+            f.write(f"""---
+title: "{tech}"
+tagName: {slug}
+search: exclude
+permalink: tag_{slug}.html
+sidebar: datasetdoc_sidebar
+folder: tags
+---
+{{% include taglogic.html %}}
+
+{{% include links.html %}}
+""")
+
 def main():
+    """Regenerate the tag, index and per-model pages; re-save the dataset only when `update_dataset` is set."""
     dataset = open_dataset()
+    write_tag_readme(dataset)
     write_root_readme(dataset)
     write_model_readmes(dataset)
-    save_dataset(dataset)
+    if update_dataset:
+        save_dataset(dataset)
 
+
 if __name__ == '__main__':
     main()