Revision 9th of November

This commit is contained in:
Michael Chen 2022-11-22 16:30:07 +01:00
parent 4272165184
commit b6389f747f
Signed by: cnml
GPG Key ID: 5845BF3F82D5F629

View File

@ -1,5 +1,6 @@
import itertools
import json
import os
from pathlib import Path
from typing import Any

import jsonschema
import yaml
@ -11,22 +12,16 @@ except ModuleNotFoundError:
yachalk_imported = False yachalk_imported = False
# Directory that holds the dataset and its index file.
dataset_path = Path('dataset')
# Directory the generated pages are written below.
output_path = Path('pages')
dataset_info = dataset_path / 'dataset.json'
# SECURITY: credentials must never be committed. A literal GitHub token used
# to live on this line; treat it as compromised and revoke it. The token is
# now taken from the environment (empty string when GITHUB_TOKEN is unset).
# NOTE(review): the code consuming `token` is not visible here -- confirm it
# copes with an empty value.
token = os.environ.get("GITHUB_TOKEN", "")
def error(msg: str) -> Exception:
    """Print *msg* as an error and return it wrapped in an ``Exception``.

    The message is coloured red when yachalk is available; otherwise it is
    prefixed with ``Error: ``. The exception is returned, not raised --
    presumably so call sites can write ``raise error(...)``.
    """
    print(chalk.red(msg) if yachalk_imported else "Error: {}".format(msg))
    return Exception(msg)
def warning(msg: str) -> None:
    """Print *msg* as a warning.

    The message is coloured yellow when yachalk is available; otherwise it
    is prefixed with ``Warning: ``.
    """
    print(chalk.yellow(msg) if yachalk_imported else "Warning: {}".format(msg))
def open_dataset() -> dict[str, Any]: def open_dataset() -> dict[str, Any]:
with open(dataset_info, 'r') as f: with open(dataset_info, 'r') as f:
@ -81,6 +76,7 @@ properties:
type: string type: string
enum: enum:
- disregarded - disregarded
- observed
- not applicable - not applicable
- unknown - unknown
argument: argument:
@ -114,10 +110,13 @@ update_dataset = False
def get_name(slug: str) -> str:
    """Return the part of *slug* after its first ``/``.

    For an ``owner/repo`` slug this is ``repo``. A slug without any ``/`` is
    returned unchanged.
    """
    _, separator, name = slug.partition('/')
    return name if separator else slug
def get_tag_slug(tag: str) -> str:
    """Return the slug form of *tag*: lower-cased, with spaces replaced by underscores."""
    return tag.lower().replace(' ', '_')
def write_model_readmes(dataset: dict[str, Any]): def write_model_readmes(dataset: dict[str, Any]):
for model_id, info in dataset.items(): for model_id, info in dataset.items():
dir = dataset_path / model_id dir = output_path / 'dataset'
readme = dir / 'README.md' readme = dir / f'{model_id}.md'
slug: str = info['slug'] slug: str = info['slug']
data = info.get('data') data = info.get('data')
if not data: if not data:
@ -149,8 +148,15 @@ def write_model_readmes(dataset: dict[str, Any]):
warning("Security rules file not found at {}".format(security_rules_file)) warning("Security rules file not found at {}".format(security_rules_file))
security_rules = {} security_rules = {}
print(f"Writing readme file {readme}") print(f"Writing readme file {readme}")
dir.mkdir(exist_ok=True)
with open(readme, 'w', encoding="utf-8") as f: with open(readme, 'w', encoding="utf-8") as f:
f.write(f"""# {slug} f.write(f"""---
title: {slug}
keywords: model TODO
tags: [{', '.join(get_tag_slug(tech) for tech in info['tech'])}]
sidebar: datasetdoc_sidebar
permalink: {model_id}.html
---
## Repository Information ## Repository Information
@ -178,21 +184,30 @@ Total Items | {info['t']}
### Diagram ### Diagram
The below diagram is generated from the corresponding [model file]({model_id}.py). The below diagram is generated from the corresponding [model file](/dataset/{model_id}/{model_id}.py).
Formats: Formats:
- [PlantUML Model]({model_id}/{model_id}.txt) - [PlantUML Model](/dataset/{model_id}/{model_id}/{model_id}.txt)
- [SVG Vector Image]({model_id}/{model_id}.svg) - [SVG Vector Image](/dataset/{model_id}/{model_id}/{model_id}.svg)
- [PNG Raster Image]({model_id}/{model_id}.png) - [PNG Raster Image](/dataset/{model_id}/{model_id}/{model_id}.png)
![Data Flow Diagram]({model_id}/{model_id}.svg)""") ![Data Flow Diagram](/dataset/{model_id}/{model_id}/{model_id}.svg)""")
def write_root_readme(dataset: dict[str, Any]): def write_root_readme(dataset: dict[str, Any]):
print(f"Writing main readme file") print(f"Writing main readme file")
with open('README.md', 'w', encoding="utf-8") as f: with open('index.md', 'w', encoding="utf-8") as f:
f.write(f"""# Dataset of Dataflow Diagrams f.write(f"""---
title: code2DFD Dataset
keywords: sample homepage
tags: [getting_started]
sidebar: datasetdoc_sidebar
permalink: index.html
summary: Dataset of dataflow diagrams of microservice applications.
---
This repository contains of 17 manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made. # Dataset of Dataflow Diagrams
This repository contains of {len(dataset)} manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.
## Models ## Models
@ -200,7 +215,7 @@ This repository contains of 17 manually created dataflow diagrams (DFDs) of micr
Name | Source | LoC | Stars | Forks | DFD Items | Technologies Name | Source | LoC | Stars | Forks | DFD Items | Technologies
-- | -- | -- | -- | -- | -- | -- -- | -- | -- | -- | -- | -- | --
{chr(10).join(f"[{info['slug']}](dataset/{model_id}/README.md) | [GitHub](https://github.com/{info['slug']}) | {info['l']} | {info['stars']} | {info['forks']} | {info['t']} | {len(info['tech'])}" for model_id, info in dataset.items())} {chr(10).join(f"[{info['slug']}]({model_id}.html) | [GitHub](https://github.com/{info['slug']}) | {info['l']} | {info['stars']} | {info['forks']} | {info['t']} | {len(info['tech'])}" for model_id, info in dataset.items())}
<div class="datatable-end"></div> <div class="datatable-end"></div>
@ -213,11 +228,44 @@ Do culpa deserunt est excepteur amet. Non pariatur ea elit ad eiusmod veniam exe
Veniam culpa nostrud id laborum deserunt consectetur consectetur voluptate. Sint aute cupidatat velit irure elit laboris anim labore esse labore. Quis ullamco ut consequat amet. Enim sit laboris deserunt veniam duis aliqua irure proident. Veniam culpa nostrud id laborum deserunt consectetur consectetur voluptate. Sint aute cupidatat velit irure elit laboris anim labore esse labore. Quis ullamco ut consequat amet. Enim sit laboris deserunt veniam duis aliqua irure proident.
""") """)
def write_tag_readme(dataset: dict[str, Any]) -> None:
    """Register every technology of the dataset as a tag and write one page per tag.

    Two things are written:

    * ``_data/tags.yml`` is updated in place: its ``allowed-tags`` list
      becomes the sorted union of the entries already present and the slugs
      of all technologies found in *dataset*.
    * For every technology a ``tag_<slug>.md`` file is created in the
      ``tags`` directory below ``output_path``.

    Args:
        dataset: Mapping of model id to model info; each info must provide
            an iterable of technology names under the ``'tech'`` key.
    """
    tag_dir = output_path / 'tags'
    known_tech = {tech for model in dataset.values() for tech in model['tech']}
    tag_slugs = {get_tag_slug(tech) for tech in known_tech}

    print("Writing tag data file")
    with open('_data/tags.yml', 'r+', encoding="utf-8") as f:
        # An empty file loads as None and the key may be missing; fall back
        # to empty containers instead of crashing.
        tags = yaml.safe_load(f) or {}
        already_allowed = tags.get('allowed-tags') or []
        tags['allowed-tags'] = sorted(tag_slugs.union(already_allowed))
        # Rewrite from the start and cut off leftovers of the old content.
        f.seek(0)
        yaml.dump(tags, f)
        f.truncate()

    # The directory may not exist yet on a fresh checkout.
    tag_dir.mkdir(parents=True, exist_ok=True)
    # Sorted so files are written (and logged) in a stable order.
    for tech in sorted(known_tech):
        slug = get_tag_slug(tech)
        info_file = tag_dir / f'tag_{slug}.md'
        print(f"Writing tag file for {tech}")
        with open(info_file, 'w', encoding="utf-8") as f:
            # Doubled braces emit literal `{% ... %}` include directives.
            f.write(f"""---
title: "{tech}"
tagName: {slug}
search: exclude
permalink: tag_{slug}.html
sidebar: datasetdoc_sidebar
folder: tags
---
{{% include taglogic.html %}}
{{% include links.html %}}
""")
def main() -> None:
    """Load the dataset and regenerate the tag pages, the index page and the model pages.

    The dataset is written back afterwards only when the module-level
    ``update_dataset`` flag is set.
    """
    dataset = open_dataset()
    write_tag_readme(dataset)
    write_root_readme(dataset)
    write_model_readmes(dataset)
    if update_dataset:
        save_dataset(dataset)
# Run the generator only when executed as a script, not on import.
if __name__ == '__main__':
    main()