def write_file_if_changed(file: Path, content: str, encoding: str = "utf-8") -> None:
    """Write *content* to *file* unless the file already holds exactly that text.

    Skipping identical writes keeps the modification time of unchanged
    generated files stable.

    Args:
        file: Destination path. Missing parent directories are created.
        content: The complete text the file should contain.
        encoding: Text encoding used both to read the current content and to
            write the new one.
    """
    try:
        # EAFP: read directly instead of exists()+open(), which can race with
        # a concurrent delete between the check and the open.
        old_content = file.read_text(encoding=encoding)
    except (FileNotFoundError, UnicodeDecodeError):
        # A missing file, or one that cannot be decoded with *encoding*,
        # cannot match the new content, so it is (re)written.
        old_content = None

    if old_content == content:
        return

    print(f"Writing changed file: {file}")
    # Opening for writing fails when the directory does not exist yet, so make
    # sure it is there before the first write to a fresh output location.
    file.parent.mkdir(parents=True, exist_ok=True)
    with file.open('w', encoding=encoding) as f:
        f.write(content)
toc: false --- -# DaFD +## DaFD {{% include image.html file="TUHH_logo-wortmarke_en_rgb.svg" alt="TUHH Logo" max-width="350" %}} {{% include image.html file="company_logo_big.png" alt="SoftSec Institute Logo" max-width="350" %}} @@ -465,13 +472,16 @@ toc: false This is DaFD, a dataset containing Dataflow Diagrams (DFDs) of microservices written in Java. The models correspond to actual implementation code of open-source applications found on GitHub. The DFDs are presented in multiple formats and contain full traceability of all model items to code, indicating the evidence for their implementation. Additionally to the models themselves, we present a mapping to a list of 17 architectural security best-practices, i.e. a table indicating whether each rules is followed or not. For those that are not followed, we created model variants that do follow the rule. These variants were crafted purely on the model-level and the added items do not correspond to code anymore. All artifacts were created manually by researchers of the Institute of Software Security at Hamburg University of Technology. 
-{{% include toc.html %}} +## Table of Contents + +- [Overview](index.html) +- [Dataflow Diagrams](dfds.html) +- [Use-Cases](usecases.html) +- [Models](models.html) """) models_file = overview_dir / 'models.md' - print(f"Writing models readme file") - with models_file.open('w', encoding="utf-8") as f: - f.write(f"""--- + write_file_if_changed(models_file, f"""--- title: Models keywords: dataset models tags: [overview] @@ -497,24 +507,28 @@ Name | Source | LoC | Stars | Forks | DFD Items | Technologies def write_tag_readme(dataset: Dataset): tag_dir = output_path / 'tags' known_tech = set(tech for model in dataset.values() for tech in model['tech']) - print(f"Writing tag data file") + tags_data_path = Path('_data') tags_data_file = tags_data_path / 'tags.yml' - tags_data_path.mkdir(exist_ok=True, parents=True) - with tags_data_file.open('r+') as f: - tags = yaml.safe_load(f) - tags['allowed-tags'] = list(sorted(set(itertools.chain(tags['allowed-tags'], (get_tag_slug(tech) for tech in known_tech))))) - f.seek(0) + if tags_data_file.exists(): + tags_data_path.mkdir(exist_ok=True, parents=True) + with tags_data_file.open('r') as f: + tags: dict[Any, Any] = yaml.safe_load(f) + else: + tags = {} + + tags['allowed-tags'] = list(sorted(set(itertools.chain(tags.get('allowed-tags', []), (get_tag_slug(tech) for tech in known_tech))))) + + with StringIO() as f: yaml.dump(tags, f) - f.truncate() + tags_content = f.getvalue() + write_file_if_changed(tags_data_file, tags_content) for tech in known_tech: slug = get_tag_slug(tech) info_file = tag_dir / f'tag_{slug}.md' - print(f"Writing tag file for {tech}") tag_dir.mkdir(exist_ok=True, parents=True) - with open(info_file, 'w', encoding="utf-8") as f: - f.write(f"""--- + write_file_if_changed(info_file, f"""--- title: "{tech}" tagName: {slug} search: exclude