Compare commits

...

10 Commits

SHA1 Message Date
556035bdd9 Build latest tag on main branch; clean old stuff from createreadmes 2023-03-22 17:37:00 +01:00
44bae3fa85 Added branch override for artifacts 2022-12-13 17:51:20 +01:00
62c96fcbf6 Added rule icons 2022-12-13 17:51:01 +01:00
b4f5e3ebd4 Added support for external repository file links 2022-12-13 15:48:29 +01:00
ca6fee127c Added argument can now be a list of arguments for list formatting; artifact lines can now be omitted to show entire file 2022-12-13 14:50:02 +01:00
887fb71a27 Added index page table of contents; write files only if changed (less verbose) 2022-11-30 15:07:56 +01:00
5f73765212 Write security rules only if they exist. Updated content for overview readmes 2022-11-30 14:31:32 +01:00
54eeea998b Added portainer webhook for deployment 2022-11-23 14:29:28 +01:00
edaacf8b0a Added docker build script with local pre-build stage 2022-11-23 14:29:09 +01:00
15ed7a5c73 Added schemas for datasets 2022-11-23 14:10:20 +01:00
5 changed files with 279 additions and 57 deletions

Dockerfile.local (new file, +2 lines)

@@ -0,0 +1,2 @@
FROM httpd:alpine
COPY dist/ /usr/local/apache2/htdocs/

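Note: this image only serves the pre-built dist/ directory with Apache. A minimal local smoke test, sketched in the same subprocess style that build.py below uses (the tag code2dfd-docs:local is a hypothetical example, not a name from this repository):

from subprocess import run

# Build the static-site image from dist/ and serve it on http://localhost:8080.
run(["docker", "build", "-f", "Dockerfile.local", "-t", "code2dfd-docs:local", "."], check=True)
run(["docker", "run", "--rm", "-p", "8080:80", "code2dfd-docs:local"], check=True)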
build.py (new file, +71 lines)

@@ -0,0 +1,71 @@
from pathlib import Path
from shutil import rmtree
from subprocess import PIPE, run

from requests import post


def buildx(repository: str, tags: list[str], build_platforms: list[str],
           dockerfile: str | Path = "Dockerfile",
           build_args: dict[str, str] | None = None, directory: str = ".",
           push: bool = True, pull: bool = False, progress: str = "auto",
           write_command: bool = False):
    if build_args is None:
        build_args = dict()
    labels = [f"{repository}:{tag}" for tag in tags]
    command = list(filter(None, ["docker", "buildx", "build",
                                 "--platform", ",".join(build_platforms),
                                 *[t for (key, value) in build_args.items()
                                   for t in ("--build-arg", f"{key}={value}")],
                                 "--file", str(dockerfile),
                                 *[t for label in labels for t in ("--tag", label)],
                                 f"--progress={progress}",
                                 "--pull" if pull else None,
                                 "--push" if push else None,
                                 directory]))
    if write_command:
        print(" ".join(command))
    run(command, check=True)


def get_webhook():
    try:
        with open("portainer-webhook.txt", "r") as f:
            return f.read().strip()
    except Exception:
        return None


def update_portainer_stack(webhook_id: str):
    print("Updating portainer stack...")
    resp = post(f"https://docker.cnml.de/api/stacks/webhooks/{webhook_id}")
    if not resp.ok:
        try:
            try:
                error = resp.json()
            except Exception:
                error = resp.content.decode()
                raise Exception(error)
            raise Exception(f"{error['message']} ({error['details']})")
        except Exception as e:
            print("Failed to update:", e)
    else:
        print("Stack successfully updated!")


if __name__ == '__main__':
    output_path = Path("dist")
    if output_path.exists():
        rmtree(output_path)
    output_path.mkdir()
    dockerfile = Path("Dockerfile.local").resolve()
    run(["python", "createreadmes.py"], check=True)
    run(["bundle.bat", "exec", "jekyll", "build", "--destination", output_path], check=True)
    branch = run(["git", "branch", "--show-current"],
                 stdout=PIPE, check=True).stdout.decode().strip()
    short_sha = run(["git", "rev-parse", "--short", "HEAD"],
                    stdout=PIPE, check=True).stdout.decode().strip()
    tags = [branch, short_sha]
    if branch == 'main':
        default_branch_tag = "latest"
        print(f"On default branch, also building {default_branch_tag} tag!")
        tags.append(default_branch_tag)
    platforms = ['linux/amd64', 'linux/arm/v6', 'linux/arm/v7',
                 'linux/arm64/v8', 'linux/386', 'linux/ppc64le', 'linux/s390x']
    buildx("chenio/code2dfd", tags, platforms, dockerfile=dockerfile)
    webhook_id = get_webhook()
    if webhook_id is not None:
        update_portainer_stack(webhook_id)
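For reference, buildx() only assembles and runs a single docker buildx build command. A sketch of what it produces for hypothetical arguments (hand-derived from the code above, not output captured from this repository; push=False so nothing is pushed):

# With write_command=True the assembled command is printed before running:
buildx("example/site", ["main", "abc1234"], ["linux/amd64"],
       push=False, write_command=True)
# Expected print, as one line:
# docker buildx build --platform linux/amd64 --file Dockerfile
#   --tag example/site:main --tag example/site:abc1234 --progress=auto .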

createreadmes.py (changed file, +96, -57 lines)

@@ -1,9 +1,10 @@
+from io import StringIO
 from pathlib import Path
 import json
 import itertools
 import yaml
 import jsonschema
-from typing import Any, List, NotRequired, Optional, TypedDict
+from typing import Any, Dict, List, Literal, NotRequired, Optional, TypedDict
 import requests
 try:
     from yachalk import chalk
@@ -216,11 +217,15 @@ from typing import TypedDict
 class Artifact(TypedDict):
     file: str
-    lines: list[int]
+    lines: NotRequired[list[int]]
+    repository: NotRequired[str]
+    branch: NotRequired[str]
+
+RuleStatus = Literal["disregarded", "observed", "not applicable", "unknown"]
 
 class SecurityRule(TypedDict):
-    status: str
-    argument: str
+    status: RuleStatus
+    argument: str | list[str]
     artifacts: NotRequired[list[Artifact]]
 
 rule_schema = yaml.safe_load("""type: object
@@ -237,20 +242,31 @@ properties:
       - not applicable
       - unknown
     argument:
-      type: string
+      anyOf:
+      - type: string
+      - type: array
+        items:
+          type: string
     artifacts:
       type: array
       items:
+        additionalProperties: no
+        required:
+        - file
         type: object
         properties:
           file:
             type: string
+          repository:
+            type: string
+          branch:
+            type: string
           lines:
             type: array
             items:
               type: integer""")
 
-def check_security_rules(security_rules: dict[Any, Any] | None) -> dict[int, SecurityRule]:
+def check_security_rules(model_id: str, security_rules: dict[Any, Any] | None) -> dict[int, SecurityRule]:
     if security_rules is None:
         raise Exception("Security rules file is empty!")
     for n in range(1, 19):
@@ -260,7 +276,7 @@ def check_security_rules(security_rules: dict[Any, Any] | None) -> dict[int, SecurityRule]:
             jsonschema.validate(rule, rule_schema)
             rule: SecurityRule
             if rule["status"] == "unknown":
-                warning(f"Rule {n} is still unknown!")
+                warning(f"In model {model_id}: Rule {n} is still unknown!")
         except jsonschema.ValidationError as e:
             warning("Not checking further rules!")
             raise Exception("Security rule {n}: {msg} at $.{n}.{path}".format(n=n, msg=e.message, path=e.json_path)) from e
@@ -298,17 +314,24 @@ rule_names = {
 def artifact_to_string(info: ModelInformation, artifact: Artifact):
     file = Path(artifact['file'])
     filename = file.name
-    file_url = f"https://github.com/{info['slug']}/blob/{info.get('branch', 'master')}/{artifact['file']}"
-    return f"- {filename}: Line{'s'[:len(artifact['lines'])^1]}: {', '.join(f'[{line}]({file_url}#L{line})' for line in artifact['lines'])}"
+    project_branch = info.get("branch", "master")
+    branch = artifact.get("branch", project_branch)
+    file_url = f"https://github.com/{artifact.get('repository', info['slug'])}/blob/{branch}/{artifact['file']}"
+    lines = artifact.get("lines")
+    if lines is None:
+        return f"- {filename}: [File]({file_url})"
+    return f"- {filename}: Line{'s'[:len(lines)^1]}: {', '.join(f'[{line}]({file_url}#L{line})' for line in lines)}"
 
 def rule_to_string(info: ModelInformation, id: int, rule: SecurityRule | None):
     if rule is None:
-        # warning(f"Rule {id} is missing!")  # TODO Enable warning
+        warning(f"Rule {id} is missing!")
         return ""
-    text = f"""#### Rule {id}: {rule_names[id]}
+    argument = rule['argument']
+    argument = argument if isinstance(argument, str) else "".join(f"\n1. {arg}" for arg in argument)
+    text = f"""#### Rule {id}: {rule_names[id]} {{#rule{id:02}}}
 
-This rule is {rule['status']}: {rule['argument']}"""
+This rule is {rule['status']}: {argument}"""
     artifacts = rule.get("artifacts", [])
     if len(artifacts) > 0:
         text = text + f"""
@@ -318,8 +341,18 @@ Artifacts:
     return text
 
 def write_security_rules(info: ModelInformation, security_rules: dict[int, SecurityRule]):
+    icons: Dict[RuleStatus | str, str] = {
+        'disregarded': '<i class="fa fa-exclamation-circle" style="color: #d72b28;"></i>',
+        'observed': '<i class="fa fa-check-square-o" style="color: #6be16d;"></i>',
+        'not applicable': '<i class="fa fa-info-circle" style="color: #31708;"></i>',
+        'unknown': '<i class="fa fa-warning" style="color: #bfc600;"></i>',
+    }
     return f"""## Security Rules
 
+{" | ".join(f"R{i}" for i in range(1, 19))}
+{" | ".join("--" for _ in range(1, 19))}
+{" | ".join(f'<a href="#rule{i:02}">{icons[security_rules.get(i, {"status": "unknown"})["status"]]}</a>' for i in range(1, 19))}
+
 ### Authentication / Authorization
 
 {(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(1, 7))}
@@ -344,6 +377,16 @@ def write_security_rules(info: ModelInformation, security_rules: dict[int, SecurityRule]):
 
 {(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(18, 19))}"""
 
+def write_file_if_changed(file: Path, content: str, encoding: str = "utf-8"):
+    old_content = None
+    if file.exists():
+        with file.open('r', encoding=encoding) as f:
+            old_content = f.read()
+    if old_content is None or old_content != content:
+        print(f"Writing changed file: {file}")
+        with file.open('w', encoding=encoding) as f:
+            f.write(content)
+
 def write_model_readmes(dataset: Dataset):
     for model_id, info in dataset.items():
         dir = output_path / 'dataset'
@@ -375,22 +418,18 @@ def write_model_readmes(dataset: Dataset):
         model_file = model_path / f"{model_id}.py"
         with model_file.open("r") as f:
             model = f.read()
+        security_rules = None
         try:
             with security_rules_file.open('r') as f:
-                security_rules = yaml.safe_load(f)
-            security_rules = check_security_rules(security_rules)
+                security_rules = check_security_rules(model_id, yaml.safe_load(f))
         except FileNotFoundError:
             warning("Security rules file not found at {}".format(security_rules_file))
-            security_rules = {}
         except Exception as e:
             warning("Security rules file at {} is invalid: {}".format(security_rules_file, e))
-            security_rules = {}
-        print(f"Writing readme file {readme}")
         dir.mkdir(exist_ok=True)
-        with readme.open('w', encoding="utf-8") as f:
-            f.write(f"""---
+        write_file_if_changed(readme, f"""---
 title: {slug}
-keywords: model TODO
+keywords: model
 tags: [{', '.join(get_tag_slug(tech) for tech in info['tech'])}]
 sidebar: datasetdoc_sidebar
 permalink: {model_id}.html
@@ -440,56 +479,52 @@ Formats:
 
 ![Data Flow Diagram](../../dataset/{model_id}/{model_id}/{model_id}.svg)
 
-{write_security_rules(info, security_rules)}
+{"" if security_rules is None else write_security_rules(info, security_rules)}
 """)
 
 def write_root_readme(dataset: Dataset):
-    print(f"Writing main readme file")
-    with open('index.md', 'w', encoding="utf-8") as f:
-        f.write(f"""---
+    overview_dir = output_path / 'overview'
+    index_file = Path('index.md')
+    write_file_if_changed(index_file, f"""---
 title: code2DFD Documentation
 keywords: code2DFD introduction
-tags: []
+tags: [overview]
 sidebar: datasetdoc_sidebar
 permalink: index.html
-summary: Dataset of dataflow diagrams of microservice applications.
+toc: false
 ---
 
-# code2DFD
+## DaFD
 
 {{% include image.html file="TUHH_logo-wortmarke_en_rgb.svg" alt="TUHH Logo" max-width="350" %}}
 
+This project is developed by the Institute of Software Security at Hamburg University of Technology.
+
 {{% include image.html file="company_logo_big.png" alt="SoftSec Institute Logo" max-width="350" %}}
 
-This is a description. Duis proident aliqua laborum reprehenderit duis nostrud sint duis anim Lorem anim ut.
+This is DaFD, a dataset containing Dataflow Diagrams (DFDs) of microservices written in Java. The models correspond to actual implementation code of open-source applications found on GitHub.
+
+The DFDs are presented in multiple formats and contain full traceability of all model items to code, indicating the evidence for their implementation. Additionally to the models themselves, we present a mapping to a list of 17 architectural security best-practices, i.e. a table indicating whether each rules is followed or not. For those that are not followed, we created model variants that do follow the rule. These variants were crafted purely on the model-level and the added items do not correspond to code anymore. All artifacts were created manually by researchers of the Institute of Software Security at Hamburg University of Technology.
 
-## DFD Items
+## Table of Contents
 
-Do culpa deserunt est excepteur amet. Non pariatur ea elit ad eiusmod veniam exercitation nulla. Commodo do adipisicing amet et. Voluptate laboris commodo dolor eu mollit ipsum. Amet reprehenderit velit eu culpa amet exercitation. Elit esse ullamco duis mollit quis. Eiusmod qui reprehenderit sunt cupidatat Lorem anim occaecat enim sint eiusmod tempor.
-
-## Use-Cases
-
-Veniam culpa nostrud id laborum deserunt consectetur consectetur voluptate. Sint aute cupidatat velit irure elit laboris anim labore esse labore. Quis ullamco ut consequat amet. Enim sit laboris deserunt veniam duis aliqua irure proident.
+- [Overview](index.html)
+- [Dataflow Diagrams](dfds.html)
+- [Use-Cases](usecases.html)
+- [Models](models.html)
 """)
 
-    print(f"Writing models readme file")
-    with open('dataset.md', 'w', encoding="utf-8") as f:
-        f.write(f"""---
-title: code2DFD Dataset
+    models_file = overview_dir / 'models.md'
+    write_file_if_changed(models_file, f"""---
+title: Models
 keywords: dataset models
-tags: []
+tags: [overview]
 sidebar: datasetdoc_sidebar
-permalink: dataset.html
+permalink: models.html
 summary: Dataset of dataflow diagrams of microservice applications.
 datatable: true
 ---
 
-# Dataset of Dataflow Diagrams
-
-This repository contains of {len(dataset)} manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.
-
-## Models
+The following table presents the models in this dataset. It shows some properties about their popularity and size of the models. Column `Source` links directly to the corresponding repository on GitHub. If you click on the name of an entry, you will be referred to the model and all artifacts.
+Please select a model in column `Name`
 
 <div class="datatable-begin"></div>
@@ -503,24 +538,28 @@ Name | Source | LoC | Stars | Forks | DFD Items | Technologies
 def write_tag_readme(dataset: Dataset):
     tag_dir = output_path / 'tags'
     known_tech = set(tech for model in dataset.values() for tech in model['tech'])
-    print(f"Writing tag data file")
     tags_data_path = Path('_data')
     tags_data_file = tags_data_path / 'tags.yml'
-    tags_data_path.mkdir(exist_ok=True, parents=True)
-    with tags_data_file.open('r+') as f:
-        tags = yaml.safe_load(f)
-        tags['allowed-tags'] = list(sorted(set(itertools.chain(tags['allowed-tags'], (get_tag_slug(tech) for tech in known_tech)))))
-        f.seek(0)
+    if tags_data_file.exists():
+        tags_data_path.mkdir(exist_ok=True, parents=True)
+        with tags_data_file.open('r') as f:
+            tags: dict[Any, Any] = yaml.safe_load(f)
+    else:
+        tags = {}
+    tags['allowed-tags'] = list(sorted(set(itertools.chain(tags.get('allowed-tags', []), (get_tag_slug(tech) for tech in known_tech)))))
+    with StringIO() as f:
         yaml.dump(tags, f)
-        f.truncate()
+        tags_content = f.getvalue()
+    write_file_if_changed(tags_data_file, tags_content)
 
     for tech in known_tech:
         slug = get_tag_slug(tech)
         info_file = tag_dir / f'tag_{slug}.md'
-        print(f"Writing tag file for {tech}")
         tag_dir.mkdir(exist_ok=True, parents=True)
-        with open(info_file, 'w', encoding="utf-8") as f:
-            f.write(f"""---
+        write_file_if_changed(info_file, f"""---
 title: "{tech}"
 tagName: {slug}
 search: exclude

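The write_file_if_changed() helper added above is what makes the build "less verbose": a file is rewritten (and logged) only when its content actually differs. A minimal sketch of the behaviour, assuming the function is in scope and using a hypothetical path:

from pathlib import Path

target = Path("example.md")                 # hypothetical file
write_file_if_changed(target, "# Hello\n")  # first call: prints and writes
write_file_if_changed(target, "# Hello\n")  # same content: no write, no output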
dataset.schema.json (new file, +43 lines)

@@ -0,0 +1,43 @@
{
"$schema": "http://json-schema.org/draft-04/schema",
"type": "object",
"patternProperties": {
".+": {
"type": "object",
"properties": {
"title": { "type": "string" },
"slug": { "type": "string" },
"stars": { "type": "integer" },
"forks": { "type": "integer" },
"data": { "type": "object" },
"owner": { "type": "object" },
"owner_name": { "type": "string" },
"owner_slug": { "type": "string" },
"s": { "type": "integer" },
"e": { "type": "integer" },
"i": { "type": "integer" },
"a": { "type": "integer" },
"t": { "type": "integer" },
"l": { "type": "integer" },
"tech": { "type": "array", "items": { "type": "string" } }
},
"required": [
"title",
"slug",
"stars",
"forks",
"data",
"owner",
"owner_name",
"owner_slug",
"s",
"e",
"i",
"a",
"t",
"l",
"tech"
]
}
}
}

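A sketch of validating the dataset index against this schema with the jsonschema package that createreadmes.py already uses (the data filename dataset.json is an assumption; the diff does not show it):

import json
import jsonschema

with open("dataset.schema.json") as f:
    schema = json.load(f)
with open("dataset.json") as f:  # assumed filename
    data = json.load(f)
jsonschema.validate(data, schema)  # raises jsonschema.ValidationError on mismatch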
Security rules schema (new file, +67 lines)

@@ -0,0 +1,67 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"patternProperties": {
"^\\d+$": {
"type": "object",
"additionalProperties": false,
"properties": {
"status": {
"type": "string",
"enum": [
"disregarded",
"observed",
"not applicable",
"unknown"
]
},
"argument": {
"anyOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"artifacts": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"properties": {
"repository": { "type": "string" },
"branch": { "type": "string" },
"file": { "type": "string" },
"lines": { "type": "array", "items": { "type": "integer" } }
},
"required": [
"file"
]
}
}
},
"required": [
"status",
"argument"
]
}
},
"required": [
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18"
]
}
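For orientation, one rule entry this schema accepts, shown as the Python dict a YAML rules file loads into (values are illustrative; note that the required list at the bottom demands keys "1" through "18", omitted here for brevity):

rule_1 = {
    "status": "observed",
    "argument": ["Uses OAuth2 tokens.", "The gateway checks credentials."],
    "artifacts": [
        {"file": "src/main/resources/application.yml", "lines": [12, 13]},
    ],
}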