Compare commits

30ca3b6a08...main

14 Commits

SHA1
556035bdd9
44bae3fa85
62c96fcbf6
b4f5e3ebd4
ca6fee127c
887fb71a27
5f73765212
54eeea998b
edaacf8b0a
15ed7a5c73
e623b960a6
94545afb28
57829c67f5
d3be8cbc3d
Dockerfile.local (new file, 2 lines)

@@ -0,0 +1,2 @@
+FROM httpd:alpine
+COPY dist/ /usr/local/apache2/htdocs/
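Aside (editor's illustration, not part of the diff): Dockerfile.local packages the Jekyll output in dist/ into a stock Apache httpd image. A minimal sketch for trying it locally, assuming Docker is installed; the image tag code2dfd-docs:dev is invented:

from subprocess import run

# Build the docs image from Dockerfile.local and serve it on
# http://localhost:8080 (httpd:alpine listens on port 80 in the container).
run(["docker", "build", "-f", "Dockerfile.local", "-t", "code2dfd-docs:dev", "."], check=True)
run(["docker", "run", "--rm", "-p", "8080:80", "code2dfd-docs:dev"], check=True)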
build.py (new file, 71 lines)

@@ -0,0 +1,71 @@
+from pathlib import Path
+from shutil import rmtree
+from subprocess import PIPE, run
+from requests import post
+
+
+def buildx(repository: str, tags: list[str], build_platforms: list[str], dockerfile: str | Path = "Dockerfile", build_args: dict[str, str] | None = None, directory: str = ".", push: bool = True, pull: bool = False, progress: str = "auto", write_command: bool = False):
+    if build_args is None:
+        build_args = dict()
+    labels = [f"{repository}:{tag}" for tag in tags]
+    command = list(filter(None, ["docker", "buildx", "build",
+                                 "--platform", ",".join(build_platforms),
+                                 *[t for (key, value) in build_args.items()
+                                   for t in ("--build-arg", f"{key}={value}")],
+                                 "--file", str(dockerfile),
+                                 *[t for label in labels for t in ("--tag", label)],
+                                 f"--progress={progress}",
+                                 "--pull" if pull else None,
+                                 "--push" if push else None,
+                                 directory]))
+    if write_command:
+        print(" ".join(command))
+    run(command, check=True)
+
+
+def get_webhook():
+    try:
+        with open("portainer-webhook.txt", "r") as f:
+            return f.read().strip()
+    except Exception:
+        return None
+
+
+def update_portainer_stack(webhook_id: str):
+    print("Updating portainer stack...")
+    resp = post(f"https://docker.cnml.de/api/stacks/webhooks/{webhook_id}")
+    if not resp.ok:
+        try:
+            try:
+                error = resp.json()
+            except Exception:
+                error = resp.content.decode()
+                raise Exception(error)
+            raise Exception(f"{error['message']} ({error['details']})")
+        except Exception as e:
+            print("Failed to update:", e)
+    else:
+        print("Stack successfully updated!")
+
+
+if __name__ == '__main__':
+    output_path = Path("dist")
+    if output_path.exists():
+        rmtree(output_path)
+    output_path.mkdir()
+    dockerfile = Path("Dockerfile.local").resolve()
+    run(["python", "createreadmes.py"], check=True)
+    run(["bundle.bat", "exec", "jekyll", "build", "--destination", output_path], check=True)
+    branch = run(["git", "branch", "--show-current"],
+                 stdout=PIPE, check=True).stdout.decode().strip()
+    short_sha = run(["git", "rev-parse", "--short", "HEAD"],
+                    stdout=PIPE, check=True).stdout.decode().strip()
+    tags = [branch, short_sha]
+    if branch == 'main':
+        default_branch_tag = "latest"
+        print(f"On default branch, also building {default_branch_tag} tag!")
+        tags.append(default_branch_tag)
+    platforms = ['linux/amd64', 'linux/arm/v6', 'linux/arm/v7',
+                 'linux/arm64/v8', 'linux/386', 'linux/ppc64le', 'linux/s390x']
+    buildx("chenio/code2dfd", tags, platforms, dockerfile=dockerfile)
+    webhook_id = get_webhook()
+    if webhook_id is not None:
+        update_portainer_stack(webhook_id)
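Aside (editor's illustration): buildx() only assembles a docker buildx build command line and hands it to subprocess.run. A sketch of roughly what the call at the bottom of build.py would execute, with invented tag values ("main", "30ca3b6"); passing write_command=True makes the script print the real command:

# Sketch of the command buildx("chenio/code2dfd", tags, platforms, ...) builds.
tags = ["main", "30ca3b6", "latest"]
platforms = ["linux/amd64", "linux/arm/v6", "linux/arm/v7",
             "linux/arm64/v8", "linux/386", "linux/ppc64le", "linux/s390x"]
command = ["docker", "buildx", "build",
           "--platform", ",".join(platforms),
           "--file", "Dockerfile.local",
           *[t for tag in tags for t in ("--tag", f"chenio/code2dfd:{tag}")],
           "--progress=auto",
           "--push",
           "."]
print(" ".join(command))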
createreadmes.py (422 changed lines)

@@ -1,9 +1,10 @@
+from io import StringIO
 from pathlib import Path
 import json
 import itertools
 import yaml
 import jsonschema
-from typing import Any, TypedDict
+from typing import Any, Dict, List, Literal, NotRequired, Optional, TypedDict
 import requests
 try:
     from yachalk import chalk
@@ -23,11 +24,168 @@ def error(msg: str) -> Exception:
 def warning(msg: str):
     print(chalk.yellow(msg) if yachalk_imported else "Warning: {}".format(msg))
 
-def open_dataset() -> dict[str, Any]:
+class License(TypedDict):
+    key: str
+    name: str
+    spdx_id: str
+    url: str
+    node_id: str
+
+
+class Permissions(TypedDict):
+    admin: bool
+    maintain: bool
+    push: bool
+    triage: bool
+    pull: bool
+
+
+class Owner(TypedDict):
+    login: str
+    id: int
+    node_id: str
+    avatar_url: str
+    gravatar_id: str
+    url: str
+    html_url: str
+    followers_url: str
+    following_url: str
+    gists_url: str
+    starred_url: str
+    subscriptions_url: str
+    organizations_url: str
+    repos_url: str
+    events_url: str
+    received_events_url: str
+    type: str
+    site_admin: bool
+    name: NotRequired[str]
+    company: NotRequired[Optional[str]]
+    blog: NotRequired[str]
+    location: NotRequired[Optional[str]]
+    email: NotRequired[Optional[str]]
+    hireable: NotRequired[Optional[bool]]
+    bio: NotRequired[Optional[str]]
+    twitter_username: NotRequired[Optional[str]]
+    public_repos: NotRequired[int]
+    public_gists: NotRequired[int]
+    followers: NotRequired[int]
+    following: NotRequired[int]
+    created_at: NotRequired[str]
+    updated_at: NotRequired[str]
+
+
+class GithubRepositoryInformation(TypedDict):
+    id: int
+    node_id: str
+    name: str
+    full_name: str
+    private: bool
+    owner: Owner
+    html_url: str
+    description: Optional[str]
+    fork: bool
+    url: str
+    forks_url: str
+    keys_url: str
+    collaborators_url: str
+    teams_url: str
+    hooks_url: str
+    issue_events_url: str
+    events_url: str
+    assignees_url: str
+    branches_url: str
+    tags_url: str
+    blobs_url: str
+    git_tags_url: str
+    git_refs_url: str
+    trees_url: str
+    statuses_url: str
+    languages_url: str
+    stargazers_url: str
+    contributors_url: str
+    subscribers_url: str
+    subscription_url: str
+    commits_url: str
+    git_commits_url: str
+    comments_url: str
+    issue_comment_url: str
+    contents_url: str
+    compare_url: str
+    merges_url: str
+    archive_url: str
+    downloads_url: str
+    issues_url: str
+    pulls_url: str
+    milestones_url: str
+    notifications_url: str
+    labels_url: str
+    releases_url: str
+    deployments_url: str
+    created_at: str
+    updated_at: str
+    pushed_at: str
+    git_url: str
+    ssh_url: str
+    clone_url: str
+    svn_url: str
+    homepage: Optional[str]
+    size: int
+    stargazers_count: int
+    watchers_count: int
+    language: str
+    has_issues: bool
+    has_projects: bool
+    has_downloads: bool
+    has_wiki: bool
+    has_pages: bool
+    forks_count: int
+    mirror_url: None
+    archived: bool
+    disabled: bool
+    open_issues_count: int
+    license: Optional[License]
+    allow_forking: bool
+    is_template: bool
+    web_commit_signoff_required: bool
+    topics: List[str]
+    visibility: str
+    forks: int
+    open_issues: int
+    watchers: int
+    default_branch: str
+    permissions: Permissions
+    temp_clone_token: str
+    organization: NotRequired[Owner]
+    network_count: int
+    subscribers_count: int
+
+
+class ModelInformation(TypedDict):
+    title: NotRequired[str]
+    slug: str
+    branch: NotRequired[str]
+    data: GithubRepositoryInformation
+    owner: Owner
+    stars: int
+    forks: int
+    owner_name: str
+    owner_slug: str
+    s: int
+    e: int
+    i: int
+    a: int
+    t: int
+    l: int
+    tech: List[str]
+
+
+Dataset = dict[str, ModelInformation]
+
+
+def open_dataset() -> Dataset:
     with open(dataset_info, 'r') as f:
         return json.load(f)
 
-def save_dataset(dataset: dict[str, Any]):
+def save_dataset(dataset: Dataset):
     with open(dataset_info, 'w') as f:
         json.dump(dataset, f, indent=4)
@@ -59,12 +217,16 @@ from typing import TypedDict
 
 class Artifact(TypedDict):
     file: str
-    lines: list[int]
+    lines: NotRequired[list[int]]
+    repository: NotRequired[str]
+    branch: NotRequired[str]
+
+RuleStatus = Literal["disregarded", "observed", "not applicable", "unknown"]
+
 
 class SecurityRule(TypedDict):
-    status: str
-    argument: str
-    artifacts: None | list[Artifact]
+    status: RuleStatus
+    argument: str | list[str]
+    artifacts: NotRequired[list[Artifact]]
 
 rule_schema = yaml.safe_load("""type: object
 additionalProperties: no
@@ -80,20 +242,31 @@ properties:
       - not applicable
       - unknown
   argument:
-    type: string
+    anyOf:
+      - type: string
+      - type: array
+        items:
+          type: string
   artifacts:
     type: array
     items:
+      additionalProperties: no
+      required:
+        - file
       type: object
       properties:
         file:
           type: string
+        repository:
+          type: string
+        branch:
+          type: string
         lines:
           type: array
           items:
             type: integer""")
 
-def check_security_rules(security_rules: dict[Any, Any] | None) -> dict[int, SecurityRule]:
+def check_security_rules(model_id: str, security_rules: dict[Any, Any] | None) -> dict[int, SecurityRule]:
     if security_rules is None:
         raise Exception("Security rules file is empty!")
     for n in range(1, 19):
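Aside (editor's illustration): the schema change above lets argument be a single string or a list of strings. A self-contained sketch of how jsonschema treats that anyOf, using an abridged stand-in schema (not the full rule_schema):

import jsonschema

# Abridged stand-in for the 'argument' part of rule_schema above.
schema = {
    "type": "object",
    "properties": {
        "argument": {"anyOf": [
            {"type": "string"},
            {"type": "array", "items": {"type": "string"}},
        ]},
    },
    "required": ["argument"],
}

jsonschema.validate({"argument": "TLS is used on all edges"}, schema)       # passes
jsonschema.validate({"argument": ["first point", "second point"]}, schema)  # passes
try:
    jsonschema.validate({"argument": 42}, schema)
except jsonschema.ValidationError as e:
    print(e.json_path, e.message)  # $.argument ... is not valid under any of the given schemas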
@@ -103,12 +276,11 @@ def check_security_rules(security_rules: dict[Any, Any] | None) -> dict[int, SecurityRule]:
             jsonschema.validate(rule, rule_schema)
             rule: SecurityRule
             if rule["status"] == "unknown":
-                warning(f"Rule {n} is still unknown!")
+                warning(f"In model {model_id}: Rule {n} is still unknown!")
         except jsonschema.ValidationError as e:
-            error("Security rule {n}: {msg} at $.{n}.{path}".format(n=n, msg=e.message, path=e.json_path))
             warning("Not checking further rules!")
-            break
-    return security_rules
+            raise Exception("Security rule {n}: {msg} at $.{n}.{path}".format(n=n, msg=e.message, path=e.json_path)) from e
+    return dict(sorted(security_rules.items()))
 
 
 update_dataset = False
@@ -118,11 +290,108 @@ def get_name(slug: str):
 def get_tag_slug(tag: str) -> str:
     return tag.lower().replace(' ', '_')
 
-def write_model_readmes(dataset: dict[str, Any]):
+rule_names = {
+    1: "API Gateway",
+    2: "Mutual Authentication",
+    3: "Decoupled Authentication",
+    4: "Internal Identity Representation",
+    5: "Authentication Token Validation",
+    6: "Login Rate Limiting",
+    7: "Edge Encryption",
+    8: "Internal Encryption",
+    9: "Central Logging Subsystem",
+    10: "Local Logging Agent",
+    11: "Log Sanitization",
+    12: "Log Message Broker",
+    13: "Circuit Breaker",
+    14: "Load Balancing",
+    15: "Service Mesh Usage Limits",
+    16: "Service Registry Deployment",
+    17: "Service Registry Validation",
+    18: "Secret Manager",
+}
+
+
+def artifact_to_string(info: ModelInformation, artifact: Artifact):
+    file = Path(artifact['file'])
+    filename = file.name
+    project_branch = info.get("branch", "master")
+    branch = artifact.get("branch", project_branch)
+    file_url = f"https://github.com/{artifact.get('repository', info['slug'])}/blob/{branch}/{artifact['file']}"
+    lines = artifact.get("lines")
+    if lines is None:
+        return f"- {filename}: [File]({file_url})"
+    return f"- {filename}: Line{'s'[:len(lines)^1]}: {', '.join(f'[{line}]({file_url}#L{line})' for line in lines)}"
+
+
+def rule_to_string(info: ModelInformation, id: int, rule: SecurityRule | None):
+    if rule is None:
+        warning(f"Rule {id} is missing!")
+        return ""
+    argument = rule['argument']
+    argument = argument if isinstance(argument, str) else "".join(f"\n1. {arg}" for arg in argument)
+    text = f"""#### Rule {id}: {rule_names[id]} {{#rule{id:02}}}
+
+This rule is {rule['status']}: {argument}"""
+    artifacts = rule.get("artifacts", [])
+    if len(artifacts) > 0:
+        text = text + f"""
+
+Artifacts:
+{chr(10).join(artifact_to_string(info, artifact) for artifact in artifacts)}"""
+    return text
+
+
+def write_security_rules(info: ModelInformation, security_rules: dict[int, SecurityRule]):
+    icons: Dict[RuleStatus | str, str] = {
+        'disregarded': '<i class="fa fa-exclamation-circle" style="color: #d72b28;"></i>',
+        'observed': '<i class="fa fa-check-square-o" style="color: #6be16d;"></i>',
+        'not applicable': '<i class="fa fa-info-circle" style="color: #31708;"></i>',
+        'unknown': '<i class="fa fa-warning" style="color: #bfc600;"></i>',
+    }
+    return f"""## Security Rules
+
+{" | ".join(f"R{i}" for i in range(1, 19))}
+{" | ".join("--" for _ in range(1, 19))}
+{" | ".join(f'<a href="#rule{i:02}">{icons[security_rules.get(i, {"status": "unknown"})["status"]]}</a>' for i in range(1, 19))}
+
+### Authentication / Authorization
+
+{(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(1, 7))}
+
+### Encryption
+
+{(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(7, 9))}
+
+### Logging
+
+{(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(9, 13))}
+
+### Availability
+
+{(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(13, 16))}
+
+### Service Registry
+
+{(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(16, 18))}
+
+### Secret Management
+
+{(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(18, 19))}"""
+
+
+def write_file_if_changed(file: Path, content: str, encoding: str = "utf-8"):
+    old_content = None
+    if file.exists():
+        with file.open('r', encoding=encoding) as f:
+            old_content = f.read()
+    if old_content is None or old_content != content:
+        print(f"Writing changed file: {file}")
+        with file.open('w', encoding=encoding) as f:
+            f.write(content)
+
+
+def write_model_readmes(dataset: Dataset):
     for model_id, info in dataset.items():
         dir = output_path / 'dataset'
         readme = dir / f'{model_id}.md'
-        slug: str = info['slug']
+        slug = info['slug']
         data = info.get('data')
         if not data:
             data = get_repo(slug)
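Aside (editor's note): the 's'[:len(lines)^1] expression in artifact_to_string is a terse pluralization trick; n ^ 1 flips the lowest bit, so the slice is empty exactly when there is one line:

# 1 ^ 1 == 0 -> 's'[:0] == ''; 2 ^ 1 == 3 and 3 ^ 1 == 2 -> 's'[:n] == 's'
for lines in ([4], [4, 7], [4, 7, 9]):
    print(f"Line{'s'[:len(lines) ^ 1]}: {', '.join(map(str, lines))}")
# Line: 4
# Lines: 4, 7
# Lines: 4, 7, 9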
@@ -144,26 +413,27 @@ def write_model_readmes(dataset: dict[str, Any]):
         info['forks'] = forks
         info['owner_name'] = owner_name
         info['owner_slug'] = owner_slug
-        security_rules_file = dataset_path / model_id / 'security_rules.yaml'
+        model_path = dataset_path / model_id
+        security_rules_file = model_path / 'security_rules.yaml'
+        model_file = model_path / f"{model_id}.py"
+        with model_file.open("r") as f:
+            model = f.read()
+        security_rules = None
         try:
-            with open(security_rules_file, 'r') as f:
-                security_rules = yaml.safe_load(f)
-            security_rules = check_security_rules(security_rules)
+            with security_rules_file.open('r') as f:
+                security_rules = check_security_rules(model_id, yaml.safe_load(f))
         except FileNotFoundError:
             warning("Security rules file not found at {}".format(security_rules_file))
-            security_rules = {}
         except Exception as e:
             warning("Security rules file at {} is invalid: {}".format(security_rules_file, e))
-            security_rules = {}
-        print(f"Writing readme file {readme}")
         dir.mkdir(exist_ok=True)
-        with open(readme, 'w', encoding="utf-8") as f:
-            f.write(f"""---
+        write_file_if_changed(readme, f"""---
 title: {slug}
-keywords: model TODO
+keywords: model
 tags: [{', '.join(get_tag_slug(tech) for tech in info['tech'])}]
 sidebar: datasetdoc_sidebar
 permalink: {model_id}.html
+toc: false
 ---
 
 ## Repository Information
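Aside (editor's illustration): with these changes, the front matter written for a hypothetical model with id example_model would begin like the following (all values invented):

---
title: example-owner/example-repo
keywords: model
tags: [spring_boot]
sidebar: datasetdoc_sidebar
permalink: example_model.html
toc: false
---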
@@ -174,10 +444,20 @@ Owner: [{owner_name}](https://github.com/{owner_slug})
 
 The repository has {plural(stars, 'star')} and was forked {plural(forks, 'time')}. The codebase consists of {plural(info['l'], 'line')} of code and makes use of the following technologies:
 
-{chr(10).join(f'- {tech}' for tech in info['tech'])}
+{chr(10).join(f'<a class="btn btn-primary" style="margin-bottom: 5px" role="button" href="tag_{get_tag_slug(tech)}.html">{tech}</a>' for tech in info['tech'])}
 
 ## Data Flow Diagram
 
+### DFD Model
+
+{{% include note.html content="Download the [model file](../../dataset/{model_id}/{model_id}.py)" %}}
+
+The images below were generated by executing the model file. The DFD is represented as a CodeableModels file.
+
+```python
+{model}
+```
+
 ### Statistics
 
 The Application consists of a total of {plural(info['t'], 'element')}:
@@ -192,32 +472,59 @@ Total Items | {info['t']}
 
 ### Diagram
 
-The below diagram is generated from the corresponding [model file](../../dataset/{model_id}/{model_id}.py).
-
 Formats:
 - [PlantUML Model](../../dataset/{model_id}/{model_id}/{model_id}.txt)
 - [SVG Vector Image](../../dataset/{model_id}/{model_id}/{model_id}.svg)
 - [PNG Raster Image](../../dataset/{model_id}/{model_id}/{model_id}.png)
 
-""")
+![Data Flow Diagram](../../dataset/{model_id}/{model_id}/{model_id}.png)
+
+{"" if security_rules is None else write_security_rules(info, security_rules)}
+""")
 
 
-def write_root_readme(dataset: dict[str, Any]):
-    print(f"Writing main readme file")
-    with open('index.md', 'w', encoding="utf-8") as f:
-        f.write(f"""---
-title: code2DFD Dataset
-keywords: sample homepage
-tags: [getting_started]
+def write_root_readme(dataset: Dataset):
+    overview_dir = output_path / 'overview'
+    index_file = Path('index.md')
+
+    write_file_if_changed(index_file, f"""---
+title: code2DFD Documentation
+keywords: code2DFD introduction
+tags: [overview]
 sidebar: datasetdoc_sidebar
 permalink: index.html
-summary: Dataset of dataflow diagrams of microservice applications.
+toc: false
 ---
 
-# Dataset of Dataflow Diagrams
+## DaFD
 
-This repository contains of {len(dataset)} manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.
+{{% include image.html file="TUHH_logo-wortmarke_en_rgb.svg" alt="TUHH Logo" max-width="350" %}}
+{{% include image.html file="company_logo_big.png" alt="SoftSec Institute Logo" max-width="350" %}}
 
-## Models
+This is DaFD, a dataset containing Dataflow Diagrams (DFDs) of microservices written in Java. The models correspond to actual implementation code of open-source applications found on GitHub.
+The DFDs are presented in multiple formats and contain full traceability of all model items to code, indicating the evidence for their implementation. In addition to the models themselves, we present a mapping to a list of 17 architectural security best-practices, i.e. a table indicating whether each rule is followed or not. For those that are not followed, we created model variants that do follow the rule. These variants were crafted purely on the model level and the added items do not correspond to code anymore. All artifacts were created manually by researchers of the Institute of Software Security at Hamburg University of Technology.
+
+## Table of Contents
+
+- [Overview](index.html)
+- [Dataflow Diagrams](dfds.html)
+- [Use-Cases](usecases.html)
+- [Models](models.html)
+""")
+
+    models_file = overview_dir / 'models.md'
+    write_file_if_changed(models_file, f"""---
+title: Models
+keywords: dataset models
+tags: [overview]
+sidebar: datasetdoc_sidebar
+permalink: models.html
+summary: Dataset of dataflow diagrams of microservice applications.
+datatable: true
+---
+
+The following table presents the models in this dataset. It shows some properties about their popularity and the size of the models. Column `Source` links directly to the corresponding repository on GitHub. If you click on the name of an entry, you will be referred to the model and all artifacts.
+
+Please select a model in column `Name`.
 
 <div class="datatable-begin"></div>
@@ -226,38 +533,39 @@ Name | Source | LoC | Stars | Forks | DFD Items | Technologies
 {chr(10).join(f"[{info['slug']}]({model_id}.html) | [GitHub](https://github.com/{info['slug']}) | {info['l']} | {info['stars']} | {info['forks']} | {info['t']} | {len(info['tech'])}" for model_id, info in dataset.items())}
 
 <div class="datatable-end"></div>
-
-## DFD Items
-
-Do culpa deserunt est excepteur amet. Non pariatur ea elit ad eiusmod veniam exercitation nulla. Commodo do adipisicing amet et. Voluptate laboris commodo dolor eu mollit ipsum. Amet reprehenderit velit eu culpa amet exercitation. Elit esse ullamco duis mollit quis. Eiusmod qui reprehenderit sunt cupidatat Lorem anim occaecat enim sint eiusmod tempor.
-
-## Use-Cases
-
-Veniam culpa nostrud id laborum deserunt consectetur consectetur voluptate. Sint aute cupidatat velit irure elit laboris anim labore esse labore. Quis ullamco ut consequat amet. Enim sit laboris deserunt veniam duis aliqua irure proident.
 """)
 
-def write_tag_readme(dataset: dict[str, Any]):
+def write_tag_readme(dataset: Dataset):
     tag_dir = output_path / 'tags'
     known_tech = set(tech for model in dataset.values() for tech in model['tech'])
-    print(f"Writing tag data file")
-    with open('_data/tags.yml', 'r+') as f:
-        tags = yaml.safe_load(f)
-        tags['allowed-tags'] = list(sorted(set(itertools.chain(tags['allowed-tags'], (get_tag_slug(tech) for tech in known_tech)))))
-        f.seek(0)
-        yaml.dump(tags, f)
-        f.truncate()
+    tags_data_path = Path('_data')
+    tags_data_file = tags_data_path / 'tags.yml'
+    tags_data_path.mkdir(exist_ok=True, parents=True)
+    if tags_data_file.exists():
+        with tags_data_file.open('r') as f:
+            tags: dict[Any, Any] = yaml.safe_load(f)
+    else:
+        tags = {}
+
+    tags['allowed-tags'] = list(sorted(set(itertools.chain(tags.get('allowed-tags', []), (get_tag_slug(tech) for tech in known_tech)))))
+
+    with StringIO() as f:
+        yaml.dump(tags, f)
+        tags_content = f.getvalue()
+    write_file_if_changed(tags_data_file, tags_content)
+
     for tech in known_tech:
         slug = get_tag_slug(tech)
         info_file = tag_dir / f'tag_{slug}.md'
-        print(f"Writing tag file for {tech}")
-        with open(info_file, 'w', encoding="utf-8") as f:
-            f.write(f"""---
+        tag_dir.mkdir(exist_ok=True, parents=True)
+        write_file_if_changed(info_file, f"""---
 title: "{tech}"
 tagName: {slug}
 search: exclude
 permalink: tag_{slug}.html
 sidebar: datasetdoc_sidebar
+hide_sidebar: true
 folder: tags
 ---
 {{% include taglogic.html %}}
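Aside (editor's illustration): the StringIO round-trip in write_tag_readme renders the YAML to a string first so write_file_if_changed can compare it against the file on disk and skip unchanged writes. The same pattern in isolation (names and paths invented):

from io import StringIO
from pathlib import Path
import yaml

def write_if_changed(file: Path, content: str):
    # Skip the write (and the file's mtime bump) when nothing changed.
    if file.exists() and file.read_text(encoding="utf-8") == content:
        return
    file.write_text(content, encoding="utf-8")

with StringIO() as buf:
    yaml.dump({"allowed-tags": ["spring_boot"]}, buf)
    write_if_changed(Path("tags.yml"), buf.getvalue())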
dataset.schema.json (new file, 43 lines)

@@ -0,0 +1,43 @@
+{
+    "$schema": "http://json-schema.org/draft-04/schema",
+    "type": "object",
+    "patternProperties": {
+        ".+": {
+            "type": "object",
+            "properties": {
+                "title": { "type": "string" },
+                "slug": { "type": "string" },
+                "stars": { "type": "integer" },
+                "forks": { "type": "integer" },
+                "data": { "type": "object" },
+                "owner": { "type": "object" },
+                "owner_name": { "type": "string" },
+                "owner_slug": { "type": "string" },
+                "s": { "type": "integer" },
+                "e": { "type": "integer" },
+                "i": { "type": "integer" },
+                "a": { "type": "integer" },
+                "t": { "type": "integer" },
+                "l": { "type": "integer" },
+                "tech": { "type": "array", "items": { "type": "string" } }
+            },
+            "required": [
+                "title",
+                "slug",
+                "stars",
+                "forks",
+                "data",
+                "owner",
+                "owner_name",
+                "owner_slug",
+                "s",
+                "e",
+                "i",
+                "a",
+                "t",
+                "l",
+                "tech"
+            ]
+        }
+    }
+}
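Aside (editor's illustration): a minimal dataset.json entry that validates against this schema, with all values invented:

import json
import jsonschema

dataset = {
    "example_model": {
        "title": "example-owner/example-repo",
        "slug": "example-owner/example-repo",
        "stars": 42,
        "forks": 7,
        "data": {},
        "owner": {},
        "owner_name": "Example Owner",
        "owner_slug": "example-owner",
        "s": 5, "e": 4, "i": 10, "a": 2, "t": 21, "l": 1234,
        "tech": ["Spring Boot"],
    }
}

with open("dataset.schema.json") as f:
    jsonschema.validate(dataset, json.load(f))  # raises ValidationError on mismatch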
security_rules_schema.json (new file, 67 lines)

@@ -0,0 +1,67 @@
+{
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "object",
+    "patternProperties": {
+        "^\\d+$": {
+            "type": "object",
+            "additionalProperties": false,
+            "properties": {
+                "status": {
+                    "type": "string",
+                    "enum": [
+                        "disregarded",
+                        "observed",
+                        "not applicable",
+                        "unknown"
+                    ]
+                },
+                "argument": {
+                    "anyOf": [
+                        { "type": "string" },
+                        { "type": "array", "items": { "type": "string" } }
+                    ]
+                },
+                "artifacts": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "additionalProperties": false,
+                        "properties": {
+                            "repository": { "type": "string" },
+                            "branch": { "type": "string" },
+                            "file": { "type": "string" },
+                            "lines": { "type": "array", "items": { "type": "integer" } }
+                        },
+                        "required": [
+                            "file"
+                        ]
+                    }
+                }
+            },
+            "required": [
+                "status",
+                "argument"
+            ]
+        }
+    },
+    "required": [
+        "1",
+        "2",
+        "3",
+        "4",
+        "5",
+        "6",
+        "7",
+        "8",
+        "9",
+        "10",
+        "11",
+        "12",
+        "13",
+        "14",
+        "15",
+        "16",
+        "17",
+        "18"
+    ]
+}
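Aside (editor's illustration): one rule entry of a model's security_rules.yaml as this schema expects it (invented content; a complete file needs keys "1" through "18"):

import yaml

rule_file = yaml.safe_load('''
"1":
  status: observed
  argument:
    - An API gateway is deployed in front of all services.
    - External requests cannot bypass it.
  artifacts:
    - file: gateway/src/main/resources/application.yml
      lines: [10, 11]
''')
print(rule_file["1"]["status"])  # observed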