Added typing for Dataset

Added security rules on model pages. Hide toc for long security rules. Use datatable for dataset table.
2022-11-23 13:00:38 +01:00
parent 57829c67f5
commit 94545afb28
1 changed files with 241 additions and 11 deletions
--- a/createreadmes.py
+++ b/createreadmes.py
@@ -3,7 +3,7 @@ import json
 import itertools
 import yaml
 import jsonschema
-from typing import Any, TypedDict
+from typing import Any, List, NotRequired, Optional, TypedDict
 import requests
 try:
    from yachalk import chalk
@@ -23,11 +23,168 @@ def error(msg: str) -> Exception:
 def warning(msg: str):
    """Print *msg* as a warning.

    The message is coloured yellow through ``chalk`` when ``yachalk_imported``
    is truthy; otherwise it is printed with a plain ``Warning: `` prefix.
    """
    if yachalk_imported:
        rendered = chalk.yellow(msg)
    else:
        rendered = "Warning: {}".format(msg)
    print(rendered)
-def open_dataset() -> dict[str, Any]:
+class License(TypedDict):
    """License record of a repository.

    Used as the type of ``GithubRepositoryInformation.license``.
    NOTE(review): presumably mirrors the license object of the GitHub REST API - confirm.
    """
    key: str
    name: str
    spdx_id: str
    url: str
    node_id: str
 class Permissions(TypedDict):
    """Boolean permission flags; the type of ``GithubRepositoryInformation.permissions``."""
    admin: bool
    maintain: bool
    push: bool
    triage: bool
    pull: bool
 class Owner(TypedDict):
    """Account record of a repository owner or organization.

    Used for ``GithubRepositoryInformation.owner`` / ``.organization`` and for
    ``ModelInformation.owner``. Every key up to ``site_admin`` is required; the
    profile keys from ``name`` onward are ``NotRequired``.
    """
    login: str
    id: int
    node_id: str
    avatar_url: str
    gravatar_id: str
    url: str
    html_url: str
    followers_url: str
    following_url: str
    gists_url: str
    starred_url: str
    subscriptions_url: str
    organizations_url: str
    repos_url: str
    events_url: str
    received_events_url: str
    type: str
    site_admin: bool
    # Profile details: the keys may be absent, and the Optional ones may be null when present.
    name: NotRequired[str]
    company: NotRequired[Optional[str]]
    blog: NotRequired[str]
    location: NotRequired[Optional[str]]
    email: NotRequired[Optional[str]]
    hireable: NotRequired[Optional[bool]]
    bio: NotRequired[Optional[str]]
    twitter_username: NotRequired[Optional[str]]
    public_repos: NotRequired[int]
    public_gists: NotRequired[int]
    followers: NotRequired[int]
    following: NotRequired[int]
    created_at: NotRequired[str]
    updated_at: NotRequired[str]
 class GithubRepositoryInformation(TypedDict):
    """Repository record stored under ``ModelInformation.data``.

    NOTE(review): the key set appears to follow the "get a repository"
    response of the GitHub REST API - confirm against the API schema.
    ``organization`` is the only key that may be absent; ``description``,
    ``homepage``, ``language``, ``mirror_url`` and ``license`` may be null.
    """
    id: int
    node_id: str
    name: str
    full_name: str
    private: bool
    owner: Owner
    html_url: str
    description: Optional[str]
    fork: bool
    url: str
    forks_url: str
    keys_url: str
    collaborators_url: str
    teams_url: str
    hooks_url: str
    issue_events_url: str
    events_url: str
    assignees_url: str
    branches_url: str
    tags_url: str
    blobs_url: str
    git_tags_url: str
    git_refs_url: str
    trees_url: str
    statuses_url: str
    languages_url: str
    stargazers_url: str
    contributors_url: str
    subscribers_url: str
    subscription_url: str
    commits_url: str
    git_commits_url: str
    comments_url: str
    issue_comment_url: str
    contents_url: str
    compare_url: str
    merges_url: str
    archive_url: str
    downloads_url: str
    issues_url: str
    pulls_url: str
    milestones_url: str
    notifications_url: str
    labels_url: str
    releases_url: str
    deployments_url: str
    created_at: str
    updated_at: str
    pushed_at: str
    git_url: str
    ssh_url: str
    clone_url: str
    svn_url: str
    homepage: Optional[str]
    size: int
    stargazers_count: int
    watchers_count: int
    # Null when no primary language has been detected for the repository.
    language: Optional[str]
    has_issues: bool
    has_projects: bool
    has_downloads: bool
    has_wiki: bool
    has_pages: bool
    forks_count: int
    # A URL string for mirrored repositories, null otherwise; a bare `None`
    # annotation would reject the string case.
    mirror_url: Optional[str]
    archived: bool
    disabled: bool
    open_issues_count: int
    license: Optional[License]
    allow_forking: bool
    is_template: bool
    web_commit_signoff_required: bool
    topics: List[str]
    visibility: str
    forks: int
    open_issues: int
    watchers: int
    default_branch: str
    permissions: Permissions
    temp_clone_token: str
    organization: NotRequired[Owner]
    network_count: int
    subscribers_count: int
 class ModelInformation(TypedDict):
    """One dataset entry; the value type of ``Dataset``."""
    title: NotRequired[str]
    slug: str
    # When absent, artifact_to_string links against the 'master' branch.
    branch: NotRequired[str]
    # NOTE(review): declared as required, yet write_model_readmes reads it with
    # info.get('data') and falls back to get_repo(slug) when it is missing or empty.
    data: GithubRepositoryInformation
    owner: Owner
    stars: int
    forks: int
    owner_name: str
    owner_slug: str
    # NOTE(review): the meaning of the single-letter integer fields below is not
    # visible in this chunk - TODO document.
    s: int
    e: int
    i: int
    a: int
    t: int
    l: int
    tech: List[str]
 # Whole dataset: maps a model id to its ModelInformation
 # (write_model_readmes iterates it as `model_id, info`).
 Dataset = dict[str, ModelInformation]
 def open_dataset() -> Dataset:
    """Load the dataset from the JSON file at ``dataset_info``.

    Returns:
        The parsed JSON document. It is returned as-is and is not validated
        against the ``Dataset`` type.
    """
    # Pin the encoding so parsing does not depend on the platform default;
    # index.md is likewise written with an explicit utf-8 encoding.
    with open(dataset_info, 'r', encoding="utf-8") as f:
        return json.load(f)
-def save_dataset(dataset: dict[str, Any]):
+def save_dataset(dataset: Dataset):
    """Serialize *dataset* as JSON with a 4-space indent into the ``dataset_info`` file, overwriting it."""
    with open(dataset_info, 'w') as f:
        json.dump(dataset, f, indent=4)
@@ -64,7 +221,7 @@ class Artifact(TypedDict):
 class SecurityRule(TypedDict):
    status: str
    argument: str
-    artifacts: None | list[Artifact]
+    artifacts: NotRequired[list[Artifact]]
 rule_schema = yaml.safe_load("""type: object
 additionalProperties: no
@@ -105,10 +262,9 @@ def check_security_rules(security_rules: dict[Any, Any] | None) -> dict[int, Sec
            if rule["status"] == "unknown":
                warning(f"Rule {n} is still unknown!")
        except jsonschema.ValidationError as e:
            error("Security rule {n}: {msg} at $.{n}.{path}".format(n=n, msg=e.message, path=e.json_path))
            warning("Not checking further rules!")
-            break
+            raise Exception("Security rule {n}: {msg} at $.{n}.{path}".format(n=n, msg=e.message, path=e.json_path)) from e
-    return security_rules
+    return dict(sorted(security_rules.items()))
 update_dataset = False
@@ -118,11 +274,81 @@ def get_name(slug: str):
 def get_tag_slug(tag: str) -> str:
    """Return *tag* lower-cased, with every space turned into an underscore."""
    words = tag.lower().split(' ')
    return '_'.join(words)
-def write_model_readmes(dataset: dict[str, Any]):
+rule_names = {
    1: "API Gateway",
    2: "Mutual Authentication",
    3: "Decoupled Authentication",
    4: "Internal Identity Represenation",
    5: "Authentication Token Validation",
    6: "Login Rate Limiting",
    7: "Edge Encryption",
    8: "Internal Encryption",
    9: "Central Logging Subsystem",
    10: "Local Logging Agent",
    11: "Log Sanitization",
    12: "Log Message Broker",
    13: "Circuit Breaker",
    14: "Load Balancing",
    15: "Service Mesh Usage Limits",
    16: "Service Registry Deployment",
    17: "Service Registry Validation",
    18: "Secret Manager",
 }
 def artifact_to_string(info: ModelInformation, artifact: Artifact):
    """Render one artifact as a Markdown bullet with a GitHub link per line number.

    Each link points at
    ``https://github.com/<slug>/blob/<branch>/<file>#L<line>``; the branch is
    ``info['branch']`` and falls back to ``master`` when that key is absent.
    Only the file's base name is shown as the bullet label.
    """
    relative_path = artifact['file']
    base_name = Path(relative_path).name
    branch = info.get('branch', 'master')
    blob_url = f"https://github.com/{info['slug']}/blob/{branch}/{relative_path}"
    line_numbers = artifact['lines']
    # Singular only for exactly one line; "Lines" otherwise (also for none).
    label = "Line" if len(line_numbers) == 1 else "Lines"
    links = ', '.join(f'[{number}]({blob_url}#L{number})' for number in line_numbers)
    return f"- {base_name}: {label}: {links}"
 def rule_to_string(info: ModelInformation, id: int, rule: SecurityRule | None):
    """Render one security rule as a Markdown fragment.

    Args:
        info: Dataset entry of the model; forwarded to artifact_to_string to
            build the artifact links.
        id: Rule number; used in the heading and as the key into rule_names.
        rule: The rule to render, or None when the model has no such rule.

    Returns:
        "" when *rule* is None; otherwise a heading plus the status/argument
        line, followed by an "Artifacts:" list when the rule has artifacts.

    Raises:
        KeyError: if *id* is not a key of rule_names.
    """
    if rule is None:
        # warning(f"Rule {id} is missing!") # TODO Enable warning
        return ""
    text = f"""#### Rule {id}: {rule_names[id]}
 This rule is {rule['status']}: {rule['argument']}"""
    # `or []` also covers an explicit null value, which `.get(key, [])` would
    # hand through as None and make the emptiness check below raise.
    artifacts = rule.get("artifacts") or []
    if artifacts:
        text = text + f"""
 Artifacts:
 {chr(10).join(artifact_to_string(info, artifact) for artifact in artifacts)}"""
    return text
 def write_security_rules(info: ModelInformation, security_rules: dict[int, SecurityRule]):
    """Build the "Security Rules" Markdown section for one model and return it.

    Despite the name, nothing is written here; the text is returned. Rules
    1-6, 7-8, 9-12, 13-15, 16-17 and 18 are rendered under the six category
    headings below. Each rule is rendered by rule_to_string and the rules of a
    category are joined with two newlines (chr(10) * 2).

    Rules missing from *security_rules* are passed on as None, for which
    rule_to_string returns "", so a gap still contributes an empty entry to
    the join.
    """
    return f"""## Security Rules
 ### Authentication / Authorization
 {(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(1, 7))}
 ### Encryption
 {(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(7, 9))}
 ### Logging
 {(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(9, 13))}
 ### Availability
 {(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(13, 16))}
 ### Service Registry
 {(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(16, 18))}
 ### Secret Management
 {(chr(10)*2).join(rule_to_string(info, i, security_rules.get(i)) for i in range(18, 19))}"""
 def write_model_readmes(dataset: Dataset):
    for model_id, info in dataset.items():
        dir = output_path / 'dataset'
        readme = dir / f'{model_id}.md'
-        slug: str = info['slug']
+        slug = info['slug']
        data = info.get('data')
        if not data:
            data = get_repo(slug)
@@ -164,6 +390,7 @@ keywords: model TODO
 tags: [{', '.join(get_tag_slug(tech) for tech in info['tech'])}]
 sidebar: datasetdoc_sidebar
 permalink: {model_id}.html
 toc: false
 ---
 ## Repository Information
@@ -204,9 +431,11 @@ Formats:
 - [PNG Raster Image](../../dataset/{model_id}/{model_id}/{model_id}.png)
 ![Data Flow Diagram](../../dataset/{model_id}/{model_id}/{model_id}.svg)
 {write_security_rules(info, security_rules)}
 """)
-def write_root_readme(dataset: dict[str, Any]):
+def write_root_readme(dataset: Dataset):
    print(f"Writing main readme file")
    with open('index.md', 'w', encoding="utf-8") as f:
        f.write(f"""---
@@ -245,6 +474,7 @@ tags: []
 sidebar: datasetdoc_sidebar
 permalink: dataset.html
 summary: Dataset of dataflow diagrams of microservice applications.
 datatable: true
 ---
 # Dataset of Dataflow Diagrams
@@ -262,7 +492,7 @@ Name | Source | LoC | Stars | Forks | DFD Items | Technologies
 <div class="datatable-end"></div>
 """)
-def write_tag_readme(dataset: dict[str, Any]):
+def write_tag_readme(dataset: Dataset):
    tag_dir = output_path / 'tags'
    known_tech = set(tech for model in dataset.values() for tech in model['tech'])
    print(f"Writing tag data file")