# shk-readme-generator/createreadmes.py

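"""Generate the Jekyll pages for the DFD dataset: the main index page,
one readme per model, and one page per technology tag."""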
import itertools
import json
import os
from pathlib import Path
from typing import Any, NamedTuple

import jsonschema
import requests
import yaml

try:
    from yachalk import chalk
    yachalk_imported = True
except ModuleNotFoundError:
    yachalk_imported = False

dataset_path = Path('dataset')
output_path = Path('pages')
dataset_info = dataset_path / Path('dataset.json')

# GitHub API token, read from the environment instead of being hard-coded
# in the source (the variable name GITHUB_TOKEN is this script's convention).
token = os.environ.get('GITHUB_TOKEN', '')


def error(msg: str) -> Exception:
    print(chalk.red(msg) if yachalk_imported else "Error: {}".format(msg))
    return Exception(msg)


def warning(msg: str):
    print(chalk.yellow(msg) if yachalk_imported else "Warning: {}".format(msg))


def open_dataset() -> dict[str, Any]:
    with open(dataset_info, 'r') as f:
        return json.load(f)


def save_dataset(dataset: dict[str, Any]):
    with open(dataset_info, 'w') as f:
        json.dump(dataset, f, indent=4)
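

# Shape of dataset/dataset.json as the code below consumes it (the entry is
# illustrative; "data", "owner", "stars" etc. are cached back into the file
# when update_dataset is set):
#
#   {
#       "some_model_id": {
#           "slug": "owner/repo",
#           "tech": ["Spring", "Docker"],
#           "l": 12345,                                   # lines of code
#           "s": 10, "e": 2, "i": 25, "a": 30, "t": 67    # DFD item counts
#       }
#   }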


def get_json(uri: str):
    print(uri)
    resp = requests.get(url=uri, headers={"Authorization": f"Bearer {token}"})
    print(resp)
    if not resp.ok:
        try:
            resp_error = resp.json()['message']
        except Exception:
            resp_error = resp.text
        raise Exception(f"Invalid response: {resp_error}")
    return resp.json()


def get_repo(slug: str):
    return get_json(f"https://api.github.com/repos/{slug}")


def get_user(name: str):
    return get_json(f"https://api.github.com/users/{name}")


def get_file(slug: str, path: str):
    return get_json(f"https://api.github.com/repos/{slug}/contents/{path}")
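
# Illustrative calls (slugs and paths are hypothetical):
#   get_repo("owner/repo")                # repository metadata
#   get_user("octocat")                   # user profile
#   get_file("owner/repo", "README.md")   # contents-API response; the file
#                                         # body is base64-encoded in "content"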


def plural(amount: int, name: str, suffix: str = 's'):
    # `amount ^ 1` is zero only when amount == 1, so the suffix is dropped
    # exactly for the singular: plural(1, 'star') -> "1 star".
    return f"{amount} {name}{suffix[:amount ^ 1]}"


class Artifact(NamedTuple):
    file: str
    lines: list[int]


class SecurityRule(NamedTuple):
    status: str
    argument: str
    artifacts: None | list[Artifact]


# JSON-Schema document (written in YAML) that each rule entry in a
# security_rules.yaml file must satisfy.
rule_schema = yaml.safe_load("""type: object
additionalProperties: no
required:
  - status
  - argument
properties:
  status:
    type: string
    enum:
      - disregarded
      - observed
      - not applicable
      - unknown
  argument:
    type: string
  artifacts:
    type: array
    items:
      type: object
      properties:
        file:
          type: string
        lines:
          type: array
          items:
            type: integer""")
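
# A minimal security_rules.yaml entry that passes the schema above (rule
# number, argument and artifact are illustrative):
#
#   1:
#     status: observed
#     argument: All service-to-service traffic is TLS-encrypted.
#     artifacts:
#       - file: gateway/src/main/resources/application.yml
#         lines: [12, 13]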


def check_security_rules(security_rules: dict[Any, Any] | None) -> dict[int, SecurityRule]:
    if security_rules is None:
        raise Exception("Security rules file is empty!")

    # Rules 1..18 must all be evaluated and match the schema; stop at the
    # first invalid rule.
    for n in range(1, 19):
        try:
            rule = security_rules.get(n, None)
            if rule is None:
                raise jsonschema.ValidationError('Rule {} is not evaluated'.format(n))
            jsonschema.validate(rule, rule_schema)
        except jsonschema.ValidationError as e:
            error("Security rule {n}: {msg} at $.{n}.{path}".format(n=n, msg=e.message, path=e.json_path))
            warning("Not checking further rules!")
            break
    return security_rules


update_dataset = False


def get_name(slug: str):
    # "owner/repo" -> "repo"
    return slug[slug.find('/')+1:]


def get_tag_slug(tag: str) -> str:
    return tag.lower().replace(' ', '_')


def write_model_readmes(dataset: dict[str, Any]):
    for model_id, info in dataset.items():
        model_dir = output_path / 'dataset'
        readme = model_dir / f'{model_id}.md'
        slug: str = info['slug']
        data = info.get('data')
        if not data:
            data = get_repo(slug)
            info['data'] = data
        owner_url = data.get('owner', {}).get('url')
        if not owner_url:
            raise Exception(f'No owner in repo {slug}!')
        owner = info.get('owner')
        if not owner:
            owner = get_json(owner_url)
            info['owner'] = owner
        owner_name = owner.get('name')
        if not owner_name:
            raise Exception(f'No owner name in repo {slug}!')
        stars = data['stargazers_count']
        forks = data['forks']
        owner_slug = owner['login']
        info['stars'] = stars
        info['forks'] = forks
        info['owner_name'] = owner_name
        info['owner_slug'] = owner_slug

        security_rules_file = dataset_path / model_id / 'security_rules.yaml'
        try:
            with open(security_rules_file, 'r') as f:
                security_rules = yaml.safe_load(f)
            security_rules = check_security_rules(security_rules)
        except FileNotFoundError:
            warning("Security rules file not found at {}".format(security_rules_file))
            security_rules = {}
        except Exception as e:
            warning("Security rules file at {} is invalid: {}".format(security_rules_file, e))
            security_rules = {}

        print(f"Writing readme file {readme}")
        model_dir.mkdir(exist_ok=True)
        with open(readme, 'w', encoding="utf-8") as f:
            f.write(f"""---
title: {slug}
keywords: model TODO
tags: [{', '.join(get_tag_slug(tech) for tech in info['tech'])}]
sidebar: datasetdoc_sidebar
permalink: {model_id}.html
---

## Repository Information

Repository: [GitHub](https://github.com/{slug})

Owner: [{owner_name}](https://github.com/{owner_slug})

The repository has {plural(stars, 'star')} and was forked {plural(forks, 'time')}. The codebase consists of {plural(info['l'], 'line')} of code and makes use of the following technologies:

{chr(10).join(f'- {tech}' for tech in info['tech'])}

## Data Flow Diagram

### Statistics

The application consists of a total of {plural(info['t'], 'element')}:

Element | Count
-- | --
Services | {info['s']}
External Entities | {info['e']}
Information Flows | {info['i']}
Annotations | {info['a']}
Total Items | {info['t']}

### Diagram

The diagram below is generated from the corresponding [model file](../../dataset/{model_id}/{model_id}.py).

Formats:

- [PlantUML Model](../../dataset/{model_id}/{model_id}/{model_id}.txt)
- [SVG Vector Image](../../dataset/{model_id}/{model_id}/{model_id}.svg)
- [PNG Raster Image](../../dataset/{model_id}/{model_id}/{model_id}.png)

![Data Flow Diagram](../../dataset/{model_id}/{model_id}/{model_id}.svg)""")


def write_root_readme(dataset: dict[str, Any]):
    print("Writing main readme file")
    with open('index.md', 'w', encoding="utf-8") as f:
        f.write(f"""---
title: code2DFD Dataset
keywords: sample homepage
tags: [getting_started]
sidebar: datasetdoc_sidebar
permalink: index.html
summary: Dataset of dataflow diagrams of microservice applications.
---

# Dataset of Dataflow Diagrams

This repository contains {len(dataset)} manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels) format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py), which needs to be imported. This repository already contains rendered versions of each model, so setup and rendering are only necessary once changes to the models are made.

## Models

<div class="datatable-begin"></div>

Name | Source | LoC | Stars | Forks | DFD Items | Technologies
-- | -- | -- | -- | -- | -- | --
{chr(10).join(f"[{info['slug']}]({model_id}.html) | [GitHub](https://github.com/{info['slug']}) | {info['l']} | {info['stars']} | {info['forks']} | {info['t']} | {len(info['tech'])}" for model_id, info in dataset.items())}

<div class="datatable-end"></div>

## DFD Items

Do culpa deserunt est excepteur amet. Non pariatur ea elit ad eiusmod veniam exercitation nulla. Commodo do adipisicing amet et. Voluptate laboris commodo dolor eu mollit ipsum. Amet reprehenderit velit eu culpa amet exercitation. Elit esse ullamco duis mollit quis. Eiusmod qui reprehenderit sunt cupidatat Lorem anim occaecat enim sint eiusmod tempor.

## Use-Cases

Veniam culpa nostrud id laborum deserunt consectetur consectetur voluptate. Sint aute cupidatat velit irure elit laboris anim labore esse labore. Quis ullamco ut consequat amet. Enim sit laboris deserunt veniam duis aliqua irure proident.
""")
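

# _data/tags.yml must already exist and contain an "allowed-tags" list that
# this function extends, e.g. (illustrative):
#
#   allowed-tags:
#     - getting_started
#     - spring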
def write_tag_readme(dataset: dict[str, Any]):
    tag_dir = output_path / 'tags'
    known_tech = set(tech for model in dataset.values() for tech in model['tech'])

    print("Writing tag data file")
    with open('_data/tags.yml', 'r+') as f:
        tags = yaml.safe_load(f)
        tags['allowed-tags'] = sorted(set(itertools.chain(
            tags['allowed-tags'],
            (get_tag_slug(tech) for tech in known_tech),
        )))
        f.seek(0)
        yaml.dump(tags, f)
        f.truncate()

    for tech in known_tech:
        slug = get_tag_slug(tech)
        info_file = tag_dir / f'tag_{slug}.md'
        print(f"Writing tag file for {tech}")
        with open(info_file, 'w', encoding="utf-8") as f:
            f.write(f"""---
title: "{tech}"
tagName: {slug}
search: exclude
permalink: tag_{slug}.html
sidebar: datasetdoc_sidebar
folder: tags
---
{{% include taglogic.html %}}
{{% include links.html %}}
""")


def main():
    dataset = open_dataset()
    write_tag_readme(dataset)
    write_root_readme(dataset)
    write_model_readmes(dataset)
    # Persist any repository metadata fetched during this run.
    if update_dataset:
        save_dataset(dataset)


if __name__ == '__main__':
    main()