shk-readme-generator/createreadmes.py

from pathlib import Path
import json
from typing import Any
import requests

dataset_path = Path('dataset')
dataset_info = dataset_path / 'dataset.json'
token = "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"

def open_dataset() -> dict[str, Any]:
    with open(dataset_info, 'r') as f:
        return json.load(f)

def save_dataset(dataset: dict[str, Any]):
    with open(dataset_info, 'w') as f:
        json.dump(dataset, f, indent=4)

def get_json(uri: str):
    print(uri)
    resp = requests.get(url=uri, headers={"Authorization": f"Bearer {token}"})
    print(resp)
    if not resp.ok:
        try:
            error = resp.json()['message']
        except Exception:
            error = resp.text
        raise Exception(f"Invalid response: {error}")
    return resp.json()

def get_repo(slug: str):
    return get_json(f"https://api.github.com/repos/{slug}")
    
def get_user(name: str):
    return get_json(f"https://api.github.com/users/{name}")

def get_file(slug: str, path: str):
    return get_json(f"https://api.github.com/repos/{slug}/contents/{path}")
    
def plural(amount: int, name: str, plural: str = 's'):
    return f"{amount} {name}{plural[:amount^1]}"

update_dataset = False

def get_name(slug: str):
    return slug[slug.find('/')+1:]

def write_model_readmes(dataset: dict[str, Any]):
    for model_id, info in dataset.items():
        dir = dataset_path / model_id
        readme = dir / 'README.md'
        slug: str = info['slug']
        data = info.get('data')
        if not data:
            data = get_repo(slug)
            info['data'] = data
        owner_url = data.get('owner', {}).get('url')
        if not owner_url:
            raise Exception(f'No owner in repo {slug}!')
        owner = info.get('owner')
        if not owner:
            owner = get_json(owner_url)
            info['owner'] = owner
        owner_name = owner.get('name')
        if not owner_name:
            raise Exception(f'No owner name in repo {slug}!')
        stars = data['stargazers_count']
        forks = data['forks']
        owner_slug = owner['login']
        info['stars'] = stars
        info['forks'] = forks
        info['owner_name'] = owner_name
        info['owner_slug'] = owner_slug
        print(f"Writing readme file {readme}")
        with open(readme, 'w', encoding="utf-8") as f:
            f.write(f"""# {slug}

## Repository Information

Repository: [GitHub](https://github.com/{slug})

Owner: [{owner_name}](https://github.com/{owner_slug})

The repository has {plural(stars, 'star')} and was forked {plural(forks, 'time')}. The codebase consists of {plural(info['l'], 'line')} of code and makes use of the following technologies:

{chr(10).join(f'- {tech}' for tech in info['tech'])}

## Data Flow Diagram

### Statistics

The Application consists of a total of {plural(info['t'], 'element')}:

Element | Count
-- | --
Services | {info['s']}
External Entities | {info['e']}
Information Flows | {info['i']}
Annotations | {info['a']}
Total Items | {info['t']}

### Diagram

The below diagram is generated from the corresponding [model file]({model_id}.py).

Formats:
- [PlantUML Model]({model_id}/{model_id}.txt)
- [SVG Vector Image]({model_id}/{model_id}.svg)
- [PNG Raster Image]({model_id}/{model_id}.png)

![Data Flow Diagram]({model_id}/{model_id}.svg)""")

def write_root_readme(dataset: dict[str, Any]):
    print(f"Writing main readme file")
    with open('README.md', 'w', encoding="utf-8") as f:
        f.write(f"""# Dataset of Dataflow Diagrams

This repository contains of 17 manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.

## Models

Name | Source | LoC | Stars | Forks | DFD Items | Technologies
-- | -- | -- | -- | -- | -- | --
""")
        for model_id, info in dataset.items():
            f.write(f"[{info['slug']}](dataset/{model_id}/README.md) | [GitHub](https://github.com/{info['slug']}) | {info['l']} | {info['stars']} | {info['forks']} | {info['t']} | {len(info['tech'])}\n")

def main():
    dataset = open_dataset()
    write_root_readme(dataset)
    write_model_readmes(dataset)
    save_dataset(dataset)

if __name__ == '__main__':
    main()
Revision 25th of October 2022-11-22 16:26:31 +01:00			`from pathlib import Path`
			`import json`
			`from typing import Any`
			`import requests`

			`dataset_path = Path('dataset')`
			`dataset_info = dataset_path / 'dataset.json'`
			`token = "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"`

			`def open_dataset() -> dict[str, Any]:`
			`with open(dataset_info, 'r') as f:`
			`return json.load(f)`

			`def save_dataset(dataset: dict[str, Any]):`
			`with open(dataset_info, 'w') as f:`
			`json.dump(dataset, f, indent=4)`

			`def get_json(uri: str):`
			`print(uri)`
			`resp = requests.get(url=uri, headers={"Authorization": f"Bearer {token}"})`
			`print(resp)`
			`if not resp.ok:`
			`try:`
			`error = resp.json()['message']`
			`except Exception:`
			`error = resp.text`
			`raise Exception(f"Invalid response: {error}")`
			`return resp.json()`

			`def get_repo(slug: str):`
			`return get_json(f"https://api.github.com/repos/{slug}")`

			`def get_user(name: str):`
			`return get_json(f"https://api.github.com/users/{name}")`

			`def get_file(slug: str, path: str):`
			`return get_json(f"https://api.github.com/repos/{slug}/contents/{path}")`

			`def plural(amount: int, name: str, plural: str = 's'):`
			`return f"{amount} {name}{plural[:amount^1]}"`

			`update_dataset = False`

			`def get_name(slug: str):`
			`return slug[slug.find('/')+1:]`

			`def write_model_readmes(dataset: dict[str, Any]):`
			`for model_id, info in dataset.items():`
			`dir = dataset_path / model_id`
			`readme = dir / 'README.md'`
			`slug: str = info['slug']`
			`data = info.get('data')`
			`if not data:`
			`data = get_repo(slug)`
			`info['data'] = data`
			`owner_url = data.get('owner', {}).get('url')`
			`if not owner_url:`
			`raise Exception(f'No owner in repo {slug}!')`
			`owner = info.get('owner')`
			`if not owner:`
			`owner = get_json(owner_url)`
			`info['owner'] = owner`
			`owner_name = owner.get('name')`
			`if not owner_name:`
			`raise Exception(f'No owner name in repo {slug}!')`
			`stars = data['stargazers_count']`
			`forks = data['forks']`
			`owner_slug = owner['login']`
			`info['stars'] = stars`
			`info['forks'] = forks`
			`info['owner_name'] = owner_name`
			`info['owner_slug'] = owner_slug`
			`print(f"Writing readme file {readme}")`
			`with open(readme, 'w', encoding="utf-8") as f:`
			`f.write(f"""# {slug}`

			`## Repository Information`

			`Repository: [GitHub](https://github.com/{slug})`

			`Owner: [{owner_name}](https://github.com/{owner_slug})`

			`The repository has {plural(stars, 'star')} and was forked {plural(forks, 'time')}. The codebase consists of {plural(info['l'], 'line')} of code and makes use of the following technologies:`

			`{chr(10).join(f'- {tech}' for tech in info['tech'])}`

			`## Data Flow Diagram`

			`### Statistics`

			`The Application consists of a total of {plural(info['t'], 'element')}:`

			`Element \| Count`
			`-- \| --`
			`Services \| {info['s']}`
			`External Entities \| {info['e']}`
			`Information Flows \| {info['i']}`
			`Annotations \| {info['a']}`
			`Total Items \| {info['t']}`

			`### Diagram`

			`The below diagram is generated from the corresponding [model file]({model_id}.py).`

			`Formats:`
			`- [PlantUML Model]({model_id}/{model_id}.txt)`
			`- [SVG Vector Image]({model_id}/{model_id}.svg)`
			`- [PNG Raster Image]({model_id}/{model_id}.png)`

			`![Data Flow Diagram]({model_id}/{model_id}.svg)""")`

			`def write_root_readme(dataset: dict[str, Any]):`
			`print(f"Writing main readme file")`
			`with open('README.md', 'w', encoding="utf-8") as f:`
			`f.write(f"""# Dataset of Dataflow Diagrams`

			This repository contains of 17 manually created dataflow diagrams (DFDs) of microservice applications found on GitHub. The dataset is published as an additional contribution to "Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java" [Simon Schneider, Riccardo Scandariato]. Each folder in the [`dataset`](dataset/) directory contains one DFD in a [CodeableModels](https://github.com/uzdun/CodeableModels)-format that can be executed to generate PNG, SVG and TXT files for the DFD. Each model refers to stereotypes and metaclasses from the [metamodel](microservice_dfds_metamodel.py) which needs to be imported. This repository already contains rendered versions for each model, thus setup and rendering is only necessary once changes to the models are made.

			`## Models`

			`Name \| Source \| LoC \| Stars \| Forks \| DFD Items \| Technologies`
			`-- \| -- \| -- \| -- \| -- \| -- \| --`
			`""")`
			`for model_id, info in dataset.items():`
			`f.write(f"[{info['slug']}](dataset/{model_id}/README.md) \| [GitHub](https://github.com/{info['slug']}) \| {info['l']} \| {info['stars']} \| {info['forks']} \| {info['t']} \| {len(info['tech'])}\n")`

			`def main():`
			`dataset = open_dataset()`
			`write_root_readme(dataset)`
			`write_model_readmes(dataset)`
			`save_dataset(dataset)`

			`if __name__ == '__main__':`
			`main()`