2022-11-22 16:26:31 +01:00
from pathlib import Path
import json
import os
2022-11-22 16:30:07 +01:00
import itertools
2022-11-22 16:27:23 +01:00
import yaml
import jsonschema
2022-11-22 16:31:20 +01:00
from typing import Any , TypedDict
2022-11-22 16:26:31 +01:00
import requests
2022-11-22 16:27:23 +01:00
try :
from yachalk import chalk
yachalk_imported = True
except ModuleNotFoundError :
yachalk_imported = False
2022-11-22 16:26:31 +01:00
# Directory holding one sub-folder per model plus the dataset index file.
dataset_path = Path('dataset')
# Directory the generated Markdown pages are written to.
output_path = Path('pages')
# JSON index describing every model in the dataset.
dataset_info = dataset_path / Path('dataset.json')
# GitHub API token. It is read from the environment so that no credential is
# stored in the source tree; an empty string means "no token configured".
# NOTE(review): the token that used to be hard-coded here must be treated as
# leaked and revoked.
token = os.environ.get("GITHUB_TOKEN", "")
2022-11-22 16:30:07 +01:00
def error(msg: str) -> Exception:
    """Print *msg* as an error and return an ``Exception`` carrying it.

    The message is coloured red through ``chalk`` when yachalk could be
    imported; otherwise it is printed with an ``Error: `` prefix. The
    exception is only returned, never raised, so the caller decides what
    to do with it.
    """
    if yachalk_imported:
        rendered = chalk.red(msg)
    else:
        rendered = "Error: {}".format(msg)
    print(rendered)
    return Exception(msg)
2022-11-22 16:27:23 +01:00
def warning(msg: str):
    """Print *msg* as a warning.

    The message is coloured yellow through ``chalk`` when yachalk could be
    imported; otherwise it is printed with a ``Warning: `` prefix.
    """
    if yachalk_imported:
        rendered = chalk.yellow(msg)
    else:
        rendered = "Warning: {}".format(msg)
    print(rendered)
2022-11-22 16:27:23 +01:00
2022-11-22 16:26:31 +01:00
def open_dataset() -> dict[str, Any]:
    """Load and return the dataset index stored at ``dataset_info``."""
    with dataset_info.open('r') as handle:
        loaded = json.load(handle)
    return loaded
def save_dataset(dataset: dict[str, Any]):
    """Write *dataset* to ``dataset_info`` as JSON indented by four spaces."""
    with dataset_info.open('w') as handle:
        json.dump(dataset, handle, indent=4)
def get_json(uri: str):
    """Fetch *uri* with an HTTP GET and return the decoded JSON body.

    The request carries a ``Bearer`` authorization header built from the
    module-level ``token`` when one is configured. Both the URI and the
    response object are printed for progress tracking.

    Raises:
        Exception: when the response status is not OK. The message contains
            the ``message`` field of the JSON error body when available,
            otherwise the raw response text.
    """
    print(uri)
    # Only attach credentials when a token is actually set, so an empty or
    # missing token does not produce a malformed Authorization header.
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url=uri, headers=headers, timeout=30)
    print(resp)
    if not resp.ok:
        try:
            resp_error = resp.json()['message']
        except (ValueError, KeyError, TypeError):
            # Body is not JSON, or not an object with a 'message' entry.
            resp_error = resp.text
        raise Exception(f"Invalid response: {resp_error}")
    return resp.json()
def get_repo(slug: str):
    """Return the GitHub API record of the repository named by *slug*."""
    repo_url = f"https://api.github.com/repos/{slug}"
    return get_json(repo_url)
def get_user(name: str):
    """Return the GitHub API record of the user called *name*."""
    user_url = f"https://api.github.com/users/{name}"
    return get_json(user_url)
def get_file(slug: str, path: str):
    """Return the GitHub API ``contents`` record for *path* in repository *slug*."""
    contents_url = f"https://api.github.com/repos/{slug}/contents/{path}"
    return get_json(contents_url)
def plural(amount: int, name: str, plural: str = 's'):
    """Return ``"<amount> <name>"`` with *plural* appended unless *amount* is 1.

    Args:
        amount: The quantity being described.
        name: Singular form of the noun.
        plural: Suffix appended for every amount other than exactly 1.

    Returns:
        The formatted phrase, e.g. ``"1 star"``, ``"0 stars"``, ``"2 boxes"``.
    """
    # An explicit comparison replaces the former ``plural[:amount ^ 1]`` slice,
    # which cut multi-character suffixes short for amount == 0.
    suffix = '' if amount == 1 else plural
    return f"{amount} {name}{suffix}"
2022-11-22 16:31:20 +01:00
from typing import TypedDict
2022-11-22 16:27:23 +01:00
2022-11-22 16:31:20 +01:00
class Artifact(TypedDict):
    """Typed view of one entry in a security rule's ``artifacts`` list."""

    # Name of the referenced file (presumably a path inside the analysed
    # repository; verify against the security_rules.yaml files).
    file: str
    # Integer line references within that file.
    lines: list[int]
2022-11-22 16:31:20 +01:00
class SecurityRule(TypedDict):
    """Typed view of a single evaluated security rule."""

    # Evaluation outcome; compared against "unknown" by check_security_rules.
    status: str
    # Free-text field accompanying the status.
    argument: str
    # Optional list of artifacts attached to the rule.
    artifacts: None | list[Artifact]
# JSON schema every security rule entry is validated against. It is written
# as a plain dict that mirrors the structure of the former YAML document
# (the YAML scalar ``no`` loads as the boolean False).
rule_schema = {
    "type": "object",
    "additionalProperties": False,
    "required": ["status", "argument"],
    "properties": {
        "status": {
            "type": "string",
            "enum": ["disregarded", "observed", "not applicable", "unknown"],
        },
        "argument": {"type": "string"},
        "artifacts": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "file": {"type": "string"},
                    "lines": {
                        "type": "array",
                        "items": {"type": "integer"},
                    },
                },
            },
        },
    },
}
2022-11-22 16:30:44 +01:00
def check_security_rules(security_rules: dict[Any, Any] | None) -> dict[int, SecurityRule]:
    """Validate rules 1 through 18 of *security_rules* against ``rule_schema``.

    A rule whose status is ``"unknown"`` only triggers a warning. The first
    rule that is missing or fails schema validation is reported through
    ``error`` and ``warning`` and stops the check; later rules are not
    inspected. The mapping is returned unchanged in every case.

    Raises:
        Exception: when *security_rules* is ``None``.
    """
    if security_rules is None:
        raise Exception("Security rules file is empty!")

    for rule_no in range(1, 19):
        try:
            entry = security_rules.get(rule_no)
            if entry is None:
                raise jsonschema.ValidationError(f"Rule {rule_no} is not evaluated")
            jsonschema.validate(entry, rule_schema)
            if entry["status"] == "unknown":
                warning(f"Rule {rule_no} is still unknown!")
        except jsonschema.ValidationError as exc:
            report = "Security rule {n}: {msg} at $.{n}.{path}".format(
                n=rule_no, msg=exc.message, path=exc.json_path
            )
            # error() only prints and returns an exception; nothing is raised.
            error(report)
            warning("Not checking further rules!")
            break
    return security_rules
2022-11-22 16:26:31 +01:00
# Switch consulted by main(): the dataset index is written back to disk only
# when this is true.
update_dataset: bool = False
def get_name(slug: str):
    """Return the part of *slug* after its first ``/``.

    A slug without any ``/`` is returned unchanged.
    """
    _, separator, remainder = slug.partition('/')
    if not separator:
        return slug
    return remainder
2022-11-22 16:30:07 +01:00
def get_tag_slug(tag: str) -> str:
    """Return *tag* lower-cased with every space replaced by an underscore."""
    lowered = tag.lower()
    return '_'.join(lowered.split(' '))
2022-11-22 16:26:31 +01:00
# Write one Markdown page per dataset entry into output_path / 'dataset'.
#
# For every (model_id, info) pair in `dataset` the function
#   1. fills info['data'] through get_repo(slug) and info['owner'] through
#      get_json(owner_url) when those entries are missing or empty,
#   2. copies the star count, fork count, owner name and owner login into
#      `info` (the caller's dict is mutated in place),
#   3. loads <dataset_path>/<model_id>/security_rules.yaml and passes it to
#      check_security_rules, falling back to {} with a warning on any failure,
#   4. renders the page through the f-string template at the end.
# Raises Exception when the repository record has no owner URL or the owner
# record has no name.
# NOTE(review): the bare timestamp lines interleaved with the code look like
# blame/annotation residue, not Python — confirm and strip.
def write_model_readmes ( dataset : dict [ str , Any ] ) :
for model_id , info in dataset . items ( ) :
2022-11-22 16:30:07 +01:00
# NOTE(review): `dir` shadows the builtin of the same name.
dir = output_path / ' dataset '
readme = dir / f ' { model_id } .md '
2022-11-22 16:26:31 +01:00
slug : str = info [ ' slug ' ]
# Repository record: reuse the cached copy in `info`, otherwise fetch and cache it.
data = info . get ( ' data ' )
if not data :
data = get_repo ( slug )
info [ ' data ' ] = data
owner_url = data . get ( ' owner ' , { } ) . get ( ' url ' )
if not owner_url :
raise Exception ( f ' No owner in repo { slug } ! ' )
# Owner record: same cache-or-fetch pattern as the repository record.
owner = info . get ( ' owner ' )
if not owner :
owner = get_json ( owner_url )
info [ ' owner ' ] = owner
owner_name = owner . get ( ' name ' )
if not owner_name :
raise Exception ( f ' No owner name in repo { slug } ! ' )
stars = data [ ' stargazers_count ' ]
forks = data [ ' forks ' ]
owner_slug = owner [ ' login ' ]
info [ ' stars ' ] = stars
info [ ' forks ' ] = forks
info [ ' owner_name ' ] = owner_name
info [ ' owner_slug ' ] = owner_slug
2022-11-22 16:30:44 +01:00
security_rules_file = dataset_path / model_id / ' security_rules.yaml '
2022-11-22 16:27:23 +01:00
# A missing or invalid rules file is not fatal: it is reported as a warning
# and an empty mapping is used instead.
# NOTE(review): security_rules is assigned here but not referenced by the
# template below — presumably output for it is still to be added; verify.
try :
with open ( security_rules_file , ' r ' ) as f :
security_rules = yaml . safe_load ( f )
security_rules = check_security_rules ( security_rules )
except FileNotFoundError :
warning ( " Security rules file not found at {} " . format ( security_rules_file ) )
security_rules = { }
2022-11-22 16:30:44 +01:00
except Exception as e :
warning ( " Security rules file at {} is invalid: {} " . format ( security_rules_file , e ) )
security_rules = { }
2022-11-22 16:26:31 +01:00
print ( f " Writing readme file { readme } " )
2022-11-22 16:30:07 +01:00
# NOTE(review): mkdir is called without parents=True, so output_path itself
# must already exist — confirm.
dir . mkdir ( exist_ok = True )
2022-11-22 16:26:31 +01:00
# The template reads info['tech'], info['l'], info['t'], info['s'], info['e'],
# info['i'] and info['a'] in addition to the values computed above;
# chr(10) is a newline used to join the technology list.
with open ( readme , ' w ' , encoding = " utf-8 " ) as f :
2022-11-22 16:30:07 +01:00
f . write ( f """ ---
title : { slug }
keywords : model TODO
tags : [ { ' , ' . join ( get_tag_slug ( tech ) for tech in info [ ' tech ' ] ) } ]
sidebar : datasetdoc_sidebar
permalink : { model_id } . html
- - -
2022-11-22 16:26:31 +01:00
## Repository Information
Repository : [ GitHub ] ( https : / / github . com / { slug } )
Owner : [ { owner_name } ] ( https : / / github . com / { owner_slug } )
The repository has { plural ( stars , ' star ' ) } and was forked { plural ( forks , ' time ' ) } . The codebase consists of { plural ( info [ ' l ' ] , ' line ' ) } of code and makes use of the following technologies :
{ chr ( 10 ) . join ( f ' - { tech } ' for tech in info [ ' tech ' ] ) }
## Data Flow Diagram
### Statistics
The Application consists of a total of { plural ( info [ ' t ' ] , ' element ' ) } :
Element | Count
- - | - -
Services | { info [ ' s ' ] }
External Entities | { info [ ' e ' ] }
Information Flows | { info [ ' i ' ] }
Annotations | { info [ ' a ' ] }
Total Items | { info [ ' t ' ] }
### Diagram
2022-11-22 16:30:44 +01:00
The below diagram is generated from the corresponding [ model file ] ( . . / . . / dataset / { model_id } / { model_id } . py ) .
2022-11-22 16:26:31 +01:00
Formats :
2022-11-22 16:30:44 +01:00
- [ PlantUML Model ] ( . . / . . / dataset / { model_id } / { model_id } / { model_id } . txt )
- [ SVG Vector Image ] ( . . / . . / dataset / { model_id } / { model_id } / { model_id } . svg )
- [ PNG Raster Image ] ( . . / . . / dataset / { model_id } / { model_id } / { model_id } . png )
2022-11-22 16:26:31 +01:00
2022-11-22 16:30:44 +01:00
! [ Data Flow Diagram ] ( . . / . . / dataset / { model_id } / { model_id } / { model_id } . svg ) """ )
2022-11-22 16:26:31 +01:00
# Write the landing page of the generated site to 'index.md' (a path relative
# to the current working directory).
#
# The page is one f-string template: front matter, an introduction embedding
# len(dataset), a table with one row per dataset entry (rows joined with
# chr(10), i.e. a newline) and two further sections.
# Each row reads info['slug'], info['l'], info['stars'], info['forks'],
# info['t'] and len(info['tech']).
# NOTE(review): info['stars'] / info['forks'] are assigned inside
# write_model_readmes, which main() calls after this function — presumably
# dataset.json already stores them; verify.
# NOTE(review): the "DFD Items" and "Use-Cases" sections still hold lorem-ipsum
# filler and the introduction reads "contains of"; both are emitted verbatim.
# NOTE(review): the bare timestamp lines interleaved with the code look like
# blame/annotation residue, not Python — confirm and strip.
def write_root_readme ( dataset : dict [ str , Any ] ) :
print ( f " Writing main readme file " )
2022-11-22 16:30:07 +01:00
with open ( ' index.md ' , ' w ' , encoding = " utf-8 " ) as f :
f . write ( f """ ---
title : code2DFD Dataset
keywords : sample homepage
tags : [ getting_started ]
sidebar : datasetdoc_sidebar
permalink : index . html
summary : Dataset of dataflow diagrams of microservice applications .
- - -
# Dataset of Dataflow Diagrams
2022-11-22 16:26:31 +01:00
2022-11-22 16:30:07 +01:00
This repository contains of { len ( dataset ) } manually created dataflow diagrams ( DFDs ) of microservice applications found on GitHub . The dataset is published as an additional contribution to " Automatic Extraction of Security-Rich Dataflow Diagrams for Microservice Applications written in Java " [ Simon Schneider , Riccardo Scandariato ] . Each folder in the [ ` dataset ` ] ( dataset / ) directory contains one DFD in a [ CodeableModels ] ( https : / / github . com / uzdun / CodeableModels ) - format that can be executed to generate PNG , SVG and TXT files for the DFD . Each model refers to stereotypes and metaclasses from the [ metamodel ] ( microservice_dfds_metamodel . py ) which needs to be imported . This repository already contains rendered versions for each model , thus setup and rendering is only necessary once changes to the models are made .
2022-11-22 16:26:31 +01:00
## Models
2022-11-22 16:27:23 +01:00
< div class = " datatable-begin " > < / div >
2022-11-22 16:26:31 +01:00
Name | Source | LoC | Stars | Forks | DFD Items | Technologies
- - | - - | - - | - - | - - | - - | - -
2022-11-22 16:30:07 +01:00
{ chr ( 10 ) . join ( f " [ { info [ ' slug ' ] } ]( { model_id } .html) | [GitHub](https://github.com/ { info [ ' slug ' ] } ) | { info [ ' l ' ] } | { info [ ' stars ' ] } | { info [ ' forks ' ] } | { info [ ' t ' ] } | { len ( info [ ' tech ' ] ) } " for model_id , info in dataset . items ( ) ) }
2022-11-22 16:27:23 +01:00
< div class = " datatable-end " > < / div >
## DFD Items
Do culpa deserunt est excepteur amet . Non pariatur ea elit ad eiusmod veniam exercitation nulla . Commodo do adipisicing amet et . Voluptate laboris commodo dolor eu mollit ipsum . Amet reprehenderit velit eu culpa amet exercitation . Elit esse ullamco duis mollit quis . Eiusmod qui reprehenderit sunt cupidatat Lorem anim occaecat enim sint eiusmod tempor .
## Use-Cases
Veniam culpa nostrud id laborum deserunt consectetur consectetur voluptate . Sint aute cupidatat velit irure elit laboris anim labore esse labore . Quis ullamco ut consequat amet . Enim sit laboris deserunt veniam duis aliqua irure proident .
2022-11-22 16:26:31 +01:00
""" )
2022-11-22 16:30:07 +01:00
# Register every technology used in `dataset` as a tag and write one tag page
# per technology.
#
# Step 1: '_data/tags.yml' is opened in 'r+' mode, its 'allowed-tags' list is
#         merged with the tag slugs of all known technologies (de-duplicated
#         and sorted) and the file is rewritten in place.
# Step 2: for each technology a Markdown page named after its tag slug is
#         written into output_path / 'tags'.
# NOTE(review): tag_dir is not created here — presumably it already exists; verify.
# NOTE(review): the bare timestamp lines interleaved with the code look like
# blame/annotation residue, not Python — confirm and strip.
def write_tag_readme ( dataset : dict [ str , Any ] ) :
tag_dir = output_path / ' tags '
# Distinct technology names across all dataset entries.
known_tech = set ( tech for model in dataset . values ( ) for tech in model [ ' tech ' ] )
print ( f " Writing tag data file " )
with open ( ' _data/tags.yml ' , ' r+ ' ) as f :
tags = yaml . safe_load ( f )
tags [ ' allowed-tags ' ] = list ( sorted ( set ( itertools . chain ( tags [ ' allowed-tags ' ] , ( get_tag_slug ( tech ) for tech in known_tech ) ) ) ) )
# Rewind, overwrite from the start, then cut off whatever is left of the old content.
f . seek ( 0 )
yaml . dump ( tags , f )
f . truncate ( )
# The doubled braces in the template below are f-string escapes that emit
# literal braces around the include directives.
for tech in known_tech :
slug = get_tag_slug ( tech )
info_file = tag_dir / f ' tag_ { slug } .md '
print ( f " Writing tag file for { tech } " )
with open ( info_file , ' w ' , encoding = " utf-8 " ) as f :
f . write ( f """ ---
title : " {tech} "
tagName : { slug }
search : exclude
permalink : tag_ { slug } . html
sidebar : datasetdoc_sidebar
folder : tags
- - -
{ { % include taglogic . html % } }
{ { % include links . html % } }
""" )
2022-11-22 16:26:31 +01:00
def main():
    """Regenerate all documentation pages from the dataset index.

    Loads the dataset, then writes the tag pages, the root page and the
    per-model pages in that order. The dataset index is saved back to disk
    only when the module-level ``update_dataset`` flag is true.
    """
    # The former ``global known_tags`` declaration was dropped: no such
    # variable is defined or used anywhere in this function.
    dataset = open_dataset()
    write_tag_readme(dataset)
    write_root_readme(dataset)
    write_model_readmes(dataset)
    if update_dataset:
        save_dataset(dataset)
2022-11-22 16:26:31 +01:00
2022-11-22 16:30:07 +01:00
# Run the generator only when the file is executed as a script. The stray
# ``yaml.dump`` expression that preceded this guard was a no-op attribute
# lookup and has been removed.
if __name__ == '__main__':
    main()