From 0edcbdca10c0a9720f240caf4180de327cf5c731 Mon Sep 17 00:00:00 2001 From: Michael Chen Date: Mon, 28 Oct 2019 15:13:00 +0100 Subject: [PATCH] Initial commit --- .gitignore | 398 ++++++++++++++++++ azala.info Scraper.sln | 25 ++ azala.info Scraper/App.config | 6 + azala.info Scraper/Card.cs | 16 + azala.info Scraper/Program.cs | 174 ++++++++ azala.info Scraper/Program.cs.old | 56 +++ azala.info Scraper/Properties/AssemblyInfo.cs | 36 ++ azala.info Scraper/azala.info Scraper.csproj | 90 ++++ azala.info Scraper/packages.config | 10 + 9 files changed, 811 insertions(+) create mode 100644 .gitignore create mode 100644 azala.info Scraper.sln create mode 100644 azala.info Scraper/App.config create mode 100644 azala.info Scraper/Card.cs create mode 100644 azala.info Scraper/Program.cs create mode 100644 azala.info Scraper/Program.cs.old create mode 100644 azala.info Scraper/Properties/AssemblyInfo.cs create mode 100644 azala.info Scraper/azala.info Scraper.csproj create mode 100644 azala.info Scraper/packages.config diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8a30d25 --- /dev/null +++ b/.gitignore @@ -0,0 +1,398 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool 
+coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files +*.ncb +*.aps + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml diff --git a/azala.info Scraper.sln 
b/azala.info Scraper.sln
new file mode 100644
index 0000000..acaae46
--- /dev/null
+++ b/azala.info Scraper.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.29418.71
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "azala.info Scraper", "azala.info Scraper\azala.info Scraper.csproj", "{886207E9-C764-47F1-A5E5-1556A9E28648}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{886207E9-C764-47F1-A5E5-1556A9E28648}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{886207E9-C764-47F1-A5E5-1556A9E28648}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{886207E9-C764-47F1-A5E5-1556A9E28648}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{886207E9-C764-47F1-A5E5-1556A9E28648}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {B230E700-F905-4365-9568-773D9A713941}
+	EndGlobalSection
+EndGlobal
diff --git a/azala.info Scraper/App.config b/azala.info Scraper/App.config
new file mode 100644
index 0000000..56efbc7
--- /dev/null
+++ b/azala.info Scraper/App.config
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/azala.info Scraper/Card.cs b/azala.info Scraper/Card.cs
new file mode 100644
index 0000000..11a5bf2
--- /dev/null
+++ b/azala.info Scraper/Card.cs
@@ -0,0 +1,18 @@
+using Newtonsoft.Json;
+
+namespace azala.info_Scraper {
+    /// <summary>A single card as deserialized from a card file and written to the cards table.</summary>
+    internal class Card {
+        [JsonProperty(PropertyName = "id")]
+        public long ID { get; set; }
+        [JsonProperty(PropertyName = "card_text")]
+        public string Text { get; set; }
+        [JsonProperty(PropertyName = "num_blanks")]
+        public int Blanks { get; set; }
+        [JsonProperty(PropertyName = "o")]
+        public int Order { get; set; }
+        // Overwritten by Program.ProcessFile with the ID parsed from the file name.
+        [JsonProperty(PropertyName = "deck_id")]
+        public int DeckID { get; internal set; }
+    }
+}
\ No newline at end of file
diff --git a/azala.info Scraper/Program.cs b/azala.info Scraper/Program.cs
new file mode 100644
index 0000000..f1a1c4b
--- /dev/null
+++ b/azala.info Scraper/Program.cs
@@ -0,0 +1,247 @@
+using Newtonsoft.Json;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using MySql.Data;
+using MySql.Data.MySqlClient;
+using System.Data;
+using System.Text.RegularExpressions;
+using System.Threading;
+
+namespace azala.info_Scraper {
+    /// <summary>
+    /// Watches a directory for *.que / *.ans JSON card files and imports their cards into
+    /// the MySQL cards table, moving each imported file into a "processed" sub-directory.
+    /// </summary>
+    public class Program : IDisposable {
+        private static readonly string[] searchPatterns = { "*.que", "*.ans" };
+
+        private readonly MySqlConnection connection;
+        private readonly StreamWriter errorstream;
+        private string processeddirectory;
+
+        /// <summary>Builds the (still closed) MySQL connection and opens errors.txt for appending.</summary>
+        public Program() {
+            // Every setting can be overridden through an environment variable; the fallbacks
+            // are the values that used to be hard-coded here.
+            // NOTE(review): the fallback password is committed to source control - rotate it
+            // and remove the fallback once SCRAPER_DB_PASSWORD is provisioned.
+            var settings = new MySqlConnectionStringBuilder {
+                Server = Environment.GetEnvironmentVariable("SCRAPER_DB_SERVER") ?? "localhost",
+                Database = Environment.GetEnvironmentVariable("SCRAPER_DB_NAME") ?? "cardsagainsthumanity",
+                UserID = Environment.GetEnvironmentVariable("SCRAPER_DB_USER") ?? "dbuser",
+                Password = Environment.GetEnvironmentVariable("SCRAPER_DB_PASSWORD") ?? "DSKJjojofsa9%=(!=i2100"
+            };
+            connection = new MySqlConnection(settings.ConnectionString);
+            connection.StateChange += Connection_StateChange;
+            // AutoFlush: Run() only ends by exception or process kill, so buffered error
+            // lines would otherwise never reach the file.
+            errorstream = new StreamWriter("errors.txt", true) { AutoFlush = true };
+        }
+
+        /// <summary>Logs every connection state transition to the console.</summary>
+        private void Connection_StateChange(object sender, StateChangeEventArgs e) {
+            Console.WriteLine("MySQL connection is now {0}!", e.CurrentState);
+        }
+
+        /// <summary>Opens the MySQL connection asynchronously.</summary>
+        public async Task OpenConnection() {
+            Console.WriteLine("Connecting to MySQL Server...");
+            await connection.OpenAsync();
+        }
+
+        /// <summary>
+        /// Closes the database connection and the error log. Deterministic disposal is used
+        /// instead of a finalizer because a finalizer must not touch other managed objects.
+        /// </summary>
+        public void Dispose() {
+            connection.Dispose();
+            errorstream.Dispose();
+        }
+
+        /// <summary>Starts connecting, asks for the directory to watch, then runs the import loop.</summary>
+        public static async Task Main() {
+            using var program = new Program();
+            var openTask = program.OpenConnection();
+            try {
+                Console.WriteLine("Give a directory:");
+                var path = XConsole.ReadDirectory();
+                await openTask;
+                program.Run(path);
+#pragma warning disable CA1031 // Do not catch general exception types
+            } catch (Exception e) {
+                Console.WriteLine("Uncaught ERROR: {0}", e);
+            }
+#pragma warning restore CA1031 // Do not catch general exception types
+            Console.WriteLine("Program end...");
+            Console.ReadLine();
+        }
+
+        /// <summary>
+        /// Ensures the schema exists, then scans <paramref name="path"/> once per second,
+        /// forever, importing every matching file. A failure in one file is logged and does
+        /// not stop the loop.
+        /// </summary>
+        /// <param name="path">Directory scanned (top level only) for card files.</param>
+        private void Run(string path) {
+            processeddirectory = Path.Combine(path, "processed");
+            if (connection.State != ConnectionState.Open)
+                throw new Exception("Connection not opened!");
+            Console.WriteLine("Creating tables...");
+            try {
+                CreateTables();
+            } catch (MySqlException e) {
+                Console.WriteLine("ERROR: Creating tables: {0}", e.Message);
+                throw;
+            }
+            Console.WriteLine("Tables created!");
+            while (true) {
+                foreach (var file in XDirectory.GetFiles(path, searchPatterns, SearchOption.TopDirectoryOnly)) {
+                    try {
+                        ProcessFile(file);
+                        Console.WriteLine();
+                    } catch (MySqlException e) {
+                        Console.WriteLine("MySQL ERROR: Processing file failed: {0}", e.Message);
+                    } catch (ArgumentException e) {
+                        Console.WriteLine("Argument exception: {0}", e.Message);
+                    } catch (JsonException e) {
+                        // Malformed JSON in one file must not terminate the watcher.
+                        Console.WriteLine("JSON ERROR: Processing file failed: {0}", e.Message);
+                    } catch (IOException e) {
+                        // E.g. a file of the same name already exists in the processed directory.
+                        Console.WriteLine("IO ERROR: Processing file failed: {0}", e.Message);
+                    }
+                }
+                Console.WriteLine("Waiting 1s..");
+                Thread.Sleep(1000);
+            }
+        }
+
+        /// <summary>Creates every table the importer needs (currently only the cards table).</summary>
+        private void CreateTables() {
+            CreateCardsTable();
+        }
+
+        /// <summary>Creates the cards table if it does not exist yet.</summary>
+        private void CreateCardsTable() {
+            using var command = connection.CreateCommand();
+            command.CommandText = "CREATE TABLE IF NOT EXISTS cards(id INTEGER NOT NULL PRIMARY KEY,deck_id INTEGER NOT NULL,card_text VARCHAR(2048) NOT NULL,num_blanks INTEGER NOT NULL,o INTEGER NOT NULL);";
+            // NOTE(review): a DDL statement presumably never reports exactly one affected
+            // row, so the first branch may be unreachable - confirm against the server.
+            if (command.ExecuteNonQuery() == 1)
+                Console.WriteLine("Table created!");
+            else
+                Console.WriteLine("Table not changed!");
+        }
+
+        /// <summary>
+        /// Imports one card file. The file name without extension must be the integer deck
+        /// ID and the content a JSON array of cards. Afterwards the file is moved into the
+        /// processed directory.
+        /// </summary>
+        /// <param name="file">Full path of the file to import.</param>
+        /// <exception cref="ArgumentException">The file name is not an integer.</exception>
+        private void ProcessFile(string file) {
+            var filename = Path.GetFileName(file);
+            Console.WriteLine("Processing file '{0}'", filename);
+            var file_id = Path.GetFileNameWithoutExtension(filename);
+
+            // CreateDirectory does nothing when the directory already exists.
+            Directory.CreateDirectory(processeddirectory);
+
+            Console.WriteLine("Id is {0}", file_id);
+
+            if (!int.TryParse(file_id, out var deck_id))
+                throw new ArgumentException($"File '{filename}' with non-integer ID!");
+
+            var contents = File.ReadAllText(file);
+            // DeserializeObject yields null for empty or literal-null content.
+            var cardlist = JsonConvert.DeserializeObject<List<Card>>(contents) ?? new List<Card>();
+            var seenIds = new HashSet<long>();
+            var inserted = 0;
+
+            foreach (var card in cardlist) {
+                if (!seenIds.Add(card.ID)) {
+                    Console.WriteLine("Card with id '{0}' already inserted! Skipping...", card.ID);
+                    continue;
+                }
+                card.DeckID = deck_id;
+                if (AddCard(card))
+                    inserted++;
+            }
+            // Report rows actually written, not merely the number of distinct IDs seen.
+            Console.WriteLine("Successfully inserted {0}/{1} new cards!", inserted, cardlist.Count);
+            File.Move(file, Path.Combine(processeddirectory, filename));
+        }
+
+        /// <summary>
+        /// Inserts one card with a parameterized INSERT IGNORE. If the statement does not
+        /// affect exactly one row, the card is written to the error log.
+        /// </summary>
+        /// <param name="card">Card to insert.</param>
+        /// <returns>true when exactly one row was affected.</returns>
+        private bool AddCard(Card card) {
+            using var command = connection.CreateCommand();
+            command.CommandText = "INSERT IGNORE INTO cards(id,deck_id,card_text,num_blanks,o) VALUES (@id,@deckid,@text,@blanks,@order);";
+            command.Parameters.AddWithValue("@deckid", card.DeckID);
+            command.Parameters.AddWithValue("@id", card.ID);
+            command.Parameters.AddWithValue("@text", card.Text);
+            command.Parameters.AddWithValue("@blanks", card.Blanks);
+            command.Parameters.AddWithValue("@order", card.Order);
+            var result = command.ExecuteNonQuery();
+            if (result == 1)
+                return true;
+            Console.WriteLine("ERROR: {0} lines affected!", result);
+            errorstream.WriteLine("[{0}] Insertion: {1} rows affected!\n{2}", DateTime.Now.ToString(), result, JsonConvert.SerializeObject(card));
+            return false;
+        }
+    }
+
+    /// <summary>Console input helpers.</summary>
+    internal class XConsole {
+        /// <summary>Reads lines from standard input until one names an existing directory.</summary>
+        /// <exception cref="EndOfStreamException">Standard input ended before a directory was entered.</exception>
+        internal static string ReadDirectory() {
+            while (true) {
+                var input = Console.ReadLine();
+                // ReadLine returns null at end of input; without this check the loop would never end.
+                if (input == null)
+                    throw new EndOfStreamException("Standard input closed before a directory was given.");
+                if (Directory.Exists(input))
+                    return input;
+                Console.WriteLine("Directory doesn't exist!");
+            }
+        }
+    }
+
+    /// <summary>Directory enumeration helpers.</summary>
+    public static class XDirectory {
+        /// <summary>
+        /// Enumerates the files under <paramref name="path"/> whose extension matches the
+        /// case-insensitive regular expression <paramref name="searchPatternExpression"/>.
+        /// </summary>
+        public static IEnumerable<string> GetFiles(string path,
+                            string searchPatternExpression = "",
+                            SearchOption searchOption = SearchOption.TopDirectoryOnly) {
+            var reSearchPattern = new Regex(searchPatternExpression ?? "", RegexOptions.IgnoreCase);
+            return Directory.EnumerateFiles(path, "*", searchOption)
+                            .Where(file =>
+                                     reSearchPattern.IsMatch(Path.GetExtension(file)));
+        }
+
+        /// <summary>
+        /// Enumerates the files under <paramref name="path"/> that match any of the
+        /// wildcard patterns; the patterns are evaluated in parallel.
+        /// </summary>
+        public static IEnumerable<string> GetFiles(string path,
+                            string[] searchPatterns,
+                            SearchOption searchOption = SearchOption.TopDirectoryOnly) {
+            return searchPatterns.AsParallel()
+                   .SelectMany(searchPattern =>
+                          Directory.EnumerateFiles(path, searchPattern, searchOption));
+        }
+    }
+}
diff --git a/azala.info Scraper/Program.cs.old b/azala.info Scraper/Program.cs.old
new file mode 100644
index 0000000..0718e83
--- /dev/null
+++ b/azala.info Scraper/Program.cs.old
@@ -0,0 +1,56 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Net.WebSockets;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+using Websocket.Client;
+
+namespace azala.info_Scraper {
+    class Program {
+        private const string AZALA_INFO_URL = "wss://azala.info/main";
+        private readonly WebsocketClient socket;
+
+        public Program(string url) {
+            Console.WriteLine("Connecting to {0}", url);
+            var uri = new Uri(url);
+
+            socket = new WebsocketClient(uri) {
+                ReconnectTimeoutMs = (int)TimeSpan.FromSeconds(30).TotalMilliseconds
+            };
+            socket.ReconnectionHappened.Subscribe(type =>
+                Console.WriteLine($"Reconnection happened, type: {type}"));
+            socket.MessageReceived.Subscribe(MessageReceived);
+        }
+
+        private void MessageReceived(ResponseMessage msg) {
+            if (msg.MessageType == WebSocketMessageType.Text) {
+                TextMessage(msg.Text);
+            }
+        }
+
+        private void TextMessage(string text) {
+            try {
+
+            } catch (Exception e) {
+                Console.WriteLine("Cannot parse message (JSON expected): {0}",e);
+            }
+        }
+
+        static async Task Main() {
+            try {
+                await new Program(AZALA_INFO_URL).Run();
+#pragma warning disable CA1031 // Do not catch general exception types
+            } catch (Exception e) {
+                Console.WriteLine("Uncaught error:\n{0}", e);
+            }
+#pragma warning restore CA1031 // Do not catch general exception types
+        }
+
+        private async Task Run() {
+            await socket.Start();
+            Console.ReadLine();
+        }
+    }
+}
diff --git a/azala.info Scraper/Properties/AssemblyInfo.cs b/azala.info Scraper/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..4e824be
--- /dev/null
+++ b/azala.info Scraper/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General information about an assembly is controlled through the following
+// attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("azala.info Scraper")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("azala.info Scraper")]
+[assembly: AssemblyCopyright("Copyright © 2019")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly
+// invisible to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to "True" on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is the ID of the type library if this project is exposed to COM
+[assembly: Guid("886207e9-c764-47f1-a5e5-1556a9e28648")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major version
+//      Minor version
+//      Build number
+//      Revision
+//
+// You can specify all the values or use default values for the build and revision numbers
+// by entering "*" as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
diff --git a/azala.info Scraper/azala.info Scraper.csproj b/azala.info Scraper/azala.info Scraper.csproj
new file mode 100644
index 0000000..f33347d
--- /dev/null
+++ b/azala.info Scraper/azala.info Scraper.csproj
@@ -0,0 +1,90 @@
+
+
+
+
+    Debug
+    AnyCPU
+    {886207E9-C764-47F1-A5E5-1556A9E28648}
+    Exe
+    azala.info_Scraper
+    azala.info Scraper
+    v4.7.2
+    512
+    true
+    true
+
+
+    AnyCPU
+    true
+    full
+    false
+    bin\Debug\
+    DEBUG;TRACE
+    prompt
+    4
+    8.0
+
+
+    AnyCPU
+    pdbonly
+    true
+    bin\Release\
+    TRACE
+    prompt
+    4
+    8.0
+
+
+
+      ..\packages\BouncyCastle.1.8.3.1\lib\BouncyCastle.Crypto.dll
+
+
+      ..\packages\Google.Protobuf.3.6.1\lib\net45\Google.Protobuf.dll
+
+
+      ..\packages\MySql.Data.8.0.18\lib\net452\MySql.Data.dll
+
+
+      ..\packages\Newtonsoft.Json.12.0.2\lib\net45\Newtonsoft.Json.dll
+
+
+      ..\packages\SSH.NET.2016.1.0\lib\net40\Renci.SshNet.dll
+
+
+
+
+
+
+
+
+
+
+
+
+      ..\packages\System.Reactive.4.0.0\lib\net46\System.Reactive.dll
+
+
+
+
+
+
+
+
+
+
+
+
+      ..\packages\Websocket.Client.3.2.56\lib\netstandard2.0\Websocket.Client.dll
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/azala.info Scraper/packages.config b/azala.info Scraper/packages.config
new file mode 100644
index 0000000..f1ae3bf
--- /dev/null
+++ b/azala.info Scraper/packages.config
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file