azala.info-scraper/azala.info Scraper/Program.cs
2023-04-19 16:28:05 +02:00

175 lines
7.3 KiB
C#

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using MySql.Data;
using MySql.Data.MySqlClient;
using System.Data;
using System.Text.RegularExpressions;
using System.Threading;
namespace azala.info_Scraper {
/// <summary>
/// Console tool that polls a directory for <c>*.que</c> / <c>*.ans</c> files, deserializes each
/// file into a list of <c>Card</c> objects and inserts them into the <c>cards</c> MySQL table.
/// Files that were handled are moved into a <c>processed</c> sub-directory.
/// </summary>
public class Program : IDisposable {
    // Name of the environment variable that, when set, overrides the built-in connection string.
    private const string ConnectionStringVariable = "AZALA_SCRAPER_CONNECTION";

    // SECURITY: credentials do not belong in source control. This fallback is kept only so that
    // existing setups keep working unchanged; prefer supplying the connection string through
    // the environment variable above and rotating this password.
    private const string DefaultConnectionString = "Server=localhost;Database=cardsagainsthumanity;Uid=dbuser;Pwd=DSKJjojofsa9%=(!=i2100;";

    private static readonly string[] searchPatterns = { "*.que", "*.ans" };

    private readonly MySqlConnection connection;
    private readonly StreamWriter errorstream;
    private string processeddirectory;
    private bool disposed;

    /// <summary>
    /// Creates the (not yet opened) MySQL connection and opens <c>errors.txt</c> for appending.
    /// </summary>
    public Program() {
        var configured = Environment.GetEnvironmentVariable(ConnectionStringVariable);
        connection = new MySqlConnection(string.IsNullOrWhiteSpace(configured) ? DefaultConnectionString : configured);
        connection.StateChange += Connection_StateChange;
        // AutoFlush: the watch loop never ends normally, so buffered entries would otherwise be
        // lost when the process is killed.
        errorstream = new StreamWriter("errors.txt", true) { AutoFlush = true };
    }

    /// <summary>Reports every connection state transition on the console.</summary>
    private void Connection_StateChange(object sender, StateChangeEventArgs e) {
        Console.WriteLine("MySQL connection is now {0}!", e.CurrentState);
    }

    /// <summary>Starts opening the MySQL connection.</summary>
    /// <returns>A task that completes once the connection attempt has finished.</returns>
    public async Task OpenConnection() {
        Console.WriteLine("Connecting to MySQL Server...");
        await connection.OpenAsync();
    }

    /// <summary>Closes the MySQL connection and the error log.</summary>
    public void Dispose() {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the owned resources. Replaces the former finalizer, which touched the managed
    /// connection object at a point where it may already have been finalized itself.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing) {
        if (disposed) {
            return;
        }
        disposed = true;
        if (disposing) {
            connection.Dispose();
            errorstream.Dispose();
        }
    }

    /// <summary>
    /// Entry point: starts connecting, asks for the directory to watch and runs the watch loop.
    /// Any exception escaping the loop is printed before the program waits for a final key press.
    /// </summary>
    public static async Task Main() {
        using (var program = new Program()) {
            // The connection attempt runs while the user is typing the directory.
            var openTask = program.OpenConnection();
            Console.WriteLine("Give a directory:");
            var path = XConsole.ReadDirectory();
            try {
                await openTask;
                program.Run(path);
#pragma warning disable CA1031 // Do not catch general exception types
            } catch (Exception e) {
                Console.WriteLine("Uncaught ERROR: {0}", e);
            }
#pragma warning restore CA1031 // Do not catch general exception types
        }
        Console.WriteLine("Program end...");
        Console.ReadLine();
    }

    /// <summary>
    /// Creates the tables and then polls <paramref name="path"/> once per second, processing
    /// every matching file. Failures of a single file are logged and do not stop the loop.
    /// </summary>
    /// <param name="path">Directory to watch; handled files go to its <c>processed</c> child.</param>
    /// <exception cref="InvalidOperationException">The connection is not open.</exception>
    /// <exception cref="MySqlException">Creating the tables failed.</exception>
    private void Run(string path) {
        processeddirectory = Path.Combine(path, "processed");
        if (connection.State != ConnectionState.Open) {
            throw new InvalidOperationException("Connection not opened!");
        }
        Console.WriteLine("Creating tables...");
        try {
            CreateTables();
        } catch (MySqlException e) {
            Console.WriteLine("ERROR: Creating tables: {0}", e.Message);
            throw;
        }
        Console.WriteLine("Tables created!");
        while (true) {
            // Materialize first: files are moved out of the directory while we iterate.
            var files = XDirectory.GetFiles(path, searchPatterns, SearchOption.TopDirectoryOnly).ToList();
            foreach (var file in files) {
                try {
                    ProcessFile(file);
                    Console.WriteLine();
                } catch (MySqlException e) {
                    Console.WriteLine("MySQL ERROR: Processing file failed: {0}", e.Message);
                } catch (ArgumentException e) {
                    Console.WriteLine("Argument exception: {0}", e.Message);
                } catch (JsonException e) {
                    // A malformed file must not take the whole watcher down.
                    Console.WriteLine("JSON ERROR: Processing file failed: {0}", e.Message);
                } catch (IOException e) {
                    // E.g. the file is still being written; it is retried on the next pass.
                    Console.WriteLine("IO ERROR: Processing file failed: {0}", e.Message);
                }
            }
            Console.WriteLine("Waiting 1s..");
            Thread.Sleep(1000);
        }
    }

    /// <summary>Creates every table the program needs.</summary>
    private void CreateTables() {
        CreateCardsTable();
    }

    /// <summary>Creates the <c>cards</c> table unless it already exists.</summary>
    private void CreateCardsTable() {
        using var command = connection.CreateCommand();
        command.CommandText = "CREATE TABLE IF NOT EXISTS cards(id INTEGER NOT NULL PRIMARY KEY,deck_id INTEGER NOT NULL,card_text VARCHAR(2048) NOT NULL,num_blanks INTEGER NOT NULL,o INTEGER NOT NULL);";
        // NOTE(review): ExecuteNonQuery reports affected rows; for a CREATE TABLE statement this
        // is presumably never 1, so "Table created!" may never be printed - confirm against
        // Connector/NET before relying on this message.
        if (command.ExecuteNonQuery() == 1) {
            Console.WriteLine("Table created!");
        } else {
            Console.WriteLine("Table not changed!");
        }
    }

    /// <summary>
    /// Inserts all cards of one file and moves the file into the processed directory.
    /// The file name without extension is used as the deck id of every card in the file.
    /// </summary>
    /// <param name="file">Path of the file to process.</param>
    /// <exception cref="ArgumentException">
    /// The file name is not an integer, or the file does not contain a card list.
    /// </exception>
    private void ProcessFile(string file) {
        var filename = Path.GetFileName(file);
        Console.WriteLine("Processing file '{0}'", filename);
        var file_id = Path.GetFileNameWithoutExtension(filename);
        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(processeddirectory);
        Console.WriteLine("Id is {0}", file_id);
        if (!int.TryParse(file_id, out var deck_id)) {
            throw new ArgumentException($"File '{filename}' with non-integer ID!");
        }

        var cardlist = JsonConvert.DeserializeObject<List<Card>>(File.ReadAllText(file));
        if (cardlist == null) {
            // DeserializeObject yields null for an empty file or a literal JSON null.
            throw new ArgumentException($"File '{filename}' contains no card list!");
        }

        var seenIds = new HashSet<long>();
        var inserted = 0;
        foreach (var card in cardlist) {
            if (card == null) {
                Console.WriteLine("Empty card entry! Skipping...");
                continue;
            }
            if (!seenIds.Add(card.ID)) {
                Console.WriteLine("Card with id '{0}' already inserted! Skipping...", card.ID);
                continue;
            }
            card.DeckID = deck_id;
            // Count only rows the database really accepted; INSERT IGNORE skips existing ids.
            if (AddCard(card)) {
                inserted++;
            }
        }
        Console.WriteLine("Sucessfully inserted {0}/{1} new cards!", inserted, cardlist.Count);

        // File.Move refuses to overwrite, so a stale processed copy of the same file is replaced.
        var destination = Path.Combine(processeddirectory, filename);
        if (File.Exists(destination)) {
            File.Delete(destination);
        }
        File.Move(file, destination);
    }

    /// <summary>
    /// Inserts one card with <c>INSERT IGNORE</c>. When the statement does not affect exactly
    /// one row the card is reported on the console and appended to the error log as JSON.
    /// </summary>
    /// <param name="card">Card to insert; its deck id must already be set.</param>
    /// <returns>True when exactly one row was inserted.</returns>
    private bool AddCard(Card card) {
        using var command = connection.CreateCommand();
        command.CommandText = "INSERT IGNORE INTO cards(id,deck_id,card_text,num_blanks,o) VALUES (@id,@deckid,@text,@blanks,@order);";
        command.Parameters.AddWithValue("@deckid", card.DeckID);
        command.Parameters.AddWithValue("@id", card.ID);
        command.Parameters.AddWithValue("@text", card.Text);
        command.Parameters.AddWithValue("@blanks", card.Blanks);
        command.Parameters.AddWithValue("@order", card.Order);
        var result = command.ExecuteNonQuery();
        if (result == 1) {
            return true;
        }
        Console.WriteLine("ERROR: {0} lines affected!", result);
        errorstream.WriteLine("[{0}] Insertion: {1} rows affected!\n{2}", DateTime.Now.ToString(), result, JsonConvert.SerializeObject(card));
        return false;
    }
}
/// <summary>Console input helpers.</summary>
internal class XConsole {
    /// <summary>
    /// Reads lines from standard input until one of them names an existing directory.
    /// Each rejected line is answered with "Directory doesn't exist!".
    /// </summary>
    /// <returns>The first entered line for which <see cref="Directory.Exists"/> is true.</returns>
    /// <exception cref="EndOfStreamException">
    /// Standard input ended before an existing directory was entered.
    /// </exception>
    internal static string ReadDirectory() {
        while (true) {
            var input = Console.ReadLine();
            if (input == null) {
                // ReadLine returns null once input is exhausted (closed or redirected stdin);
                // without this check the loop would spin forever printing the error message.
                throw new EndOfStreamException("Standard input ended before a directory was given!");
            }
            if (Directory.Exists(input)) {
                return input;
            }
            Console.WriteLine("Directory doesn't exist!");
        }
    }
}
/// <summary>File enumeration helpers that accept more than a single wildcard.</summary>
public static class XDirectory {
    /// <summary>
    /// Lazily enumerates the files under <paramref name="path"/> whose extension (as returned by
    /// <see cref="Path.GetExtension(string)"/>) matches a case-insensitive regular expression.
    /// </summary>
    /// <param name="path">Directory to search.</param>
    /// <param name="searchPatternExpression">Regular expression applied to each file extension.</param>
    /// <param name="searchOption">Whether sub-directories are searched as well.</param>
    /// <returns>The matching file paths.</returns>
    public static IEnumerable<string> GetFiles(string path,
        string searchPatternExpression = "",
        SearchOption searchOption = SearchOption.TopDirectoryOnly) {
        var extensionFilter = new Regex(searchPatternExpression, RegexOptions.IgnoreCase);
        var candidates = Directory.EnumerateFiles(path, "*", searchOption);
        return from candidate in candidates
               where extensionFilter.IsMatch(Path.GetExtension(candidate))
               select candidate;
    }

    /// <summary>
    /// Enumerates the files under <paramref name="path"/> that match any of several wildcard
    /// patterns; the patterns are evaluated through a parallel query.
    /// </summary>
    /// <param name="path">Directory to search.</param>
    /// <param name="searchPatterns">Wildcard patterns, each passed to <c>Directory.EnumerateFiles</c>.</param>
    /// <param name="searchOption">Whether sub-directories are searched as well.</param>
    /// <returns>The concatenated matches of all patterns, in no guaranteed order.</returns>
    public static IEnumerable<string> GetFiles(string path,
        string[] searchPatterns,
        SearchOption searchOption = SearchOption.TopDirectoryOnly) {
        var parallelPatterns = searchPatterns.AsParallel();
        return parallelPatterns.SelectMany(
            pattern => Directory.EnumerateFiles(path, pattern, searchOption));
    }
}
}