bayernfahrplan/source/fahrplanparser.d

161 lines
3.9 KiB
D
Raw Normal View History

module fahrplanparser;
2016-12-29 18:43:54 +01:00
import std.algorithm : filter, map;
2016-12-29 18:39:10 +01:00
import std.array : empty, front, replace;
import std.conv : to;
2016-12-29 18:43:54 +01:00
import std.datetime : dur, TimeOfDay;
import std.regex : ctRegex, matchAll;
import std.string : strip;
import std.typecons : tuple, Tuple;
import kxml.xml : readDocument, XmlNode;
import substitution;
private:
/// Column positions of the cells in one row of the departure table.
/// The values are used as slice bounds when picking cells out of a row.
enum ScheduleHeadings
{
    date = 0,
    departure = 1,
    line = 2,
    direction = 3,
    platform = 4
}
public:
2016-12-29 18:39:10 +01:00
/**
 * Turns the departure monitor document in `data` into a lazy range of
 * associative arrays, one per departure row.
 *
 * Each element has the keys:
 *   "departure" - the time returned by `parseTime`, formatted as "HH:MM"
 *   "delay"     - the delay returned by `parseTime`, in whole minutes
 *   "line"      - content of the second extracted cell
 *   "direction" - content of the third extracted cell, run through `substitute`
 *
 * The first `tr` of the table is skipped (presumably the header row - confirm
 * against the page markup). A document without any matching row yields an
 * empty range.
 */
auto parsedFahrplan(in string data)
{
    auto rows = data.readDocument
        .parseXPath(`//table[@id="departureMonitor"]/tbody/tr`);

    // Slicing an empty array with [1 .. $] is a range violation, so only drop
    // the first row when there is one.
    if (!rows.empty)
        rows = rows[1 .. $];

    // dfmt off
    return rows
        .getRowContents
        .filter!(row => !row.empty)
        .map!((row) {
            // Parse the time cell once and reuse the result for both fields.
            auto parsed = row[0].parseTime;
            return ["departure" : parsed[0].to!string[0 .. $ - 3],
                    "delay" : parsed[1].total!"minutes".to!string,
                    "line" : row[1],
                    "direction" : row[2].substitute];
        });
    // dfmt on
}
private:
/// Thrown when a string that should contain a time of day contains none.
class BadTimeInputException : Exception
{
    /**
     * Params:
     *   msg  = description of the problem (defaults to an empty message)
     *   file = source file of the throw site (filled in automatically)
     *   line = source line of the throw site (filled in automatically)
     *   next = previous exception in the chain, if any
     *
     * `file` and `line` are forwarded so the exception reports where it was
     * thrown instead of the location of this constructor.
     */
    this(string msg = "", string file = __FILE__, size_t line = __LINE__,
            Throwable next = null) @safe pure nothrow @nogc
    {
        super(msg, file, line, next);
    }
}
/**
 * Extracts a time of day and a delay from `input`.
 *
 * The text is searched for `H:MM` / `HH:MM` occurrences; only the first two
 * are considered.
 *
 * Returns: a tuple `(time, delay)`.
 *   With a single occurrence: that time and a zero delay.
 *   With two or more: the second time, and the first time minus the second.
 *   A negative difference is shifted forward by 24 hours.
 *
 * Throws: BadTimeInputException if `input` contains no time at all.
 */
auto parseTime(in string input) @safe
{
    // Builds a TimeOfDay from the named groups of one regex hit.
    static TimeOfDay toTimeOfDay(Hit)(Hit hit)
    {
        return TimeOfDay(hit["hours"].to!int, hit["minutes"].to!int);
    }

    auto hits = input.matchAll(ctRegex!(`(?P<hours>\d{1,2}):(?P<minutes>\d{2})`));
    if (hits.empty)
        throw new BadTimeInputException();

    auto firstTime = toTimeOfDay(hits.front);
    hits.popFront;

    // Only one time present: nothing to compare against, so no delay.
    if (hits.empty)
        return tuple(firstTime, dur!"minutes"(0));

    auto secondTime = toTimeOfDay(hits.front);
    auto delay = firstTime - secondTime;
    // The two times straddle midnight: wrap the difference into the next day.
    if (delay.isNegative)
        delay += dur!"hours"(24);
    return tuple(secondTime, delay);
}
// Exercises parseTime: rejection of inputs without a time, single times,
// repeated times, embedded times and the wrap across midnight.
@safe unittest
{
    import std.exception : assertThrown;

    // Shorthand for the expected (time, delay in minutes) result.
    static auto expected(int hour, int minute, long delayMinutes)
    {
        return tuple(TimeOfDay(hour, minute), dur!"minutes"(delayMinutes));
    }

    // None of these contain a complete H:MM or HH:MM time.
    foreach (malformed; ["", "lkeqf", ":", "00:0"])
        assertThrown(parseTime(malformed));

    // A single time, with one or two hour digits.
    assert(parseTime("00:00") == expected(0, 0, 0));
    assert(parseTime("0:00") == expected(0, 0, 0));

    // Identical times give no delay; a third time is ignored.
    assert(parseTime("00:00 00:00") == expected(0, 0, 0));
    assert(parseTime("00:00 00:00 12:00") == expected(0, 0, 0));
    assert(parseTime("12:3412:34") == expected(12, 34, 0));

    // Times embedded in other text; the second time is the one returned.
    assert(parseTime("ölqjfo12:34oieqf12:31ölqjf") == expected(12, 31, 3));
    assert(parseTime("17:53 (planmäßig 17:51 Uhr)") == expected(17, 51, 2));

    // First time just after midnight, second just before it.
    assert(parseTime("00:00 23:59") == expected(23, 59, 1));
}
/// Lazily applies `getRowContent` to every row in `rows`.
auto getRowContents(XmlNode[] rows)
{
    return rows.map!getRowContent;
}
/**
 * Extracts the text of the departure, line and direction cells of one row.
 *
 * Returns: a lazy range with the `stripLinks` result of each `td` cell from
 * `ScheduleHeadings.departure` up to and including
 * `ScheduleHeadings.direction`. If the row has too few `td` cells to cover
 * that span, the range is empty instead of triggering a range violation.
 */
auto getRowContent(XmlNode row)
{
    enum size_t firstColumn = ScheduleHeadings.departure;
    enum size_t endColumn = ScheduleHeadings.direction + 1;

    auto cells = row.parseXPath("//td");
    // Rows lacking the expected cells cannot be sliced safely; hand back
    // nothing for them.
    auto wanted = cells.length >= endColumn ? cells[firstColumn .. endColumn] : cells[0 .. 0];
    return wanted.map!(cell => stripLinks(cell));
}
/**
 * Returns the text of `cell`.
 *
 * If the cell contains at least one `a` element, the character data of the
 * first one is returned with every "..." removed; otherwise the cell's own
 * character data is returned unchanged.
 */
auto stripLinks(XmlNode cell)
{
    auto anchors = cell.parseXPath("//a");
    return anchors.empty
        ? cell.getCData
        : anchors.front.getCData.replace("...", "");
}
// Exercises stripLinks on hand-built nodes. The nodes are mutated step by
// step, so the order of the statements below matters.
@system unittest
{
// A node without children or character data yields an empty string.
auto foo = new XmlNode("foo");
assert(foo.stripLinks == "");
// Once an <a> child exists, its character data is returned.
auto link = new XmlNode("a");
link.setCData("test");
foo.addChild(link);
assert(foo.stripLinks == "test");
// "..." is removed from the link text.
link.setCData("test2...");
assert(foo.stripLinks == "test2");
// Without a link, the node's own character data is returned.
auto bar = new XmlNode("bar");
bar.setCData("test3");
assert(bar.stripLinks == "test3");
// With a link present, the link text is returned instead of the node's own text.
bar.addChild(link);
assert(bar.stripLinks == "test2");
// A child that is not an <a> element contributes nothing.
auto baz = new XmlNode("baz");
auto subNode = new XmlNode("subNode");
baz.addChild(subNode);
assert(baz.stripLinks == "");
baz.addChild(link);
assert(baz.stripLinks == "test2");
// Character data added next to the link does not change the result.
baz.addCData("test4");
assert(baz.stripLinks == "test2");
// After the link is removed, the node's own character data is returned.
baz.removeChild(link);
assert(baz.stripLinks == "test4");
}