#!/usr/bin/python3

import sys
import re

filename="p2_12.txt"
if len(sys.argv) > 1:
    filename=sys.argv[1]

in_rules=True
rules={}
data=[]

for line in open(filename, "r"):
    line = line.rstrip()
    if line == "":
        in_rules=False
        continue

    if in_rules:
        (rule_number,rule) = line.split(":")
        rule_number=int(rule_number)
        rule=rule.strip()
        rules[rule_number] = [s.strip() for s in rule.split("|")]
    else:
        data.append(line)

def get_rule_regex(rulenumber):
    regex=""
    first=False
    for rule in rules[rulenumber]:
        if rule[0] != '"':
            # not a character, so we're going to go down the wishing well
            for number in rule.split(" "):
                number=int(number)
                regex+=get_rule_regex(number)
            regex+="|"
        else:
            regex+=rule[1:-1]
    if regex[-1] == "|":
        regex=regex[0:-1]
    if len(rules[rulenumber]) > 1 or len(rules[rulenumber][0]) > 3:
        regex="("+regex+")"

    return regex

# because we're doing insane things, lets build the regex for rules[8] first, which is literally 42 multiple times
#rules[8]=["42", "42 8"]
#rules[11]=["42 31", "42 11 31"]

rules[8]=['"((' + get_rule_regex(42) + ')(?P<rule8>(' + get_rule_regex(42) + ')*))"']
rules[11]=['"((' + get_rule_regex(42) + '))(?P<rule11>.*)((' + get_rule_regex(31) + '))"']

# ok - so all the rules are in, lets build a huge regex that gets all the rules
pattern="^"+get_rule_regex(0)+"$"

match_count=0

for line in data:
    matches = re.match(pattern, line)
    if matches:
        if matches.group('rule11'):
            part=matches.group('rule8') + matches.group('rule11')
            rule_31_matches=0
            rule_42_matches=0
            while len(part) > 0:
                # remove the end, and keep count, we need at least that many rule 42s to
                # be removed from the front
                remove_pattern=get_rule_regex(31)+"$"
                new_part=re.sub(remove_pattern, '', part)
                if len(new_part) == len(part):
                    # try removing a rule 42 from the start
                    remove_pattern="^" + get_rule_regex(42)
                    new_part=re.sub(remove_pattern, '', part)
                    if len(new_part) == len(part):
                        # if nothing got removed, and we're not 0 length, we're not a match
                        break
                    else:
                        part=new_part
                        rule_42_matches+=1
                else:
                    rule_31_matches+=1
                    part=new_part
            else:
                if rule_42_matches >= rule_31_matches:
                    match_count+=1
        else:
            match_count+=1

print("There were {} matches in total".format(match_count))
