import re


class WordIterator:
    """ A basic wrapper for sequential reading of words from a file """

    def __init__(self, filename):
        self.filename = filename
        self.word_queue = []
        self.line_payload = 50  # how many lines will be loaded into the queue at one time
        with open(filename) as file:
            self.__lines_count = sum(1 for _ in file)
        self.__load_line = 1  # index of the first line that hasn't been loaded into the queue yet
        self.__separators = []

    def read(self):
        """ Returns the first word from the file that wasn't already read by this particular object.
        Return value format: (word, separator)
        When all words have been read, it returns None """
        if len(self.word_queue) == 0:
            self.__load_payload()
        if len(self.word_queue) == 0:
            return None
        return self.word_queue.pop(0)

    def push_back(self, word):
        """ Pushes the word from the argument into the first position of the queue.
        Useful in case we already read a word but want to re-read it again """
        self.word_queue.insert(0, word)

    def add_separator(self, sep):
        """ All words will additionally be split on the sep separator """
        if sep in self.__separators:
            return
        self.__separators.append(sep)
        # Re-split the words already sitting in the queue around the new separator.
        new_queue = []
        regex_arg = re.compile('(' + re.escape(sep) + ')')
        for word, word_sep in self.word_queue:
            new_word = re.sub(regex_arg, r' \1 ', word)
            new_words = new_word.split()  # split() drops the empty strings a split(' ') would leave behind
            new_queue += self.__make_pairs(word + word_sep, new_words)
        self.word_queue = new_queue

    def __load_payload(self):
        """ Loads words from the input file into the queue.
        Format of the queued items: (word, separator) """
        index = 0
        with open(self.filename, 'r') as file:
            for line in file:
                index += 1
                if index < self.__load_line:
                    continue
                elif index < self.__load_line + self.line_payload:
                    self.word_queue += self.__parse_words(line)
                    if index >= self.__lines_count:
                        index += 1  # the last line was loaded; point past the end of the file
                else:
                    break
        # Never move the load pointer backwards, otherwise an exhausted iterator
        # would re-load the final line on a later call.
        self.__load_line = max(self.__load_line, index)

    def __parse_words(self, line):
        """ Parses a line passed by argument using regular expressions.
        It separates each word on the line and stores it into a list.
        Returns a list of tuples (word, separator between the word and the next word) """
        orig_line = line
        if line == "\n" or line == "\r\n":
            return self.__make_pairs(orig_line, [line])
        # All non-escaped occurrences of the separator characters are split off
        # to be a single 'word'. (The exact regex was lost in the source; splitting
        # on the separators registered via add_separator is an assumption.)
        for sep in self.__separators:
            line = re.sub(r'(?<!\\)(' + re.escape(sep) + ')', r' \1 ', line)
        words = line.split()
        return self.__make_pairs(orig_line, words)

    def __make_pairs(self, orig_line, words):
        """ Pairs every word with the separator text that follows it in orig_line """
        pairs = []
        read_index = 0
        for i in range(len(words)):
            if i + 1 >= len(words):
                start = orig_line[read_index:].find(words[i]) + len(words[i]) + read_index
                pairs.append((words[i], orig_line[start:]))
            else:
                start = orig_line[read_index:].find(words[i]) + len(words[i]) + read_index
                end = orig_line[start:].find(words[i + 1]) + start
                pairs.append((words[i], orig_line[start:end]))
                read_index = end
        return pairs
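

# Minimal usage sketch, assuming a plain-text file named "example.txt" exists
# next to this module (the filename and the comma separator are illustrative
# assumptions, not part of the original source). It drains the iterator and
# prints every (word, separator) pair it yields.
if __name__ == "__main__":
    words = WordIterator("example.txt")
    words.add_separator(",")  # commas are treated as standalone words from now on
    pair = words.read()
    while pair is not None:
        word, following_sep = pair
        print(repr(word), "->", repr(following_sep))
        pair = words.read()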