Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

patch: optimize selects by extracting exclusive branches #205

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

Andersama
Copy link
Contributor

This should supersede #158; unlike that patch, it actually extracts the mutually exclusive paths.

E.g., in the lexer example it recognizes that "([a-z]+)|([0-9]+)" has mutually exclusive paths and splits them apart from one another based on the first character. In theory this should reduce run times, but it still needs testing.

@Andersama Andersama force-pushed the exclusive-set branch 2 times, most recently from 6586a49 to 8060195 Compare July 18, 2021 05:06
@Andersama
Copy link
Contributor Author

Andersama commented Jul 20, 2021

@hanickadot have you experimented with creating a custom vtable for the select expression?

Eg: something like

namespace detail {
	/// Stamps `val` into every slot of `table` whose index is accepted by `T::match_char`.
	///
	/// @param table  per-character table; a slot holding 0 is treated as "not claimed yet"
	/// @param atom   tag object, only used to deduce `T`
	/// @param val    value written into every slot that `T` accepts
	/// @return true when at least one slot that was written already held a non-zero value
	template<typename AlphabetType, size_t N, typename T>
	constexpr bool overwrite_alphabet(std::array<AlphabetType, N> &table, T /*atom*/, uint8_t val) {
		bool has_collision = false;
		for (size_t i = 0; i < table.size(); i++) {
			const bool accepted = T::match_char(i);
			// accumulate with |= : a plain assignment would forget every collision
			// except the one (if any) on the very last slot
			has_collision |= (accepted && table[i] != 0);
			if (accepted) {
				table[i] = val;
			}
		}
		return has_collision;
	}

	/// Same as the single-atom overload, but a slot is claimed when ANY atom of the
	/// list accepts its index.
	///
	/// @param table  per-character table; a slot holding 0 is treated as "not claimed yet"
	/// @param atom   tag list, only used to deduce `Ts...`
	/// @param val    value written into every slot accepted by at least one of `Ts...`
	/// @return true when at least one slot that was written already held a non-zero value
	template<typename AlphabetType, size_t N, typename... Ts>
	constexpr bool overwrite_alphabet(std::array<AlphabetType, N> &table, ctll::list<Ts...> /*atom*/, uint8_t val) {
		bool has_collision = false;
		for (size_t i = 0; i < table.size(); i++) {
			const bool accepted = ((Ts::match_char(i)) || ...);
			// accumulate with |= so an early collision is not lost by later slots
			has_collision |= (accepted && table[i] != 0);
			if (accepted) {
				table[i] = val;
			}
		}
		return has_collision;
	}

	/// Builds the character -> option-index table for a list of options.
	///
	/// Each element of `Ts...` is passed to `overwrite_alphabet` to claim its slots.
	/// In the returned table a slot holds the zero-based index of the first option
	/// (in list order) that claims it, or `sizeof...(Ts)` when no option does.
	///
	/// @return tuple of (number of claimed slots, whether two options claimed the
	///         same slot, the table itself)
	template<typename AlphabetType, size_t N, typename... Ts>
	constexpr auto write_vtable_cases(ctll::list<Ts...> /*atoms*/) {
		// option numbers 1..sizeof...(Ts) travel through a uint8_t parameter and the
		// reject index sizeof...(Ts) is stored as AlphabetType: refuse to truncate silently
		static_assert(sizeof...(Ts) < 256, "too many options for a uint8_t option index");
		static_assert(static_cast<size_t>(static_cast<AlphabetType>(sizeof...(Ts))) == sizeof...(Ts),
			"AlphabetType cannot represent the reject index");

		std::array<AlphabetType, N> table{};

		// must be initialised: an uninitialised local is not allowed in a constexpr
		// function before C++20
		int dummy = 0;
		bool has_collision = false;
		size_t idx = sizeof...(Ts);
		// see foonathan's nifty fold expressions:
		// folding over `=` evaluates the right-most operand first, so the options are
		// written last-to-first while idx counts down. The first option is therefore
		// written last (with value 1) and wins every slot it shares with later options.
		(dummy = ... = ((has_collision |= overwrite_alphabet(table, Ts{}, static_cast<uint8_t>(idx--))), 0));
		(void)dummy;

		size_t count_nonzero = 0;
		for (size_t i = 0; i < table.size(); i++) {
			const bool claimed = table[i] != 0;
			count_nonzero += claimed;
			// shift the one-based option numbers down to zero-based indexes; unclaimed
			// slots get the index one past the last option (the reject state)
			table[i] = static_cast<AlphabetType>(claimed ? table[i] - 1 : sizeof...(Ts));
		}

		return std::make_tuple(count_nonzero, has_collision, table);
	}

	/// Forwards to `evaluate` with a default-constructed `ListAst` as the pattern
	/// argument, so the pattern is carried by the template argument instead of by a
	/// function parameter.
	template<typename ListAst, typename R, typename Iterator, typename EndIterator>
	constexpr auto evaluate_wrapped(const Iterator begin, Iterator current, const EndIterator last, const flags & f, R captures) {
		return evaluate(begin, current, last, f, captures, ListAst{});
	}
}

// matching select in patterns
//
// With more than one option and a random-access iterator, the option to start with is
// looked up from the character at `current` in a compile-time table instead of trying
// the options one after another:
//  * table hit, no two options share a first character -> call exactly that option
//  * table hit, but options overlap                     -> try that option and every later one
//  * no usable table entry (end of input, or a code unit that is negative / beyond the
//    table size)                                        -> try every option in order
// Otherwise the options are tried in order, recursing on the remaining ones.
//
// NOTE(review): a table hit on the reject index returns "not matched" without trying any
// option; this presumes calculate_first also reports options that can match without
// consuming a character -- confirm.
template <typename R, typename Iterator, typename EndIterator, typename HeadOptions, typename... TailOptions, typename... Tail> 
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator last, const flags & f, R captures, ctll::list<select<HeadOptions, TailOptions...>, Tail...>) noexcept {
	if constexpr (sizeof...(TailOptions) > 0 && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
		constexpr auto vtable_tuple = detail::write_vtable_cases<uint8_t,256>(ctll::list<decltype(calculate_first(sequence<HeadOptions, Tail...>{})), decltype(calculate_first(sequence<TailOptions, Tail...>{}))...>{});
		constexpr auto vtable_cases = std::get<2>(vtable_tuple);
		constexpr auto has_collisions = std::get<1>(vtable_tuple);

		// one entry per option (each followed by Tail) plus a trailing reject entry
		constexpr std::array<R(*)(const Iterator, Iterator, const EndIterator, const flags&, R), sizeof...(TailOptions) + 2> vtable = {
			detail::evaluate_wrapped<sequence<HeadOptions, Tail...>, R, Iterator, EndIterator>,
			detail::evaluate_wrapped<sequence<TailOptions, Tail...>, R, Iterator, EndIterator>...,
			detail::evaluate_wrapped<sequence<reject>, R, Iterator, EndIterator>
		};

		// default: the table cannot be consulted, so every option is tried in order
		size_t first_case = 0;
		bool single_dispatch = false;
		if (current != last) {
			// a negative char converts to a huge size_t and a wide code unit may exceed
			// the table; both must not be used as an index
			const size_t code = static_cast<size_t>(*current);
			if (code < vtable_cases.size()) {
				first_case = vtable_cases[code];
				single_dispatch = !has_collisions;
			}
		}

		if (single_dispatch) {
			return vtable[first_case](begin, current, last, f, captures);
		}
		for (size_t i = first_case; i < vtable.size(); i++) {
			if (auto r = vtable[i](begin, current, last, f, captures)) {
				return r;
			}
		}
		return not_matched;
	} else {
		if (auto r = evaluate(begin, current, last, f, captures, ctll::list<HeadOptions, Tail...>())) {
			return r;
		} else {
			return evaluate(begin, current, last, f, captures, ctll::list<select<TailOptions...>, Tail...>());
		}
	}
}

As far as I can tell, this cuts down on compile time for larger select expressions. MSVC performs better when the character table is made static (which is a compile error with clang), but clang appears to do far better anyway.

For example in MSVC I can compile

// Token grammar used by real_lexer: one alternation, first matching alternative wins.
// Capture groups 1-16 are read individually by real_lexer; every later alternative is an
// operator / punctuation token, listed roughly in ASCII order with the longer spellings
// of a shared prefix first.
static constexpr auto real_lexer_pattern = ctll::fixed_string{
    // groups 1-3: whitespace, line comment, block comment
    "(\\s++)|(//[^\r\n]*+)|(/[*](?:[^*]++|[*][^\\x2F])+[*]/)|"
    // group 4: identifier ('_' is accepted after the first character as well)
    "([a-zA-Z_][a-zA-Z0-9_]*+)|"
    // groups 5-7: decimal floating point
    "([0-9]+[eE][\\+\\-]?[0-9]+(?:[fFlL]?))|"
    "([0-9]*[.][0-9]+(?:[eE][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
    "([0-9]+[.][0-9]*(?:[eE][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"

    // groups 8-10: hexadecimal floating point
    "(0[xX][0-9a-fA-F]+[pP][\\+\\-]?[0-9]+(?:[fFlL]?))|"
    "(0[xX][0-9a-fA-F]*[.][0-9a-fA-F]+(?:[pP][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
    "(0[xX][0-9a-fA-F]+[.][0-9a-fA-F]*(?:[pP][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"

    // groups 11-14: hexadecimal, octal, binary, decimal integers
    "(0[xX][0-9a-fA-F]+)|"
    "(0[0-7]+)|"
    "(0[bB][0-1]+)|"
    "([0-9]++)|" 

    // groups 15-16: string literal, character literal
    "(\"(?:[^\\\\\"]+|\\\\[\\s\\S])*+\")|"
    "('(?:[^\\\\']+|\\\\[\\s\\S])*+')|"
    "(!=)|(!)|"
    // '$' is escaped: unescaped it is the end-of-input anchor, not the character
    "(#)|(\\$)|"
    "(%=)|(%)|"
    "(&&=)|(&&)|(&=)|(&)|"
    "([(])|([)])|"
    "([*]=)|([*])|"
    "([+][+])|([+]=)|([+])|"
    "[,]|"
    "(--)|(-=)|(->)|(-)|"
    "([.][.][.])|([.][.])|([.])|"
    "(/=)|(/)|"
    "(::)|(:=)|(:)|"
    "(;)|"
    "(<<=)|(<<)|(<=>)|(<=)|(<>)|(<)|"
    "(==)|(=)|"
    // the only '>' group: an earlier duplicate listed (>) ahead of (>>), which made
    // (>>) unreachable
    "(>=)|(>>=)|(>>)|(>)|"
    "([?])|([@])|(\\[)|"
    "(\\\\)|"
    "(\\])|"
    // '^' is escaped: unescaped it is the start-of-input anchor, not the character
    "(\\^=)|(\\^)|"
    "([`])|([{])|"
    "([|]=)|([|][|]=)|([|][|])|([|])|"
    "([}])|"
    "(~=)|(~)"
};

std::optional<lex_item> real_lexer(std::string_view v) noexcept {
    auto m = ctre::starts_with<real_lexer_pattern>(v);
    if (m) {
        if (m.get<1>()) {
            return lex_item{ type::space, m.view() };
        }
        else if (m.get<2>()) {
            return lex_item{ type::space, m.view() };
        }
        else if (m.get<3>()) {
            return lex_item{ type::space, m.view() };
        }
        else if (m.get<4>()) {
            return lex_item{ type::identifier, m.view() };
        }
        else if (m.get<5>()) {
            return lex_item{ type::flt, m.view() };
        }
        else if (m.get<6>()) {
            return lex_item{ type::flt, m.view() };
        }
        else if (m.get<7>()) {
            return lex_item{ type::flt, m.view() };
        }
        else if (m.get<8>()) {
            return lex_item{ type::flt, m.view() };
        }
        else if (m.get<9>()) {
            return lex_item{ type::flt, m.view() };
        }
        else if (m.get<10>()) {
            return lex_item{ type::flt, m.view() };
        }
        else if (m.get<11>()) {
            return lex_item{ type::number, m.view() };
        }
        else if (m.get<12>()) {
            return lex_item{ type::number, m.view() };
        }
        else if (m.get<13>()) {
            return lex_item{ type::number, m.view() };
        }
        else if (m.get<14>()) {
            return lex_item{ type::number, m.view() };
        }
        else if (m.get<15>()) {
            return lex_item{ type::str, m.view() };
        }
        else if (m.get<16>()) {
            return lex_item{ type::chr, m.view() };
        } else {
            return lex_item{ type::ops, m.view() };
        }
    }
    return std::nullopt;
}

in a couple of seconds with the vtable-type approach.

I have currently been waiting upwards of half an hour for the original to compile, and I'm not sure it will even finish.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

1 participant