Skip to content

Commit

Permalink
first public commit
Browse files Browse the repository at this point in the history
This state was used for the presentation on 2018-09-27.
  • Loading branch information
Toni Dietze committed Sep 26, 2018
0 parents commit 31fc8c6
Show file tree
Hide file tree
Showing 52 changed files with 4,501 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/_output/
/frame-*
/slides-*

!/frame-*.tex
!/slides-*.bib
!/slides-*.tex
1 change: 1 addition & 0 deletions .kateconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
default-dictionary en
Binary file not shown.
81 changes: 81 additions & 0 deletions csv-07-evalb-wsj-2300-2320-len-restricted.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
iteration,Number of sentence,Number of Error sentence,Number of Skip sentence,Number of Valid sentence,Bracketing Recall,Bracketing Precision,Bracketing FMeasure,Complete match,Average crossing,No crossing,2 or less crossing,Tagging accuracy
11729,2245,0,2121,124,73.79,55.79,63.54,17.74,1.80,62.90,81.45,100.00
11730,2245,0,2121,124,73.79,55.79,63.54,17.74,1.80,62.90,81.45,100.00
11731,2245,0,2120,125,72.78,54.78,62.51,17.60,2.03,62.40,80.80,100.00
11732,2245,0,2118,127,72.49,54.73,62.37,17.32,2.09,61.42,79.53,100.00
11733,2245,0,2118,127,72.97,55.19,62.84,17.32,1.95,61.42,79.53,100.00
11734,2245,0,2116,129,72.11,54.49,62.07,17.05,2.03,60.47,79.07,100.00
11735,2245,0,2116,129,72.11,54.49,62.07,17.05,2.03,60.47,79.07,100.00
11736,2245,0,2116,129,72.11,54.49,62.07,17.05,2.03,60.47,79.07,100.00
11737,2245,0,2116,129,72.11,54.49,62.07,17.05,2.03,60.47,79.07,100.00
11738,2245,0,2116,129,72.11,54.45,62.05,17.05,2.03,60.47,79.07,100.00
11739,2245,0,30,2215,67.94,66.24,67.08,7.77,3.29,30.88,53.77,100.00
11740,2245,0,30,2215,67.95,66.25,67.09,7.77,3.29,30.88,53.77,100.00
11741,2245,0,30,2215,67.95,66.24,67.09,7.77,3.29,30.88,53.77,100.00
11742,2245,0,30,2215,67.96,66.25,67.09,7.77,3.29,30.93,53.91,100.00
11743,2245,0,30,2215,67.94,66.26,67.09,7.77,3.29,30.93,53.91,100.00
11744,2245,0,30,2215,67.92,66.24,67.07,7.77,3.29,30.84,53.91,100.00
11745,2245,0,30,2215,67.87,66.20,67.03,7.77,3.29,30.74,53.95,100.00
11746,2245,0,30,2215,67.86,66.20,67.02,7.77,3.29,30.70,53.86,100.00
11747,2245,0,28,2217,67.84,66.17,66.99,7.76,3.30,30.67,53.81,100.00
11748,2245,0,28,2217,67.83,66.16,66.98,7.76,3.30,30.67,53.81,100.00
11749,2245,0,28,2217,67.86,66.18,67.01,7.80,3.30,30.81,53.72,100.00
11750,2245,0,28,2217,67.87,66.20,67.03,7.80,3.30,30.94,53.86,100.00
11751,2245,0,28,2217,67.90,66.23,67.05,7.80,3.29,30.94,53.81,100.00
11752,2245,0,28,2217,67.92,66.26,67.08,7.80,3.29,30.94,53.81,100.00
11753,2245,0,28,2217,67.95,66.30,67.12,7.80,3.28,30.94,53.86,100.00
11754,2245,0,28,2217,67.99,66.36,67.17,7.80,3.27,30.99,53.90,100.00
11755,2245,0,28,2217,67.99,66.37,67.17,7.85,3.27,31.03,53.95,100.00
11756,2245,0,28,2217,67.98,66.36,67.16,7.80,3.27,31.03,53.99,100.00
11757,2245,0,28,2217,67.95,66.34,67.14,7.80,3.28,31.08,53.72,100.00
11758,2245,0,28,2217,67.99,66.38,67.18,7.80,3.28,31.08,53.72,100.00
11759,2245,0,28,2217,68.03,66.40,67.20,7.80,3.27,31.17,53.95,100.00
11760,2245,0,28,2217,68.00,66.38,67.18,7.80,3.27,31.17,53.86,100.00
11761,2245,0,26,2219,68.06,66.47,67.25,7.80,3.26,31.05,53.90,100.00
11762,2245,0,26,2219,68.09,66.51,67.29,7.80,3.25,31.14,53.99,100.00
11763,2245,0,26,2219,68.08,66.50,67.28,7.84,3.26,31.23,54.08,100.00
11764,2245,0,26,2219,68.09,66.57,67.32,7.89,3.24,31.46,54.03,100.00
11765,2245,0,26,2219,68.13,66.60,67.36,7.84,3.24,31.32,53.99,100.00
11766,2245,0,26,2219,68.10,66.59,67.34,7.84,3.24,31.41,53.94,100.00
11767,2245,0,26,2219,68.10,66.60,67.34,7.84,3.24,31.37,53.85,100.00
11768,2245,0,26,2219,68.07,66.61,67.33,7.84,3.25,31.19,53.99,100.00
11769,2245,0,26,2219,68.08,66.65,67.36,7.84,3.24,31.23,54.12,100.00
11770,2245,0,26,2219,68.15,66.74,67.44,8.07,3.24,31.23,54.35,100.00
11771,2245,0,26,2219,68.28,66.96,67.61,8.20,3.22,31.37,54.30,100.00
11772,2245,0,26,2219,68.19,66.94,67.56,8.29,3.24,31.28,54.08,100.00
11773,2245,0,26,2219,68.23,66.98,67.60,8.38,3.23,31.37,54.35,100.00
11774,2245,0,26,2219,68.30,67.06,67.68,8.38,3.21,31.68,54.71,100.00
11775,2245,0,26,2219,68.38,67.17,67.77,8.43,3.20,31.64,54.98,100.00
11776,2245,0,26,2219,68.42,67.22,67.81,8.38,3.20,31.50,55.16,100.00
11777,2245,0,26,2219,68.53,67.35,67.93,8.43,3.19,31.68,55.43,100.00
11778,2245,0,26,2219,68.72,67.57,68.14,8.47,3.15,31.91,55.75,100.00
11779,2245,0,26,2219,68.86,67.73,68.29,8.61,3.14,31.86,55.66,100.00
11780,2245,0,26,2219,68.77,67.68,68.22,8.70,3.14,31.59,55.61,100.00
11781,2245,0,26,2219,68.82,67.74,68.28,8.79,3.13,31.73,55.61,100.00
11782,2245,0,26,2219,68.87,67.80,68.33,8.74,3.13,31.77,55.79,100.00
11783,2245,0,26,2219,69.04,67.98,68.50,8.74,3.11,31.95,55.88,100.00
11784,2245,0,26,2219,69.04,68.00,68.51,8.74,3.10,31.82,55.93,100.00
11785,2245,0,26,2219,68.99,67.97,68.47,8.74,3.11,31.73,56.06,100.00
11786,2245,0,26,2219,69.07,68.16,68.61,8.56,3.09,31.68,56.38,100.00
11787,2245,0,26,2219,69.10,68.22,68.65,8.56,3.10,31.59,56.24,100.00
11788,2245,0,26,2219,69.12,68.26,68.69,8.52,3.09,31.73,56.51,100.00
11789,2245,0,26,2219,69.19,68.33,68.76,8.56,3.08,31.95,56.42,100.00
11790,2245,0,25,2220,69.30,68.52,68.90,8.74,3.05,32.30,56.89,100.00
11791,2245,0,25,2220,69.22,68.47,68.84,8.65,3.05,32.16,56.62,100.00
11792,2245,0,25,2220,69.41,68.84,69.12,9.05,3.01,33.11,57.21,100.00
11793,2245,0,25,2220,69.46,68.91,69.18,8.92,2.98,33.15,57.52,100.00
11794,2245,0,25,2220,69.58,69.13,69.35,9.05,2.96,33.24,57.52,100.00
11795,2245,0,25,2220,69.45,69.05,69.25,9.10,2.98,33.02,57.66,100.00
11796,2245,0,25,2220,69.52,69.28,69.40,9.23,2.95,32.93,57.97,100.00
11797,2245,0,25,2220,69.76,70.08,69.92,9.55,2.87,33.87,58.15,100.00
11798,2245,0,23,2222,69.80,70.21,70.00,9.63,2.86,33.71,58.51,100.00
11799,2245,0,23,2222,70.29,71.13,70.71,10.58,2.79,34.97,59.18,100.00
11800,2245,0,23,2222,70.35,71.21,70.78,10.53,2.77,34.97,59.41,100.00
11801,2245,0,23,2222,70.50,71.45,70.97,10.85,2.75,35.46,59.36,100.00
11802,2245,0,22,2223,70.38,71.53,70.95,10.84,2.75,35.31,59.42,100.00
11803,2245,0,22,2223,70.49,71.63,71.06,10.84,2.74,35.72,59.65,100.00
11804,2245,0,22,2223,70.21,71.49,70.85,10.71,2.77,34.55,59.15,100.00
11805,2245,0,21,2224,69.69,71.08,70.38,10.70,2.83,33.95,58.59,100.00
11806,2245,0,21,2224,69.16,70.56,69.85,10.66,2.90,33.36,57.96,100.00
11807,2245,0,18,2227,68.30,70.33,69.30,9.83,2.92,31.16,56.85,100.00
11808,2245,0,17,2228,68.12,70.43,69.25,10.01,2.95,32.59,56.69,100.00
29 changes: 29 additions & 0 deletions figure-heuristics-tikz.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
% Overlay-driven figure in three columns: symbol counts, candidate mergers, saturated mergers.
% NOTE(review): the colours HKS41K20 and HKS07K100 and the "single arrow" shape are not set up in this file; presumably the including document provides them.
\begin{tikzpicture}[anchor=north]
% Undrawn rectangle: it only contributes to the bounding box of the picture.
\path (-1.5em, 2em) rectangle (29.5em, -10em);
% Column 1: the symbols A, B, C, D, ...; their counts are uncovered from slide 2 on.
\node at (0, 0) {\(\begin{aligned}
A & \uncover<2->{{} \mapsto 3}
\\ B & \uncover<2->{{} \mapsto 4}
\\ C & \uncover<2->{{} \mapsto 6}
\\ D & \uncover<2->{{} \mapsto 9}
\\ \vdots
\end{aligned}\)};
% Heading of column 1, shown together with the counts from slide 2 on.
\node<2-> at (0.2em, 2em) {counts:};
% Slide 3 on: arrow towards column 2 and the list of pairwise mergers (node n2).
\node<3->[single arrow, fill=HKS41K20, align=center] at (4em, -1em) {create \\ mergers};
\node<3-> (n2) at (10em, 0) {\(\begin{aligned}
& \text{mrg \(A\), \(B\)}
\\ & \text{mrg \(A\), \(C\)}
\\ & \text{mrg \(B\), \(C\)}
\\ & \text{mrg \(A\), \(D\)}
\\ & \text{mrg \(B\), \(D\)}
\\ & \vdots\qquad
\end{aligned}\)};
% Slide 4 on: frame from n2's north-west corner to its east anchor, i.e. around the upper half of the list only.
\draw<4->[rounded corners, color=HKS07K100] (n2.north west) rectangle (n2.east);
% Slide 5 on: arrow towards column 3 and the saturated mergers (node n3); their likelihoods are uncovered on slide 6.
\node<5->[single arrow, fill=HKS41K20, align=center] at (15em, -1em) {saturate \\ mergers};
\node<5-> (n3) at (24em, 0) {\(\begin{alignedat}{2}
& \text{mrg \(A\), \(B\), \(E\)} && \uncover<6->{{} \mapsto 0.3}
\\ & \text{mrg \(A\), \(C\)} && \uncover<6->{{} \mapsto 0.5}
\\ & \text{mrg \(B\), \(C\), \(D\), \(F\)} && \uncover<6->{{} \mapsto 0.1}
\end{alignedat}\)};
% Heading of column 3, shown together with the likelihoods from slide 6 on.
\node<6-> at (26.5em, 2em) {likelihoods:};
% Slide 7 on: band of height 1.6em around the line from n3.west to n3.east, written with the calc coordinate syntax.
% NOTE(review): presumably this frames the middle of the three rows -- confirm in the rendered slide.
\draw<7->[rounded corners, color=HKS07K100] ($(n3.west) - (0, 0.8em)$) rectangle ($(n3.east) + (0, 0.8em)$);
\end{tikzpicture}
51 changes: 51 additions & 0 deletions figure-overfitting-tikz.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
% Figure: corpus trees, the set of all trees, and the probabilities assigned to them, revealed step by step.
% NOTE(review): \trees, the "CVD light ..." colours and the "visible" key are not defined in this file; presumably the including document provides them.
\begin{tikzpicture}
[ tri/.style =
{ inner sep=0.12em
, shape border rotate=90
, isosceles triangle
, isosceles triangle apex angle=60
, draw
}
]
\matrix[anchor=base, column sep=1em, row sep=3em] (m) {
% Row 1: the corpus, containing the trees B, C and E; the empty cells keep them in the columns used by row 2.
\node[anchor = base east, align = right] {corpus: \\ {\footnotesize(training data)}\phantom{:}};
&& \node[tri, fill = CVD light green ] {B};
& \node[tri, fill = CVD light green ] {C};
&& \node[tri, fill = CVD light green ] {E};
% Row 2: the trees A..G; green marks the trees that also appear in the corpus row, yellow and red the remaining ones.
\\ \node[anchor=base east] {$\trees{\varSigma}\colon$};
& \node[tri, fill = CVD light red] (A) {A};
& \node[tri, fill = CVD light green ] (B) {B};
& \node[tri, fill = CVD light green ] (C) {C};
& \node[tri, fill = CVD light yellow] (D) {D};
& \node[tri, fill = CVD light green ] (E) {E};
& \node[tri, fill = CVD light red ] (F) {F};
& \node[tri, fill = CVD light yellow] (G) {G};
& \node {$\dots$};
% Row 3: probabilities; slide 2 shows those of the green trees, slide 3 adds the yellow ones, slide 4 the red ones (probability 0).
\\ \node[anchor=base east, visible=<2->] {probabilities:};
& \node<4-> (pA) {0};
& \node<2-> (pB) {0.2};
& \node<2-> (pC) {0.1};
& \node<3-> (pD) {0.1};
& \node<2-> (pE) {0.4};
& \node<4-> (pF) {0};
& \node<3-> (pG) {0.1};
& \node<2-> {$\dots$};
\\
};
% Arrows from each tree to its probability; every arrow carries the same overlay specification as its target node.
\begin{scope}
[ |->
, > = Computer Modern Rightarrow
, line width = 0.4pt
, shorten < = 0.75em
, shorten > = 0.5em
, node distance = 4em
]
\draw<4-> (A) -- (pA);
\draw<2-> (B) -- (pB);
\draw<2-> (C) -- (pC);
\draw<3-> (D) -- (pD);
\draw<2-> (E) -- (pE);
\draw<4-> (F) -- (pF);
\draw<3-> (G) -- (pG);
\end{scope}
\end{tikzpicture}
Binary file added figure-telescope-reflected.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figure-telescope-without-reflected.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figure-telescope-without.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figure-telescope.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
33 changes: 33 additions & 0 deletions frame-algorithm-cbsm.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
% Standalone beamer frame with the pseudocode of the merging loop (file name: frame-algorithm-cbsm.tex).
% NOTE(review): \trees, \mle and \printfullcite are not defined in this file; presumably preamble.tex provides them.
\documentclass[beamer]{standalone}
\input{preamble.tex}
% Title and frame title are both taken from the file name.
\title{\jobname}
\begin{document}
\begin{standaloneframe}{\jobname}
\begin{algorithmic}[1]
\Require corpus \(c\) over \(\trees{Σ}\)
\Ensure sequence of bottom-up deterministic pta \(ℳ_0, …, ℳ_n\), \(n ∈ ℕ\)
% Initialisation: start from the canonical pta of the corpus.
\State \(ℳ_0 = (\mathscr{A}_0, ι_0, ρ_0) \gets \text{canonical pta of \(c\)}\)
\State \(i \gets 0\)
% Each iteration picks a merger, applies it to the previous automaton and re-estimates the weights from the corpus.
\While{there exists a non-trivial \(\mathscr{A}_i\)-merger}
\State
\(π \gets \Call{bestMerger}{\mathscr{A}_i, c}\)
\State
\(i \gets i + 1\)
\State
\(\mathscr{A}_i \gets π(\mathscr{A}_{i-1})\)
\State
\(ℳ_i \gets \mle[\mathscr{A}_i]{c}\)
\EndWhile
\end{algorithmic}
% Area below the algorithm: empty on slide 1, a note on slide 2, a literature reference on slide 3.
\begin{overprint}
\onslide<2>
\begin{center}
\emph{Note:} \(\mathcal{L}(\mathscr{A}_0) \subseteq \mathcal{L}(\mathscr{A}_1) \subseteq \mathcal{L}(\mathscr{A}_2) \subseteq \dots\)
\end{center}
\onslide<3>
\begin{block}{compare with}
\printfullcite{2001CarrascoOncinaCalera-Rubio}
\end{block}
\end{overprint}
\end{standaloneframe}
\end{document}
27 changes: 27 additions & 0 deletions frame-algorithm-split-merge-details.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
% Standalone beamer frame with the pseudocode of the SPLIT and MERGE subroutines.
% NOTE(review): \lklhd and \mrg are not defined in this file; presumably preamble.tex provides them.
\documentclass[beamer]{standalone}
\input{preamble.tex}
\begin{document}
\begin{standaloneframe}{\jobname}
\begin{algorithmic}[1]
% SPLIT: every state A of the input pta is split into the two states A^1 and A^2.
\Function{split}{$ℳ$}
\State $\pi \gets$ $ℳ$-splitter splitting every state $A$ in $ℳ$ into $A^1$ and $A^2$
\State \Return a proper $\pi$-split of $ℳ$
\EndFunction
\vspace{1em}
% MERGE: collect in \pi all split pairs whose merge passes the likelihood test, then merge them in one step.
\Function{merge}{$ℳ'$}
\State $\pi \gets \text{identity mapping}$
\ForAll{states $A$ s.t.\ $A^1$, $A^2$ in $ℳ'$}
% Trial merger \widehat{\pi}: merges only the pair A^1, A^2 and leaves everything else untouched.
\State $\widehat{\pi} \gets \text{identity mapping}$
\State $\widehat{\pi}(A^1) \gets A$ and $\widehat{\pi}(A^2) \gets A$
\State $\lambda \gets \text{a good $\widehat{\pi}$-distributor}$
% Accept the pair only if the likelihood ratio (after the trial merge / before) is at least \mu.
\If{$\displaystyle\frac{\lklhd{c}{\mrg_{\widehat{\pi}}^\lambda(ℳ')}}{\lklhd{c}{ℳ'}} \geq \mu$}
\label{algorithmic:lklhd-ratio}
\State $\pi(A^1) \gets A$ and $\pi(A^2) \gets A$
\EndIf
\EndFor
\State $\lambda \gets \text{a good $\pi$-distributor}$
\State \Return $\mrg_\pi^\lambda(ℳ')$
\EndFunction
\end{algorithmic}
\end{standaloneframe}
\end{document}
124 changes: 124 additions & 0 deletions frame-algorithm-split-merge.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
\documentclass[beamer]{standalone}
\input{preamble.tex}
\begin{document}
% Overlay plan for the seven slides of this frame; "+" marks a slide on which the item is active.
% NOTE(review): \timeline is not defined in this file; judging from its uses below, the second argument names a macro (\tlBPRef, ...) that can be used as an overlay specification -- confirm in preamble.tex.
% Columns, left to right: slides 1 2 3 4 5 6 7
% Reference to the Berkeley parser paper: slide 1 only.
\timeline{+ - - - - - -}{tlBPRef}
% Input/output description and pseudocode: slides 2-7.
\timeline{- + + + + + +}{tlPseudocode}
% Flowchart: every slide except slide 3.
\timeline{+ + - + + + +}{tlFlowchart}
% Theorem: slide 3 only (it takes the place of the flowchart).
\timeline{- - + - - - -}{tlTheorem}
% Split step struck out and hidden: slides 5 and 6.
\timeline{- - - - + + -}{tlSoutSplit}
% Final EM step struck out and hidden: slide 6 only.
\timeline{- - - - - + -}{tlSoutEM}
\begin{standaloneframe}{\jobname}
\transfade<\tlSoutSplit,\tlSoutEM>%
% Upper area of the frame: either the literature reference or the input/output description with the pseudocode.
% NOTE(review): \printfullcite, \lklhd, the overlay-aware \sout and \s are not defined in this file; presumably preamble.tex provides them.
\begin{overprint}
\onslide<\tlBPRef>
\begin{block}{Berkeley Parser}
\printfullcite{2006PetrovBarrettThibauxKlein}
\end{block}
\onslide<\tlPseudocode>
% The description labels reuse beamer's itemize bullet so that input and output read like a bulleted list.
\begin{description}
\item[\textbf{Input:} \usebeamertemplate{itemize item}]
corpus \(c\)
\item[\usebeamertemplate{itemize item}]
pta $ℳ_0$ such that $\lklhd{c}{⟦ℳ_0⟧} > 0$
%\item[\usebeamertemplate{itemize item}]
% \(μ ∈ [0, 1]\) and \(ε ∈ {]0, 1[}\)
\item[\textbf{Output:} \usebeamertemplate{itemize item}]
sequence of pta\s
\end{description}
\begin{algorithmic}
\For{\(i ← 1, 2, \dots\)}
% First line: split followed by EM; the whole line is struck out on the slides of \tlSoutSplit.
% Both lines use a box of fixed width 18.5em with \hfill, so the two statements of each line form two columns.
\State
\sout<\tlSoutSplit>{\makebox[18.5em]{%
\(ℳ'_1 ← \Call{split}{ℳ_{i-1}}\);%
\hfill%
\(ℳ'_2 ← \Call{EM}{ℳ'_1, c}\)%
}}
% Second line: merge followed by EM; only the EM statement is struck out on the slides of \tlSoutEM.
\State
\makebox[18.5em]{%
\(ℳ'_3 ← \Call{merge}{ℳ'_2, c}\);%
\hfill%
\sout<\tlSoutEM>{\(ℳ_i ← \Call{EM}{ℳ'_3, c}\)}%
}
\EndFor
\end{algorithmic}
\end{overprint}
\begin{overprint}
\onslide<\tlFlowchart>
\centering
\vspace{1.25em}
\footnotesize
% Flowchart of one iteration: rounded boxes (style "data") hold automata or the corpus, plain boxes (style "func") hold processing steps.
% NOTE(review): the "invisible" key is not defined in this file; presumably preamble.tex provides it.
\begin{tikzpicture}
[ data/.style={align=center, draw, rounded corners}
, func/.style={align=center, draw}
]
% 3x4 grid of nodes; cells are separated by \& because of "ampersand replacement".
% Nodes that belong to a struck-out step are made invisible on the corresponding timeline.
\matrix[column sep=3em, row sep=2em, ampersand replacement = \&]{
\coordinate (init);
\&
\node (Mi) [data ] {\strut \(ℳ_i\)};
\&
\node (split) [func, invisible = {<\tlSoutSplit>}] {\strut \Call{split}{}};
\&
\node (M1) [data, invisible = {<\tlSoutSplit>}] {\strut \(ℳ'_1\)};
\\\&
\node (EMmerge) [func, invisible = {<\tlSoutEM>} ] {\strut \Call{EM}{}};
\&
\node (corpus) [data ] {\strut corpus};
\&
\node (EMsplit) [func, invisible = {<\tlSoutSplit>}] {\strut \Call{EM}{}};
\\\&
\node (M3) [data, invisible = {<\tlSoutEM>} ] {\strut \(ℳ'_3\)};
\&
\node (merge) [func ] {\strut \Call{merge}{}};
\&
\node (M2) [data, invisible = {<\tlSoutSplit>}] {\strut \(ℳ'_2\)};
\\};

\begin{scope}[->, line cap = rect, rounded corners]
% Edges that are present on every slide of the flowchart.
\draw (init) -- (Mi) node[midway, above] {\(i ≔ 0\)};
% \path instead of \draw: only the label is placed here; the arrow into (Mi) is drawn in the second \alt below.
\path (EMmerge) -- (Mi) node[midway, left ] {\(i ≔ i + 1\)};
\draw (corpus) to[bend left=20] (merge);
% Split part: on the slides of \tlSoutSplit a single edge leads from (Mi) via the position of (M1) to (merge); otherwise the full chain split -> EM is drawn.
\alt<\tlSoutSplit>{%
\draw[rounded corners] (Mi) -- (M1.center) |- (merge);
}{%
\draw (Mi) -- (split);
\draw (split) -- (M1);
\draw (M1) -- (EMsplit);
\draw (EMsplit) -- (M2);
\draw (M2) -- (merge);
\draw (corpus) to[bend left=20] (EMsplit);
}%
% Final EM part: on the slides of \tlSoutEM the output of merge goes straight back to (Mi); otherwise it passes through (M3) and the EM step.
\alt<\tlSoutEM>{%
\draw[rounded corners] (merge) -| (Mi);
}{%
\draw (merge) -- (M3);
\draw (M3) -- (EMmerge);
\draw (EMmerge) -- (Mi);
\draw (corpus) to[bend left=20] (EMmerge);
}%
\end{scope}
\end{tikzpicture}
\onslide<\tlTheorem>
% Theorem shown instead of the flowchart: under the two assumptions, an iteration does not decrease the corpus likelihood.
% NOTE(review): \mle, \crisp and \lklhd are not defined in this file; presumably preamble.tex provides them.
\begin{block}{Theorem \hfill [TD]}\itshape
Let \(i ≥ 1\) be an iteration of the state splitting and merging algorithm.
Under the assumptions that
\begin{itemize}
\item
\(\Call{EM}{ℳ, c} = \mle[c]{ℳ}\) for every pta \(ℳ\), and
\item
\(\crisp{ℳ_{i-1}}\) is a sub-ta of \(\crisp{ℳ'_3}\) up to isomorphism,
\end{itemize}
we have that
% Remove the vertical space around the display so that the block stays compact.
\setlength{\abovedisplayskip} {0pt}%
\setlength{\belowdisplayskip} {0pt}%
\setlength{\abovedisplayshortskip}{0pt}%
\setlength{\belowdisplayshortskip}{0pt}%
\[
\lklhd{c}{⟦ℳ_{i-1}⟧}
\leq
\lklhd{c}{⟦ℳ_i⟧}
\text{.}
\]
\end{block}
\end{overprint}
\end{standaloneframe}
\end{document}

0 comments on commit 31fc8c6

Please sign in to comment.