diff --git a/pres.pdf b/pres.pdf
index e14f8a0..930b6db 100644
Binary files a/pres.pdf and b/pres.pdf differ
diff --git a/pres.tex b/pres.tex
index d053afe..c6e78b9 100644
--- a/pres.tex
+++ b/pres.tex
@@ -170,25 +170,22 @@
     \item Should treat model as a black box
   \end{itemize}
-
+  \note{
+    \textbf{Interpretable} \\
+    Use a representation understandable to humans \\
+    Could be a binary vector indicating the presence or absence of a word \\
+    Could be a binary vector indicating the presence or absence of super-pixels in an image \\
+    \textbf{Fidelity} \\
+    Essentially means the explanation should be faithful to the model. \\
+    Local fidelity does not imply global fidelity \\
+    The explanation should correspond to how the model behaves in the vicinity of the instance being predicted \\
+    \textbf{Model-agnostic} \\
+    The explanation should be blind to what model is underneath \\
+  }
 \end{itemize}
 \end{frame}
-  \note[itemize] {
-    \item \textbf{Interpretable}
-
-    \item Use a representation understandable to humans
-    \item Could be a binary vector indicating presence or absence of a word
-    \item Could be a binary vector indicating presence of absence of super-pixels in an image
-    \item \textbf{Fidelity}
-    \item Essentially means the model should be faithful.
-    \item Local fidelity does not imply global fidelity
-    \item The explanation should aim to correspond to how the model behaves in the vicinity of the instance being predicted
-    \item \textbf{Model-agnostic}
-    \item The explanation should be blind to what model is underneath
-  }
-
@@ -206,17 +203,16 @@
   \end{itemize}
   $$\xi(x) = \operatornamewithlimits{argmin}_{g \in G} \mathcal{L}(f,g,\pi_x) + \Omega(g)$$
+  \note{
+    \textbf{Interpretable models could be:} \\
+    Linear models, decision trees \\
+    $g$ acts on a binary vector indicating presence or absence of \emph{interpretable components} \\
+    $\Omega(g)$ could be the depth of a decision tree or the number of non-zero weights of a linear model \\
+    In classification, $f(x)$ is the probability or binary indicator that $x$ belongs to a certain class \\
+    So a more complex $g$ will achieve a more faithful explanation (a lower $\mathcal{L}$), but will increase the value of $\Omega(g)$ \\
+  }
 \end{frame}
-  \note[itemize] {
-    \item \textbf{Intepretable models could be:}
-    \item Linear models, decision trees
-    \item $g$ is a vector showing presence or absence of \emph{interpretable components}
-    \item $\Omega(g)$ could be height of a DT or number of non-zero weights of linear model
-    \item In classification, $f(x)$ is the probability or binary indicator that x belongs to a certain class
-    \item So a more complex g will achieve a more faithful interpretation (a lower L), but will increase the value of Omega(g)
-  }
-
 \begin{frame}
   \frametitle{Sampling for Local Exploration}
@@ -229,13 +225,14 @@
   \center
   \includegraphics[scale=0.15]{graphics/sample_points.png}
-
+  \note{
+    What is $x'$ here? It is the interpretable (binary) representation of $x$ \\
+    $g$ acts in dimension $d'$ while $f$ acts in dimension $d$: a perturbed sample $z'$ in dimension $d'$ (the input to $g$) can be mapped back to $z$ in the original representation, which $f$ evaluates in dimension $d$.
+
+  }
 \end{frame}
-  \note[itemize] {
-    \item WTF is x' here? - An interpretable version of x
-    \item g acts in d' while f acts in d, so when we say that we have z' in dimension d', it's the model g, we can recover the z in the original representation i.e. explained by f in dimension d.
-  }
+
 % \subsubsection{Examples}
@@ -277,13 +274,14 @@
   \Return $w$
   \caption{Sparse Linear Explanations using LIME}
 \end{algorithm}
+\note{
+  Talk through the algorithm, discussing the sampling and K-Lasso (least absolute shrinkage and selection operator), which is used for feature selection \\
+  This algorithm approximates the minimization problem of computing a single individual explanation of a prediction. \\
+  K-Lasso selects the $K$ features with the Lasso and then learns their weights via least squares; these weights are the coefficients of the sparse linear explanation over the selected features
+  }
 \end{frame}
-  \note[itemize] {
-    \item Talk through the algorithm, discussing the sampling and K-Lasso (least absolute shrinkage and selection operator), which is used for feature selection
-    \item This algorithm approximates the minimization problem of computing a single individual explanation of a prediction.
-    \item K-Lasso is the procedure of learning the weights via least squares. Wtf are these weights??? - The features
-  }
+
 \subsection{Explaining Models}
@@ -317,13 +315,14 @@
   \center
   \includegraphics[scale=0.68]{graphics/picker_first.png} \\
   \hspace{1cm}
+  \note{
+    This is a binary matrix of instances (rows) and features (columns), where each entry indicates whether a feature is present in that instance. \\
+    The blue line marks the most prevalent feature, which is important because it appears in most of the instances. \\
+    The red lines indicate the two instances which are most useful for explaining the model. \\
+    Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
+  }
 \end{frame}
-  \note[itemize] {
-    \item This is a matrix explaining instances and their features explained by a binary list s.t. an instance either has a feature or does not.
-    \item The blue line explains the most inherent feature, which is important, as it is found in most of the instances.
-    \item The red lines indicate the two samples which are most important in explaining the model.
-    \item Thus, explaining importance, is done by: $I_j = sqrt(sum_i=1^n W_ij)$
-  }
+
 \begin{frame}
   \frametitle{Picking instances}
   \center
@@ -331,13 +330,14 @@
   \begin{itemize}
     \item $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
   \end{itemize}
-  \end{frame}
-  \note[itemize] {
-    \item This is a matrix explaining instances and their features explained by a binary list s.t. an instance either has a feature or does not.
-    \item The blue line explains the most inherent feature, which is important, as it is found in most of the instances.
-    \item The red lines indicate the two samples which are most important in explaining the model.
-    \item Thus, explaining importance, is done by: $I_j = sqrt(sum_i=1^n W_ij)$
-  }
+  \note{
+    This is a binary matrix of instances (rows) and features (columns), where each entry indicates whether a feature is present in that instance. \\
+    The blue line marks the most prevalent feature, which is important because it appears in most of the instances. \\
+    The red lines indicate the two instances which are most useful for explaining the model. \\
+    Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
+  }
+  \end{frame}
+
 \begin{frame}
   \frametitle{Picking instances}
   \center
@@ -345,13 +345,14 @@
   \begin{itemize}
     \item $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
   \end{itemize}
-  \end{frame}
-  \note[itemize] {
-    \item This is a matrix explaining instances and their features explained by a binary list s.t. an instance either has a feature or does not.
-    \item The blue line explains the most inherent feature, which is important, as it is found in most of the instances.
-    \item The red lines indicate the two samples which are most important in explaining the model.
-    \item Thus, explaining importance, is done by: $I_j = \sqrt(\sum_{i=1}^n W_ij)$
-  }
+  \note{
+    This is a binary matrix of instances (rows) and features (columns), where each entry indicates whether a feature is present in that instance. \\
+    The blue line marks the most prevalent feature, which is important because it appears in most of the instances. \\
+    The red lines indicate the two instances which are most useful for explaining the model. \\
+    Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
+  }
+  \end{frame}
+
 \begin{frame}
   \frametitle{Picking instances}
   \center
@@ -359,14 +360,14 @@
   \begin{itemize}
     \item $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
   \end{itemize}
+  \note{
+    This is a binary matrix of instances (rows) and features (columns), where each entry indicates whether a feature is present in that instance. \\
+    The blue line marks the most prevalent feature, which is important because it appears in most of the instances. \\
+    The red lines indicate the two instances which are most useful for explaining the model. \\
+    Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
+  }
+  \end{frame}
-  \end{frame}
-  \note[itemize] {
-    \item This is a matrix explaining instances and their features explained by a binary list s.t. an instance either has a feature or does not.
-    \item The blue line explains the most inherent feature, which is important, as it is found in most of the instances.
-    \item The red lines indicate the two samples which are most important in explaining the model.
-    \item Thus, explaining importance, is done by: $I_j = sqrt(sum_i=1^n W_ij)$
-  }
 \begin{frame}
 \frametitle{Submodular Picks}
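
The notes above on $\xi(x) = \operatornamewithlimits{argmin}_{g \in G} \mathcal{L}(f,g,\pi_x) + \Omega(g)$ and on the "Sampling for Local Exploration" frame describe one loop: perturb the interpretable representation $x'$, map each sample $z'$ back to the original space, query the black box $f$, weight the samples by $\pi_x$, and fit an interpretable model $g$. The Python sketch below only illustrates that loop and is not the authors' implementation; `reconstruct`, `predict_fn`, the Ridge surrogate (standing in loosely for $\Omega(g)$) and the kernel width are assumptions.

import numpy as np
from sklearn.linear_model import Ridge

def explain_instance(x_prime, reconstruct, predict_fn,
                     num_samples=5000, kernel_width=0.75, seed=0):
    """Fit a locally weighted linear surrogate g around one prediction of f."""
    x_prime = np.asarray(x_prime)        # binary interpretable version of x, d' components
    rng = np.random.default_rng(seed)

    # Sample z' around x' by randomly switching interpretable components off.
    mask = rng.integers(0, 2, size=(num_samples, x_prime.size))
    Z_prime = x_prime * mask
    Z_prime[0] = x_prime                 # keep the instance itself in the sample

    # Recover each z in the original representation (dimension d) and query f.
    y = np.array([predict_fn(reconstruct(z)) for z in Z_prime])

    # Locality weights pi_x: exponential kernel on the distance to x'.
    distances = np.linalg.norm(Z_prime - x_prime, axis=1)
    weights = np.exp(-(distances ** 2) / kernel_width ** 2)

    # Weighted fit of the interpretable model g; its coefficients explain f near x.
    g = Ridge(alpha=1.0)
    g.fit(Z_prime, y, sample_weight=weights)
    return g.coef_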
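
The "Sparse Linear Explanations using LIME" algorithm frame relies on K-Lasso: select $K$ interpretable features from the Lasso regularization path, then learn their weights by weighted least squares. Below is a hedged sketch of that step, assuming the `Z_prime`, `y` and `weights` arrays from the previous sketch; rescaling the rows by the square root of the weights is a shortcut for the weighted Lasso here, not necessarily what the released code does.

import numpy as np
from sklearn.linear_model import LinearRegression, lars_path

def k_lasso(Z_prime, y, weights, K):
    """Select K features on the Lasso path, then fit least-squares weights."""
    sw = np.sqrt(weights)
    Zw, yw = Z_prime * sw[:, None], y * sw   # fold the locality weights into the data

    # Walk the Lasso path from its dense end until at most K coefficients are non-zero.
    _, _, coefs = lars_path(Zw, yw, method="lasso")
    for col in range(coefs.shape[1] - 1, -1, -1):
        nonzero = np.flatnonzero(coefs[:, col])
        if len(nonzero) <= K:
            selected = nonzero
            break

    # The explanation weights are the least-squares coefficients over the selected features.
    ls = LinearRegression()
    ls.fit(Z_prime[:, selected], y, sample_weight=weights)
    return selected, ls.coef_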
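
The repeated "Picking instances" notes and the following "Submodular Picks" frame cover the model-level step: build an explanation matrix $W$ with instances as rows and features as columns, score each feature globally with $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$, and pick a small budget of instances whose explanations cover the important features. The greedy routine below is one plausible reading of that coverage step with illustrative names; it is not lifted from the slides.

import numpy as np

def feature_importance(W):
    """Global importance I_j = sqrt(sum_i W_ij) of each feature (column) of W."""
    return np.sqrt(np.asarray(W, dtype=float).sum(axis=0))

def greedy_pick(W, budget):
    """Greedily pick `budget` instances whose explanations cover important features."""
    W = np.asarray(W, dtype=float)
    I = feature_importance(W)
    picked = []
    covered = np.zeros(W.shape[1], dtype=bool)
    for _ in range(budget):
        # Total importance covered if instance i were added to the picked set.
        gains = [(I * ((W[i] > 0) | covered)).sum() if i not in picked else -np.inf
                 for i in range(W.shape[0])]
        best = int(np.argmax(gains))
        picked.append(best)
        covered |= W[best] > 0
    return picked

Greedy selection is the natural choice because this coverage objective is submodular, which is where the frame title "Submodular Picks" comes from.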