Almost done questionmark

Alexander Munch-Hansen 2019-11-24 01:53:17 +01:00
parent 84eb6fe64b
commit 589920550a
2 changed files with 178 additions and 58 deletions

BIN
pres.pdf

Binary file not shown.

236
pres.tex

@@ -13,7 +13,7 @@
% Ask Lasse why min(L(f,g,pi_x(z)) + omega(g)) becomes intractable when omega(g) is a constant!
\usepackage{dirtytalk}
\usepackage{bbm}
\usepackage{setspace}
\usepackage[T1]{fontenc}
@@ -105,7 +105,7 @@
\begin{frame}
\frametitle{Previous Solutions}
\begin{itemize}
% Practitioners consistently overestimate their models' accuracy [20], propagate feedback loops [23], or fail to notice data leaks
\note{ Practitioners consistently overestimate their models' accuracy [20], propagate feedback loops [23], or fail to notice data leaks }
\item Relying on accuracy based on validation set
\item Gestalt
\item Modeltracker
@@ -125,21 +125,22 @@
\begin{frame}
% It becomes clear the dataset has issues, as there is a fake correlation between the header information and the class Atheism. It is also clear what the problems are, and the steps that can be taken to fix these issues and train a more trustworthy classifier.
\frametitle{A look into two predictions}
\includegraphics[scale=0.25]{graphics/christ_vs_atheism.png}
\end{frame}
\note{ It becomes clear the dataset has issues, as there is a fake correlation between the header information and the class Atheism. It is also clear what the problems are, and the steps that can be taken to fix these issues and train a more trustworthy classifier.}
\begin{frame}
\frametitle{A look into two predictions}
\includegraphics[scale=0.25]{graphics/christ_vs_atheism_annotated_1.png}
\end{frame}
\note{ It becomes clear the dataset has issues, as there is a fake correlation between the header information and the class Atheism. It is also clear what the problems are, and the steps that can be taken to fix these issues and train a more trustworthy classifier.}
\begin{frame}
\frametitle{A look into two predictions}
\includegraphics[scale=0.25]{graphics/christ_vs_atheism_annotated_2.png}
\end{frame}
\note{ It becomes clear the dataset has issues, as there is a fake correlation between the header information and the class Atheism. It is also clear what the problems are, and the steps that can be taken to fix these issues and train a more trustworthy classifier.}
\subsection{The LIME framework}
\begin{frame}
\frametitle{LIME}
@@ -154,62 +155,73 @@
\begin{frame}
\frametitle{Properties of a good explanation}
\begin{itemize}
\item It should be \emph{interpretable}:
\item It should be \emph{interpretable}
\begin{itemize}
\item They must provide qualitative understanding between the input variables and the response
\item They must take into account the user's limitations
\item Use a representation understandable to humans
\item Could be a binary vector indicating presence or absence of a word
\item Could be a binary vector indicating presence or absence of super-pixels in an image
\end{itemize}
\item It should have \emph{fidelity}:
\item It should have \emph{fidelity}
\begin{itemize}
\item Essentially means the explanation should be faithful to the model.
\item Local fidelity does not imply global fidelity
\item The explanation should aim to correspond to how the model behaves in the vicinity of the instance being predicted
\item Essentially means the explanation should be faithful to the model.
\end{itemize}
\item It should be \emph{model-agnostic}:
\item It should be \emph{model-agnostic}
\begin{itemize}
\item The explanation should be blind to what model is underneath
\item Should treat model as a black box
\end{itemize}
\end{itemize}
\end{frame}
\note[itemize] {
\item \textbf{Interpretable}
\item Use a representation understandable to humans
\item Could be a binary vector indicating presence or absence of a word
\item Could be a binary vector indicating presence or absence of super-pixels in an image
\item \textbf{Fidelity}
\item Essentially means the explanation should be faithful to the model.
\item Local fidelity does not imply global fidelity
\item The explanation should aim to correspond to how the model behaves in the vicinity of the instance being predicted
\item \textbf{Model-agnostic}
\item The explanation should be blind to what model is underneath
}
\subsection{Explaining Predictions}
\begin{frame}[shrink=20]
\begin{frame}[shrink=10]
\frametitle{The Fidelity-Interpretability Trade-off}
We want a simple explanation, still capable of displaying fidelity
\begin{itemize}
\item Let an explanation be defined as a model $g \in G$, where $G$ is a class of \emph{potentially interpretable} models and the domain of $g$ is $\{0,1\}^{d^{\prime}}$
\begin{itemize}
\item Linear models, decision trees
\item $g$ acts over a binary vector indicating presence or absence of \emph{interpretable components}
\end{itemize}
\item $\Omega(g)$ measures the \emph{complexity} of an explanation $g$
\begin{itemize}
\item Could be the depth of a decision tree or the number of non-zero weights of a linear model
\end{itemize}
\end{itemize}
\item The model we try to explain is $f : \mathbb{R}^d \rightarrow \mathbb{R}$
\begin{itemize}
\item In classification, $f(x)$ is the probability or binary indicator that $x$ belongs to a certain class
\end{itemize}
\item $\pi_x(z)$ is a proximity measure between instance $z$ and $x$ and defines the locality around $x$
\item $\mathcal{L}(f,g,\pi_x)$ defines how \emph{unfaithful} $g$ is in approximating $f$ in the locality defined by $\pi_x$.
\item Ensuring both \emph{interpretability} and \emph{local fidelity}, we minimize $\mathcal{L}$ while having $\Omega(g)$ be low as well
\item To ensure both \emph{interpretability} and \emph{local fidelity}, we minimize $\mathcal{L}$ while having $\Omega(g)$ be low as well
\end{itemize}
% So a more complex g will achieve a more faithful interpretation (a lower L), but will increase the value of Omega(g)
$$\xi(x) = \operatornamewithlimits{argmin}_{g \in G} \mathcal{L}(f,g,\pi_x) + \Omega(g)$$
\end{frame}
\note[itemize] {
\item \textbf{Interpretable models could be:}
\item Linear models, decision trees
\item $g$ acts over a binary vector indicating presence or absence of \emph{interpretable components}
\item $\Omega(g)$ could be the depth of a decision tree or the number of non-zero weights of a linear model
\item In classification, $f(x)$ is the probability or binary indicator that $x$ belongs to a certain class
\item So a more complex $g$ will achieve a more faithful interpretation (a lower $\mathcal{L}$), but will increase the value of $\Omega(g)$
}
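To make the objective concrete: for the sparse linear explanations used later, the paper instantiates $\mathcal{L}$ as a locally weighted square loss with an exponential kernel $\pi_x(z) = \exp(-D(x,z)^2/\sigma^2)$. The sketch below is illustrative Python, not the authors' code; the Euclidean distance, the kernel width \texttt{sigma} and the function names are assumptions.

import numpy as np

def pi_x(x, z, sigma=0.75):
    # Proximity of a perturbed sample z to the instance x (exponential kernel).
    return np.exp(-np.linalg.norm(x - z) ** 2 / sigma ** 2)

def local_loss(f, w, x, samples, sigma=0.75):
    # samples: (z, z') pairs; w: weights of a linear explanation g(z') = w . z'.
    loss = 0.0
    for z, z_prime in samples:
        loss += pi_x(x, z, sigma) * (f(z) - w @ z_prime) ** 2
    return loss

A more complex $g$ drives this loss down, but is penalised through $\Omega(g)$ in the objective above.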
\begin{frame}
% Note: WTF is x' here? - An interpretable version of x
% Note: g acts in d' while f acts in d, so when we say that we have z' in dimension d', it's the model g, we can recover the z in the original representation i.e. explained by f in dimension d.
\frametitle{Sampling for Local Exploration}
Goal: Minimizing $\mathcal{L}(f,g,\pi_x)$ without making assumptions on $f$
\begin{itemize}
\item For a sample $x$, we need to draw samples around $x$
\item For a sample $x'$, we need to draw samples around $x'$
\item Accomplished by drawing non-zero elements of $x^\prime$ uniformly at random, resulting in perturbed samples $z^\prime$
\item Given $z^\prime \in \{0,1\}^{d^\prime}$, we recover the sample in the original representation $z \in \mathbb{R}^d$ and compute $f(z)$, so we have a label for $z^\prime$.
\end{itemize}
@@ -218,6 +230,12 @@
\includegraphics[scale=0.15]{graphics/sample_points.png}
\end{frame}
\note[itemize] {
\item What is $x^\prime$ here? An interpretable version of $x$
\item $g$ acts in dimension $d^\prime$ while $f$ acts in dimension $d$; given $z^\prime$ in dimension $d^\prime$ (the domain of $g$), we can recover $z$ in the original representation, i.e. the representation $f$ works on in dimension $d$
}
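A rough sketch of this sampling loop, assuming a binary interpretable representation; the helper names (\texttt{sample\_around}, \texttt{recover}) and the uniform choice of how many components to keep are illustrative assumptions, not the paper's implementation.

import numpy as np

def sample_around(x_prime, rng):
    # Draw one z' in {0,1}^{d'} by keeping a random subset of the non-zero components of x'.
    active = np.flatnonzero(x_prime)
    keep = rng.choice(active, size=rng.integers(1, len(active) + 1), replace=False)
    z_prime = np.zeros_like(x_prime)
    z_prime[keep] = 1
    return z_prime

def build_dataset(f, x, x_prime, recover, pi_x, n_samples=1000, seed=0):
    # Collect (z', f(z), pi_x(z)) triples, as in the loop of Algorithm 1.
    rng = np.random.default_rng(seed)
    Z = []
    for _ in range(n_samples):
        z_prime = sample_around(x_prime, rng)
        z = recover(x, z_prime)            # map z' back to the original representation
        Z.append((z_prime, f(z), pi_x(x, z)))
    return Z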
% \subsubsection{Examples}
\begin{frame}
@@ -235,8 +253,12 @@
\begin{frame}
% \frametitle{Sparse Linear Explanations}
% Talk through the algorithm, discussing the sampling and K-Lasso (least absolute shrinkage and selection operator), which is used for feature selection
\frametitle{Explaining an individual prediction}
\begin{itemize}
\item Solving $\operatornamewithlimits{argmin}_{g \in G} \mathcal{L}(f,g,\pi_x) + \Omega(g)$ directly is intractable, but this algorithm approximates it.
\item K-Lasso is the procedure of picking $K$ features with Lasso and then computing their weights with least squares.
\end{itemize}
\begin{algorithm}[H]
\setstretch{0.9}
\SetAlgoLined
@@ -247,16 +269,20 @@
$\mathcal{Z} \leftarrow \{\}$ \\
\For{$i \in \{1,2,3,\dots, N\}$}{
$z_i^{\prime} \leftarrow sample\_around(x^{\prime})$ \\
$\mathcal{Z} \leftarrow \mathcal{Z} \cup \langle z_i^{\prime}, f(z_i), \pi_{x}(z_i) \rangle$ \\
$\text{add }\langle z_i^{\prime}, f(z_i), \pi_{x}(z_i) \rangle \text{ to } \mathcal{Z}$\\
}
$w \leftarrow \text{K-Lasso}(\mathcal{Z},K) \vartriangleright \text{with } z_i^{\prime} \text{ as features, } f(z) \text{ as target}$ \\
$w \leftarrow \text{K-Lasso}(\mathcal{Z},K) \qquad \qquad // \text{with } z_i^{\prime} \text{ as features, } f(z) \text{ as target}$ \\
\Return $w$
\caption{Sparse Linear Explanations using LIME}
\end{algorithm}
% This algorithm approximates the minimization problem of computing a single individual explanation of a prediction.
% K-Lasso is the procedure of learning the weights via least squares. Wtf are these weights??? - The features
\end{frame}
\note[itemize] {
\item Talk through the algorithm, discussing the sampling and K-Lasso (least absolute shrinkage and selection operator), which is used for feature selection
\item This algorithm approximates the minimization problem of computing a single individual explanation of a prediction.
\item K-Lasso first selects $K$ features with Lasso and then learns the weights of those features via least squares; the returned weights are the explanation.
}
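A hedged sketch of the K-Lasso step, assuming scikit-learn's \texttt{lars\_path} for the Lasso regularisation path and the usual square-root trick for folding the proximity weights into the data; the real LIME implementation differs in details.

import numpy as np
from sklearn.linear_model import lars_path

def k_lasso(Z_prime, targets, weights, K):
    # Z_prime: (N, d') binary perturbations, targets: f(z), weights: pi_x(z).
    sw = np.sqrt(weights)
    Xw, yw = Z_prime * sw[:, None], targets * sw   # weighted least-squares trick
    # Walk the Lasso path and stop once K features are active.
    _, _, coefs = lars_path(Xw, yw, method='lasso')
    for col in range(coefs.shape[1]):
        active = np.flatnonzero(coefs[:, col])
        if len(active) >= K:
            break
    selected = active[:K]
    # Least squares on the selected features gives the explanation weights.
    w, *_ = np.linalg.lstsq(Xw[:, selected], yw, rcond=None)
    return dict(zip(selected.tolist(), w.tolist()))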
\subsection{Explaining Models}
\begin{frame}
@@ -277,10 +303,10 @@
The task of selecting $B$ instances for the user to inspect
\begin{itemize}
\item Not dependent on the existence of explanations
\item So it should not assist users in selecting instances themselves
\item Should not assist users in selecting instances themselves
\item Looking at raw data is not enough to understand predictions and get insights
\item Should take into account the explanations that accompany each prediction
\item Should pick a diverse, representative set of explanations to show the user, i.e. non-redundant explanations that represent how the model behaves globally.
\note{Should pick a diverse, representative set of explanations to show the user, i.e. non-redundant explanations that represent how the model behaves globally.}
\end{itemize}
\end{frame}
@@ -290,32 +316,60 @@
\center
\includegraphics[scale=0.81]{graphics/picker_first.png}
\end{frame}
\note[itemize] {
\item This is a matrix of instances and their features, represented as binary indicators s.t. an instance either has a feature or does not.
\item The blue line marks the most important feature, as it is found in most of the instances.
\item The red lines indicate the two instances that are most important in explaining the model.
\item Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
}
\begin{frame}
\frametitle{Picking instances}
\center
\includegraphics[scale=0.32]{graphics/picker_second.png}
\end{frame}
\note[itemize] {
\item This is a matrix of instances and their features, represented as binary indicators s.t. an instance either has a feature or does not.
\item The blue line marks the most important feature, as it is found in most of the instances.
\item The red lines indicate the two instances that are most important in explaining the model.
\item Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
}
\begin{frame}
\frametitle{Picking instances}
\center
\includegraphics[scale=0.32]{graphics/picker_third.png}
\end{frame}
\note[itemize] {
\item This is a matrix of instances and their features, represented as binary indicators s.t. an instance either has a feature or does not.
\item The blue line marks the most important feature, as it is found in most of the instances.
\item The red lines indicate the two instances that are most important in explaining the model.
\item Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
}
\begin{frame}
\frametitle{Picking instances}
\center
\includegraphics[scale=0.32]{graphics/picker_final.png}
\end{frame}
\note[itemize] {
\item This is a matrix of instances and their features, represented as binary indicators s.t. an instance either has a feature or does not.
\item The blue line marks the most important feature, as it is found in most of the instances.
\item The red lines indicate the two instances that are most important in explaining the model.
\item Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
}
\begin{frame}
\frametitle{Definitions}
\begin{itemize}
\item $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
% c is a coverage function, which computes the total importance of the features that appear in at least one instance in a set V .
% Note: maximizing a weighted coverage function is NP-hard, but the version used in the algorithm is iteratively greedy, so it just adds the instance with the maximum coverage gain, which offers a constant-factor approximation guarantee of 1-1/e to the optimum.
\item $c(V,W,I) = \sum\limits_{j=1}^{d^\prime} \mathbbm{1}_{[\exists i \in V : W_{ij} > 0]}\ I_j$
\item $Pick(W,I) = \operatornamewithlimits{argmax}\limits_{V,|V| \leq B} c(V,W,I)$
\end{itemize}
\end{frame}
\note[itemize] {
\item $c$ is a coverage function, which computes the total importance of the features that appear in at least one instance in a set $V$.
\item Note: maximizing a weighted coverage function is NP-hard, but the version used in the algorithm is iteratively greedy, so it just adds the instance with the maximum coverage gain, which offers a constant-factor approximation guarantee of $1 - 1/e$ to the optimum.
}
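A minimal Python sketch of the greedy pick step built on the coverage function $c$ above, with importances $I_j = \sqrt{\sum_{i=1}^n |W_{ij}|}$ as in Algorithm 2; variable names and tie-breaking are illustrative assumptions.

import numpy as np

def coverage(V, W, I):
    # Total importance of the features covered by at least one instance in V.
    if not V:
        return 0.0
    covered = (np.abs(W[list(V)]) > 0).any(axis=0)
    return float(I[covered].sum())

def submodular_pick(W, B):
    I = np.sqrt(np.abs(W).sum(axis=0))      # feature importances I_j
    V = set()
    while len(V) < B:
        gains = {i: coverage(V | {i}, W, I) for i in range(W.shape[0]) if i not in V}
        if not gains:
            break
        V.add(max(gains, key=gains.get))    # greedily add the instance with the largest coverage
    return sorted(V)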
\begin{frame}
\frametitle{Explanation of Algorithm 2}
@@ -326,19 +380,20 @@
\begin{frame}
\frametitle{Submodular Picks}
$c(V,W,I) = \sum\limits_{j=1}^{d^\prime} \mathbbm{1}_{[\exists i \in V : W_{ij} > 0]}\ I_j$
\begin{algorithm}[H]
\setstretch{0.9}
\SetAlgoLined
\kwRequire{Instances $X$, Budget $B$}
\Indp
\ForAll{$x_i \in X$}{
$W_i \leftarrow \mathbf{explain}(x_i, x_i^{\prime})$ \qquad \qquad $\vartriangleright$ Using Algorithm 1
$W_i \leftarrow \mathbf{explain}(x_i, x_i^{\prime})$ \qquad \qquad // Using Algorithm 1
}
\For{$j \in \{1 \dots d^{\prime}\}$} {
$I_j \leftarrow \sqrt{\sum_{i=1}^n |W_{ij}|}$ \qquad \qquad \quad $\vartriangleright$ Compute feature importances
$I_j \leftarrow \sqrt{\sum_{i=1}^n |W_{ij}|}$ \qquad \qquad \quad // Compute feature importances
}
$V \leftarrow \{\}$ \\
\While(\qquad \qquad \qquad \quad \ \ $\vartriangleright$ Greedy optimisation of Eq 4){$|V| < B$} {
\While(\qquad \qquad \qquad \quad \ \ // Greedy optimisation of Eq 4){$|V| < B$} {
$V \leftarrow V \cup \text{argmax}_i \ c(V \cup \{i\}, W, I)$
}
\Return $V$
@@ -375,14 +430,16 @@
\item Explanations of \textbf{LIME} are compared with \textbf{parzen}
\begin{itemize}
\item \textbf{parzen} approximates black box classifier globally and explains individual predictions by taking the gradient of the prediction probability function.
\item Both are also compared to a greedy method where features are picked by removing most contributing ones until prediction change, as well as a random procedure.
% K explains the amount of words in the BoW model and the complexity of the model. Higher K => More complex but more faithful, lower k => Less complex, potentially less faithful
\item $K = 10$ for the experiments
\end{itemize}
\item $K = 10$ for the experiments
\end{itemize}
\end{frame}
\note[itemize] {
\item Both are also compared to a greedy method, where features are removed in order of their contribution until the prediction changes, as well as a random procedure.
\item $K$ sets the number of words in the BoW explanation and thus the complexity of the explanation model: higher $K$ means more complex but more faithful, lower $K$ means less complex, potentially less faithful
}
\begin{frame}
\frametitle{Faithfulness}
\begin{itemize}
@@ -397,7 +454,6 @@
\begin{frame}
\frametitle{Faithfulness}
% We observe that the greedy approach is comparable to parzen on logistic regression, but is substantially worse on decision trees since changing a single feature at a time often does not have an effect on the prediction. The overall recall by parzen is low, likely due to the difficulty in approximating the original highdimensional classifier. LIME consistently provides > 90% recall for both classifiers on both datasets, demonstrating that LIME explanations are faithful to the models.
\centering
% Books faithfulness
@@ -406,20 +462,29 @@
\includegraphics[height=0.35\textheight]{graphics/dvds_dt_lr.png}
\end{frame}
\note[itemize] {
\item We observe that the greedy approach is comparable to parzen on logistic regression, but is substantially worse on decision trees since changing a single feature at a time often does not have an effect on the prediction.
\item The overall recall by parzen is low, likely due to the difficulty in approximating the original high-dimensional classifier.
\item LIME consistently provides > 90\% recall for both classifiers on both datasets, demonstrating that LIME explanations are faithful to the models.
}
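For clarity, the recall reported here can be computed as below; a hedged sketch, assuming (as in the paper's setup) that each interpretable classifier uses at most 10 known \say{gold} features and recall is the fraction of them an explanation recovers.

def explanation_recall(gold_features, explained_features):
    # Fraction of the classifier's gold features recovered by the explanation.
    gold = set(gold_features)
    return len(gold & set(explained_features)) / len(gold)

# e.g. recovering 9 of the 10 gold features gives a recall of 0.9
assert explanation_recall(range(10), [0, 1, 2, 3, 4, 5, 6, 7, 8, 42]) == 0.9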
\begin{frame}
\frametitle{Should I trust this prediction?}
% In statistical analysis of binary classification, the F1 score (also F-score or F-measure) is a measure of a test's accuracy. It considers both the precision p and the recall r of the test to compute the score: p is the number of correct positive results divided by the number of all positive results returned by the classifier, and r is the number of correct positive results divided by the number of all relevant samples (all samples that should have been identified as positive). The F1 score is the harmonic mean of the precision and recall, where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0.
% Seems kind of unfair, that random and greedy is mistrusted by simply having an unstrutworthy feature in their explanation, while LIME and parzen just have to not change, when these untrustworthy are removed.
\includegraphics[scale=0.4]{graphics/F1_trust.png}
\end{frame}
\note[itemize] {
\item In statistical analysis of binary classification, the F1 score (also F-score or F-measure) is a measure of a test's accuracy. It considers both the precision $p$ and the recall $r$ of the test: $p$ is the number of correct positive results divided by the number of all positive results returned by the classifier, and $r$ is the number of correct positive results divided by the number of all samples that should have been identified as positive. The F1 score is the harmonic mean of precision and recall, reaching its best value at 1 (perfect precision and recall) and its worst at 0.
\item It seems somewhat unfair that random and greedy are mistrusted simply for having an untrustworthy feature in their explanation, while LIME and parzen only have to keep their prediction unchanged when the untrustworthy features are removed.
}
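As a quick worked example of the harmonic mean (the numbers are illustrative, not from the paper):
$$F_1 = 2 \cdot \frac{p \cdot r}{p + r}, \qquad p = 0.8,\ r = 0.6 \;\Rightarrow\; F_1 = 2 \cdot \frac{0.8 \cdot 0.6}{1.4} \approx 0.69$$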
\begin{frame}
\frametitle{Can I trust this model?}
\begin{itemize}
\item Evaluate if explanations can be used for model selection
\item They add 10 artificially “noisy” features s.t.
\item They add 10 artificially \say{noisy} features s.t.
\begin{itemize}
\item Each artificial feature appears in 10\% of the examples in one class and 20\% of the examples in the other class in the training/validation data.
\item While on the test instances, each artificial feature appears in 10\% of the examples in each class.
@@ -431,29 +496,82 @@
\begin{frame}
\frametitle{Can I trust this model?}
% They evaluate whether the explanations can be used for model selection, simulating the case where a human has to decide between two competing models with similar accuracy on validation data.
% Accomplished by "marking" the artificial features found within the B instances seen, as unstrustworthy. We then evaluate how many total predictions in the validation set should be trusted (as in the previous section, treating only marked features as untrustworthy).
% SP-parzen and RP-parzen are omittedfrom the figure since they did not produce useful explanations, performing only slightly better than random. Is this ok?
\includegraphics[scale=0.4]{graphics/graph_trust.png}
\end{frame}
\note[itemize]{
\item They evaluate whether the explanations can be used for model selection, simulating the case where a human has to decide between two competing models with similar accuracy on validation data.
\item Accomplished by \say{marking} the artificial features found within the $B$ instances seen as untrustworthy. We then evaluate how many total predictions in the validation set should be trusted (as in the previous section, treating only marked features as untrustworthy).
\item SP-parzen and RP-parzen are omitted from the figure since they did not produce useful explanations, performing only slightly better than random. Is this ok?
}
\begin{frame}
\frametitle{Human evaluation setup}
\begin{itemize}
\item Create a new dataset, the \emph{religion} set
\begin{itemize}
\item Consists of $819$ Christianity and Atheism websites
\end{itemize}
\item Most experiments are trained on the \emph{newsgroup} dataset
\begin{itemize}
\item The one containing the emails
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Can humans pick the best classifier?}
\includegraphics[scale=0.35]{graphics/avg_acc_humans.png}
\end{frame}
\note[itemize] {
\item Non-expert humans, without any knowledge of machine learning
\item Train two classifiers, one on the standard dataset and one on a cleaned version of the same dataset
\item Use the newsgroup dataset for training, which is the one with the atheism/christianity emails
\item Run the classifiers on a \say{religion} dataset, which the authors created themselves, to test whether the classifiers generalize well
\item The standard one achieves higher validation accuracy
\item Humans are asked to pick the best classifier when seeing explanations from the two classifiers for $B = K = 6$ (they see 6 explanations with 6 features each)
\item Repeated $100$ times
\item SP-LIME clearly outperforms the other options
}
\begin{frame}
\frametitle{Can non-experts improve a classifier?}
\center
\includegraphics[scale=0.4]{graphics/picking_features_human.png}
\begin{itemize}
\item $200$ words were removed with SP, $157$ with RP
\item Out of the $200$ words removed, $174$ were selected by at least half the users, $68$ by all
\end{itemize}
\end{frame}
\note[itemize] {
\item Non-expert humans, without any knowledge of machine learning
\item Use newsgroup dataset
\item Ask Mechanical Turk users to select features to be removed before the classifier is retrained
\item $B = K = 10$
\item The accuracy shown in the graph is on the authors' own religion dataset
\item Without cleaning, the classifiers achieve roughly $58\%$, so the cleaning helps a lot!
\item On average it only took 11 minutes to remove all the words over all 3 iterations, so a small time investment gives a much better accuracy
}
\begin{frame}
\frametitle{Can we learn something from the explanations?}
% Hand picked images to create the correlation between wolf and snow, s.t. the classifier mispredicts whenever a husky is in snow or a wolf is without snow
\note{Hand-picked images to create the correlation between wolf and snow, s.t. the classifier mispredicts whenever a husky is in snow or a wolf is without snow}
\center
\includegraphics[scale=0.2]{graphics/husky_wolf_img.png}
\begin{itemize}
\item Images picked to create fake correlation between wolf and snow
\item Use Logistic Regression classifier
\item Features come from Google's pre-trained \emph{Inception} \textbf{NN}
\end{itemize}
\end{frame}
\note[itemize] {
\item Use graduate students who have taken at least one course in machine learning
}
\begin{frame}
\frametitle{Can we learn something from the explanations?}
\begin{itemize}
@@ -473,7 +591,9 @@
\includegraphics[scale=0.3]{graphics/husky_wolf_expla.png}
\end{frame}
%\subsection{Human Subjects}
\note[itemize] {
\item Clearly shows that seeing the explanations changes their answers consistently
}
\section{Conclusion}
\begin{frame}