\documentclass{beamer}
\setbeamertemplate{note page}[plain]
\usetheme[progressbar=frametitle]{metropolis}
\usepackage{pgfpages}
\setbeameroption{show notes on second screen=right}
% g \in G is explanation as a model
% f is the model we're trying to explain
% does, being model agnostic, means we do not care about specifics of f.
% We use Locally Weighted Square Loss as L, where I suspect pi is the weight and we thus estimate the difference between the actual model
% and our explanation, and multiply this with the proximity of the data point z, to x.
% Ask Lasse why min(L(f,g,pi_x(z)) + omega(g)) becomes intractable when omega(g) is a constant!
\usepackage{dirtytalk}
\usepackage{bbm}
\usepackage{setspace}
\usepackage[T1]{fontenc}
\usepackage[sfdefault,scaled=.85]{FiraSans}
%\usepackage{newtxsf}
\usepackage[ruled, linesnumbered]{algorithm2e}
\SetKwInput{kwRequire}{Require}
\SetKw{kwExpl}{explain}
\title{Why Should I Trust You?}
\subtitle{Explaining the Predictions of Any Classifier}
\author{Casper Vestergaard Kristensen \and Alexander Munch-Hansen}
\institute{Aarhus University}
\date{\today}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\setbeamertemplate{section in toc}[sections numbered]
\frametitle{Outline}
\setstretch{0.5}
\tableofcontents
\end{frame}
\section{Meta information}
%\subsection{Authors}
\begin{frame}
\frametitle{Authors}
\begin{itemize}
\item Marco Tulio Ribeiro, PhD from the University of Washington, currently a researcher at Microsoft
\item Sameer Singh, PhD from the University of Massachusetts Amherst, advisor to Marco
\item Carlos Guestrin, Professor at the University of Washington, advisor to Marco
\end{itemize}
\end{frame}
\note[itemize] {
\item This paper won the Audience Appreciation Award
\item They also wrote \say{Model-Agnostic Interpretability of Machine Learning}
\item Marco's research focus for his PhD was making it easier for humans to understand and interact with machine learning models.
}
%\subsection{Publishing}
\begin{frame}[fragile]
\frametitle{Publishing}
\begin{itemize}
\item Conference Paper, Research
\item KDD '16 Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining
\begin{itemize}
\item A premier interdisciplinary conference that brings together researchers and practitioners from data science, data mining, knowledge discovery, large-scale data analytics, and big data.
\item SIGKDD has the highest h5-index of any conference involving databases or data in general
\item Highly trusted source
\end{itemize}
\end{itemize}
\end{frame}
\note[itemize] {
\item Main take-away is that this paper was shown at a respected conference
}
\section{Article}
%\subsection{Problem}
\begin{frame}
\frametitle{Problem definition}
\begin{itemize}
\item People often use Machine Learning models for predictions
\item Blindly trusting a prediction can lead to poor decision making
\item We seek to understand the reasons behind predictions
\begin{itemize}
\item As well as the model doing the predictions
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Problem definition}
\begin{itemize}
\item People often use Machine Learning models for predictions
\item Blindly trusting a prediction can lead to poor decision making
\item We seek to understand the reasons behind predictions
\begin{itemize}
\item As well as the model doing the predictions
\end{itemize}
\end{itemize}
\center
\includegraphics[scale=0.2]{graphics/doctor_pred.png}
\end{frame}
%\subsection{Previous Solutions}
\begin{frame}
\frametitle{Previous Solutions}
\begin{itemize}
\note{ Practitioners consistently overestimate their models' accuracy [20], propagate feedback loops [23], or fail to notice data leaks }
\item Relying on accuracy based on validation set
\item Recognizing the utility of explanations in assessing trust, many have proposed using interpretable models
\begin{itemize}
\item May generalize poorly if the data cannot be explained in few dimensions
\item So interpretability, in these cases, comes at the cost of flexibility, accuracy, or efficiency
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{A look into two predictions}
\includegraphics[scale=0.25]{graphics/christ_vs_atheism.png}
\end{frame}
\note{ It becomes clear the dataset has issues, as there is a spurious correlation between the header information and the class Atheism. It is also clear what the problems are, and the steps that can be taken to fix these issues and train a more trustworthy classifier.}
\begin{frame}
\frametitle{A look into two predictions}
\includegraphics[scale=0.25]{graphics/christ_vs_atheism_annotated_1.png}
\end{frame}
\note{ It becomes clear the dataset has issues, as there is a spurious correlation between the header information and the class Atheism. It is also clear what the problems are, and the steps that can be taken to fix these issues and train a more trustworthy classifier.}
\begin{frame}
\frametitle{A look into two predictions}
\includegraphics[scale=0.25]{graphics/christ_vs_atheism_annotated_2.png}
\end{frame}
\note{ It becomes clear the dataset has issues, as there is a spurious correlation between the header information and the class Atheism. It is also clear what the problems are, and the steps that can be taken to fix these issues and train a more trustworthy classifier.}
\subsection{The LIME framework}
\begin{frame}
\frametitle{LIME}
\begin{itemize}
\item The algorithm created
\item Explains the predictions of \emph{any} classifier or regressor in a faithful way, by approximating it locally with an \emph{interpretable} model.
\end{itemize}
\end{frame}
\note{Everything you want}
\begin{frame}
\frametitle{Properties of a good explanation}
\begin{itemize}
\item It should be \emph{interpretable}
\begin{itemize}
\item They must provide qualitative understanding between the input variables and the response
\item They must take into account the user's limitations
\end{itemize}
\item It should have \emph{fidelity}
\begin{itemize}
\item Essentially means the explanation should be faithful to the model.
\end{itemize}
\item It should be \emph{model-agnostic}
\begin{itemize}
\item Should treat model as a black box
\end{itemize}
\note{
\textbf{Interpretable} \\
Use a representation understandable to humans \\
Could be a binary vector indicating presence or absence of a word \\
Could be a binary vector indicating presence or absence of super-pixels in an image \\
\textbf{Fidelity} \\
Essentially means the explanation should be faithful to the model. \\
Local fidelity does not imply global fidelity \\
The explanation should aim to correspond to how the model behaves in the vicinity of the instance being predicted \\
\textbf{Model-agnostic} \\
The explanation should be blind to what model is underneath \\
}
\end{itemize}
\end{frame}
\subsection{Explaining Predictions}
\begin{frame}[shrink=10]
\frametitle{The Fidelity-Interpretability Trade-off}
We want a simple explanation, still capable of displaying fidelity
\begin{itemize}
\item Let an explanation be a model $g \in G$, where $G$ is a class of \emph{potentially interpretable} models and $g$ acts over $\{0,1\}^{d^{\prime}}$ (absence/presence of interpretable components)
\item $\Omega(g)$ measures the \emph{complexity} of an explanation $g$
\item The model we try to explain is $f : \mathbb{R}^d \rightarrow \mathbb{R}$
\item $\pi_x(z)$ is a proximity measure between an instance $z$ and $x$, defining the locality around $x$
\item $\mathcal{L}(f,g,\pi_x)$ measures how \emph{unfaithful} $g$ is in approximating $f$ in the locality defined by $\pi_x$
\item To ensure both \emph{interpretability} and \emph{local fidelity}, we minimize $\mathcal{L}$ while keeping $\Omega(g)$ low
\end{itemize}
$$\xi(x) = \operatornamewithlimits{argmin}_{g \in G} \mathcal{L}(f,g,\pi_x) + \Omega(g)$$
\note{
\textbf{Interpretable models could be:} \\
Linear models, decision trees \\
$g$ is a vector showing presence or absence of \emph{interpretable components} \\
$\Omega(g)$ could be height of a DT or number of non-zero weights of linear model \\
In classification, $f(x)$ is the probability or binary indicator that x belongs to a certain class \\
So a more complex g will achieve a more faithful interpretation (a lower L), but will increase the value of Omega(g) \\
}
\end{frame}
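% The objective above picks, among candidate explanations, the one that best trades off
% local faithfulness against complexity. A minimal Python sketch of that selection over a
% small, enumerable family G (purely illustrative; the paper approximates the argmin with
% K-Lasso rather than enumeration):
%
% def xi(candidates, loss, omega):
%     """Return argmin_{g in G} L(f, g, pi_x) + Omega(g).
%
%     `candidates` is an iterable of explanation models g, `loss(g)` evaluates
%     L(f, g, pi_x) on perturbed samples, and `omega(g)` measures complexity.
%     """
%     return min(candidates, key=lambda g: loss(g) + omega(g))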
\begin{frame}
\frametitle{Sampling for Local Exploration}
Goal: Minimizing $\mathcal{L}(f,g,\pi_x)$ without making assumptions on $f$
\begin{itemize}
\item Given an instance $x$ and its interpretable version $x^\prime$, we draw samples around $x^\prime$
\item Accomplished by drawing non-zero elements of $x^\prime$ uniformly at random, resulting in perturbed samples $z^\prime$
\item Given $z^\prime \in \{0,1\}^{d^\prime}$, we recover the unperturbed $z \in \mathbb{R}^d$ and compute $f(z)$, which serves as the label for $z^\prime$.
\end{itemize}
\center
\includegraphics[scale=0.15]{graphics/sample_points.png}
\note{
What is $x'$ here? An interpretable version of $x$ \\
$g$ acts in dimension $d'$ while $f$ acts in dimension $d$; from a perturbed $z'$ in dimension $d'$ (the domain of $g$) we recover $z$ in the original representation, which $f$ labels in dimension $d$.
}
\end{frame}
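% A minimal Python sketch of the perturbation sampling described above, assuming a
% bag-of-words style interpretable representation; the helper name `sample_around`
% mirrors Algorithm 1, but the implementation is our own illustration, not the authors' code:
%
% import numpy as np
%
% def sample_around(x_prime, rng=np.random.default_rng()):
%     """Draw one perturbed sample z' around the interpretable instance x'.
%
%     x_prime is a binary vector marking presence/absence of interpretable
%     components (e.g. words); we keep a random subset of its non-zero entries.
%     """
%     nonzero = np.flatnonzero(x_prime)
%     keep = rng.random(len(nonzero)) > 0.5   # drop roughly half the components
%     z_prime = np.zeros_like(x_prime)
%     z_prime[nonzero[keep]] = 1
%     return z_prime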
% \subsubsection{Examples}
\begin{frame}
\frametitle{Specifics for linear models}
\begin{itemize}
\item They focus only on linear explanations
\item $G = $ Class of linear models: $g(z^\prime) = w_g \cdot z^\prime$
\item $\mathcal{L} = $ The locally weighted square loss
\item $\pi_x(z) = \exp(-D(x,z)^2 / \sigma^2)$
\begin{itemize}
\item An exponential kernel function based on some distance function D (could be L2 distance for images)
\end{itemize}
\item Thus: $\mathcal{L}(f, g, \pi_x) = \sum\limits_{z,z^\prime \in \mathcal{Z}} \pi_x(z) (f(z) - g(z^\prime))^2$
\end{itemize}
\end{frame}
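% A minimal Python sketch of the exponential kernel and the locally weighted square loss
% for a linear explanation g(z') = w_g . z' (our own illustration; the distance D and
% width sigma are assumptions, e.g. cosine distance for text or L2 distance for images):
%
% import numpy as np
%
% def pi_x(x, z, D, sigma):
%     """Proximity of z to x: exp(-D(x, z)^2 / sigma^2)."""
%     return np.exp(-D(x, z) ** 2 / sigma ** 2)
%
% def weighted_square_loss(f, w_g, Z, Z_prime, x, D, sigma):
%     """L(f, g, pi_x) = sum over samples of pi_x(z) * (f(z) - w_g . z')^2."""
%     return sum(pi_x(x, z, D, sigma) * (f(z) - w_g @ z_prime) ** 2
%                for z, z_prime in zip(Z, Z_prime))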
\begin{frame}
% \frametitle{Sparse Linear Explanations}
\frametitle{Explaining an individual prediction}
\begin{itemize}
\item Solving $\operatornamewithlimits{argmin}_{g \in G} \mathcal{L}(f,g,\pi_x) + \Omega(g)$ directly is intractable, but this algorithm approximates it.
\item K-Lasso is the procedure of selecting $K$ features with Lasso and then learning their weights via least squares.
\end{itemize}
\begin{algorithm}[H]
\setstretch{0.9}
\SetAlgoLined
\kwRequire{Classifier $f$, Number of samples $N$}
\kwRequire{Instance $x$, and its interpretable version $x^{\prime}$}
\kwRequire{Similarity kernel $\pi_x$, Length of explanation $K$}
\Indp
$\mathcal{Z} \leftarrow \{\}$ \\
\For{$i \in \{1,2,3,\dots, N\}$}{
$z_i^{\prime} \leftarrow sample\_around(x^{\prime})$ \\
$\text{add }\langle z_i^{\prime}, f(z_i), \pi_{x}(z_i) \rangle \text{ to } \mathcal{Z}$\\
}
$w \leftarrow \text{K-Lasso}(\mathcal{Z},K) \qquad \qquad // \text{with } z_i^{\prime} \text{ as features, } f(z) \text{ as target}$ \\
\Return $w$
\caption{Sparse Linear Explanations using LIME}
\end{algorithm}
\note{
Talk through the algorithm, discussing the sampling and K-Lasso (least absolute shrinkage and selection operator), which is used for feature selection \\
This algorithm approximates the minimization problem of computing a single individual explanation of a prediction. \\
K-Lasso picks the $K$ features with Lasso and then learns the weights via least squares. What are these weights? The coefficients of the interpretable features.
}
\end{frame}
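% A minimal Python sketch of Algorithm 1 (our own illustration, not the authors' reference
% implementation). It reuses the `sample_around` helper sketched earlier; `to_original` maps
% a perturbed z' back to the original representation, and we assume scikit-learn's lars_path
% for the Lasso step of K-Lasso, followed by weighted least squares on the K chosen features.
%
% import numpy as np
% from sklearn.linear_model import lars_path, LinearRegression
%
% def explain_instance(f, x, x_prime, to_original, kernel, K, N=5000):
%     d_prime = len(x_prime)
%     Z_prime = np.zeros((N, d_prime))
%     labels = np.zeros(N)
%     weights = np.zeros(N)
%     for i in range(N):
%         z_prime = sample_around(x_prime)        # perturbed interpretable sample
%         z = to_original(z_prime)                # recover z in R^d
%         Z_prime[i], labels[i], weights[i] = z_prime, f(z), kernel(x, z)
%     # Lasso regularisation path on kernel-weighted data to select K features
%     Xw = Z_prime * np.sqrt(weights)[:, None]
%     yw = labels * np.sqrt(weights)
%     _, _, coefs = lars_path(Xw, yw, method='lasso')
%     for col in range(coefs.shape[1]):
%         selected = np.flatnonzero(coefs[:, col])
%         if len(selected) >= K:
%             break
%     # Weighted least squares restricted to the selected features
%     lin = LinearRegression().fit(Z_prime[:, selected], labels, sample_weight=weights)
%     return dict(zip(selected.tolist(), lin.coef_))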
\subsection{Explaining Models}
\begin{frame}
\frametitle{Explaining models}
Idea: We give a global understanding of the model by explaining a set of individual instances
\begin{itemize}
\item Still model agnostic (since the individual explanations are)
\item Instances need to be selected in a clever way, as people won't have time to look through all explanations
\item Some definitions
\begin{itemize}
\item Time/patience of humans is captured by a budget \emph{B}, which denotes the number of explanations a human will sit through.
\item Given a set of instances \textbf{X}, we define the \emph{pick step} as the task of selecting \textbf{B} instances for the user to inspect.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The pick step}
The task of selecting \emph{B} instances for the user to inspect
\begin{itemize}
\item Should return the instances which best explain the model
\item Looking at raw data is not enough to understand predictions and get insights
\item Should take into account the explanations that accompany each prediction
\note{Should pick a diverse, representative set of explanations to show the user, so non-redundant explanations that represent how the model behaves globally.}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Picking instances}
\center
\includegraphics[scale=0.68]{graphics/picker_first.png} \\
\hspace{1cm}
\note{
This is a matrix explaining instances and their features explained by a binary list s.t. an instance either has a feature or does not. \\
The blue line explains the most inherent feature, which is important, as it is found in most of the instances. \\
The red lines indicate the two samples which are most important in explaining the model. \\
Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
}
\end{frame}
\begin{frame}
\frametitle{Picking instances}
\center
\includegraphics[scale=0.27]{graphics/picker_second.png}
\begin{itemize}
\item $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
\end{itemize}
\note{
This is a matrix explaining instances and their features explained by a binary list s.t. an instance either has a feature or does not. \\
The blue line explains the most inherent feature, which is important, as it is found in most of the instances. \\
The red lines indicate the two samples which are most important in explaining the model. \\
Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
}
\end{frame}
\begin{frame}
\frametitle{Picking instances}
\center
\includegraphics[scale=0.27]{graphics/picker_third.png}
\begin{itemize}
\item $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
\end{itemize}
\note{
This is a matrix explaining instances and their features explained by a binary list s.t. an instance either has a feature or does not. \\
The blue line explains the most inherent feature, which is important, as it is found in most of the instances. \\
The red lines indicate the two samples which are most important in explaining the model. \\
Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
}
\end{frame}
\begin{frame}
\frametitle{Picking instances}
\center
\includegraphics[scale=0.27]{graphics/picker_final.png}
\begin{itemize}
\item $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
\end{itemize}
\note{
This is a matrix explaining instances and their features explained by a binary list s.t. an instance either has a feature or does not. \\
The blue line explains the most inherent feature, which is important, as it is found in most of the instances. \\
The red lines indicate the two samples which are most important in explaining the model. \\
Thus, feature importance is computed as $I_j = \sqrt{\sum_{i=1}^n W_{ij}}$
}
\end{frame}
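% A minimal Python sketch of the global feature importance used in the pick step
% (our own illustration): W is an n x d' matrix whose rows are the local explanation
% weights of the n instances.
%
% import numpy as np
%
% def feature_importance(W):
%     """I_j = sqrt(sum_i |W_ij|): features that explain many instances score high."""
%     return np.sqrt(np.abs(W).sum(axis=0))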
\begin{frame}
\frametitle{Submodular Picks}
$c(V,W,I) = \sum\limits_{j=1}^{d^\prime} \mathbbm{1}_{[\exists i \in V : W_{ij} > 0]}\ I_j$
\begin{algorithm}[H]
\setstretch{0.9}
\SetAlgoLined
\kwRequire{Instances $X$, Budget $B$}
\Indp
\ForAll{$x_i \in X$}{
$W_i \leftarrow \mathbf{explain}(x_i, x_i^{\prime})$ \qquad \qquad // Using Algorithm 1
}
\For{$j \in \{1 \dots d^{\prime}\}$}{
$I_j \leftarrow \sqrt{\sum_{i=1}^n |W_{ij}|}$ \qquad \qquad \quad // Compute feature importances
}
$V \leftarrow \{\}$ \\
\While(\qquad \qquad \qquad \quad \ \ // Greedy optimisation of Eq 4){$|V| < B$} {
$V \leftarrow V \cup \text{argmax}_i \ c(V \cup \{i\}, W, I)$
}
\Return $V$
\caption{Submodular pick (SP) algorithm}
\end{algorithm}
\note{ Note: maximizing a weighted coverage function is NP-hard, but the version used in the algorithm is iteratively greedy, so it just adds the instance with the maximum coverage gain, which offers a constant-factor approximation guarantee of $1 - 1/e$ to the optimum.}
\end{frame}
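% A minimal Python sketch of Algorithm 2 (our own illustration): the coverage c(V, W, I)
% sums the importance of every feature touched by the chosen set V, and the greedy loop
% adds the instance with the largest marginal gain.
%
% import numpy as np
%
% def coverage(V, W, I):
%     covered = (np.abs(W[list(V)]) > 0).any(axis=0)   # features used by any chosen row
%     return I[covered].sum()
%
% def submodular_pick(W, B):
%     I = np.sqrt(np.abs(W).sum(axis=0))               # feature importances I_j
%     V = set()
%     while len(V) < min(B, len(W)):
%         gains = {i: coverage(V | {i}, W, I) for i in range(len(W)) if i not in V}
%         V.add(max(gains, key=gains.get))             # greedy: maximal coverage gain
%     return V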
\section{Experiments}
\subsection{Simulated User Experiments}
%\subsubsection{Setup}
\begin{frame}
\frametitle{Experiments}
Interested in three questions:
\begin{itemize}
\item Are the explanations faithful to the model?
\item Can the explanations aid users in ascertaining trust in the individual predictions?
\item Are the explanations useful for evaluating the model as a whole?
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Faithfulness and golden standard}
\begin{itemize}
\item Explanations of \textbf{LIME} are compared with \textbf{parzen} as well as greedy and random algorithms.
\begin{itemize}
\item \textbf{parzen} approximates the black-box classifier globally and explains individual predictions by taking the gradient of the prediction probability function.
\end{itemize}
\item Faithfulness of explanations is measured on classifiers that are interpretable: \textbf{Logistic Regression} and \textbf{Decision Tree}.
\begin{itemize}
\item Both use at most $10$ features, which form the \emph{gold standard} set of features
\end{itemize}
\item For each prediction on the test set, explanations are produced and the fraction of the gold features recovered is computed.
\end{itemize}
\end{frame}
\note[itemize] {
\item Train logistic regression and decision tree classifiers, so that they use a maximum of 10 features to classify each instance.
\item These 10 features are the gold set of features that are actually considered important by the model.
\item The explanations should recover these features.
}
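% A minimal Python sketch of the faithfulness metric as we read it: the recall of the gold
% features (the at most 10 features the interpretable classifier actually uses) within the
% explanation produced for a prediction.
%
% def gold_recall(explained_features, gold_features):
%     """Both arguments are collections of feature indices."""
%     gold = set(gold_features)
%     return len(set(explained_features) & gold) / len(gold)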
\begin{frame}
\frametitle{Faithfulness}
\centering
% Books faithfulness
\includegraphics[height=0.35\textheight]{graphics/books_dt_lr.png}{ }
% Dvds faithfulness
\includegraphics[height=0.35\textheight]{graphics/dvds_dt_lr.png}
\end{frame}
\note[itemize] {
\item We observe that the greedy approach is comparable to parzen on logistic regression, but is significantly worse on decision trees, since changing a single feature at a time often does not have an effect on the prediction.
\item The overall recall by parzen is low, likely due to the difficulty in approximating the original high-dimensional classifier.
\item LIME consistently provides $>90\%$ recall for both classifiers on both datasets, demonstrating that LIME explanations are faithful to the models.
}
\begin{frame}
\frametitle{Should I trust this prediction?}
\begin{itemize}
\item Randomly select 25\% of the features as untrustworthy.
\item Simulated users deem a prediction untrustworthy if:
\begin{itemize}
\item LIME \& Parzen: the linear approximation changes when all untrustworthy features are removed from the explanation.
\item Greedy \& Random: they contain any untrustworthy features.
\end{itemize}
\end{itemize}
\includegraphics[width=0.60\linewidth]{graphics/F1_trust.png}
\includegraphics[width=0.35\linewidth]{graphics/sample_points.png}
\end{frame}
\note[itemize] {
\item 2nd experiment: test trust in individual predictions.
\item Test-set predictions are deemed (oracle) truly untrustworthy if the prediction from the black-box classifier changes when these features are removed.
\item Simulated user knows which features to discount.
\item If the line is different when untrustworthy features are removed, something is wrong!
\item F-measure = a measure of a test's accuracy, i.e. whether the user correctly distrusts a prediction based on the explanation given by e.g. LIME.
\item The results show that the other methods achieve lower recall = mistrust too many predictions, or lower precision = trust too many predictions.
}
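% A minimal Python sketch of the simulated-user trust check for LIME (our own reading of
% the setup; the 0.5 decision threshold is an assumption): the user distrusts a prediction
% if removing the untrustworthy features changes the decision of the local linear approximation.
%
% import numpy as np
%
% def user_trusts(w_g, x_prime, untrustworthy, threshold=0.5):
%     """w_g: explanation weights; x_prime: binary interpretable instance;
%     untrustworthy: boolean mask over the features the user discounts."""
%     full = w_g @ x_prime
%     reduced = w_g @ (x_prime * ~untrustworthy)          # drop untrustworthy features
%     return (full > threshold) == (reduced > threshold)  # unchanged decision -> trust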
\begin{frame}
\frametitle{Can I trust this model?}
\begin{itemize}
\item Evaluate if explanations can be used for model selection
\item They add 10 artificially \say{noisy} features s.t.
\begin{itemize}
\item Each artificial feature appears in 10\% of the examples in one class, and 20\% of the other in the training/validation data.
\item While on the test instances, each artificial feature appears in 10\% of the examples in each class.
\end{itemize}
\item This results in models that use actual informative features, but also some that exploit the spurious correlations.
\item Pairs of competing classifiers are created by repeatedly training pairs of random forests with 30 trees until their validation accuracy is within 0.1\% of each other, but their test accuracy differs by at least 5\%.
\end{itemize}
\end{frame}
\note[itemize] {
\item 3rd experiment: two models with similar validation accuracy; the simulated user should select the one that generalizes better, using the explanations.
}
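% A minimal Python sketch of the artificial \say{noisy} features used in this experiment
% (our own illustration of the construction described on the slide):
%
% import numpy as np
%
% def add_noisy_features(X, y, n_noisy=10, test=False, rng=np.random.default_rng()):
%     """Append n_noisy binary features: in train/validation data each appears in
%     10% of one class and 20% of the other; in test data in 10% of both."""
%     p = np.where(y == 1, 0.10, 0.10 if test else 0.20)   # per-example probability
%     noisy = rng.random((X.shape[0], n_noisy)) < p[:, None]
%     return np.hstack([X, noisy])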
\begin{frame}
\frametitle{Can I trust this model?}
\includegraphics[scale=0.4]{graphics/graph_trust.png}
\end{frame}
\note[itemize]{
\item They evaluate whether the explanations can be used for model selection, simulating the case where a human has to decide between two competing models with similar accuracy on validation data.
\item Accomplished by \say{marking} the artificial features found within the B instances seen as untrustworthy. We then evaluate how many total predictions in the validation set should be trusted (as in the previous section, treating only marked features as untrustworthy).
\item As B, the number of explanations seen, increases, the simulated human is better at selecting the best model.
}
\subsection{Human user experiments}
\begin{frame}
\frametitle{Can humans pick the best classifier?}
\includegraphics[scale=0.35]{graphics/avg_acc_humans.png}
\end{frame}
\note[itemize] {
\item Non-expert humans, without any knowledge of machine learning
\item Train two classifiers, one on the standard data set and one on a cleaned version of the same data set
\item Use the 20 newsgroups dataset (the one with the atheism/Christianity posts) for training
\item Run the classifiers on a \say{religion} dataset that the authors created themselves, to test whether the classifiers generalize well
\item The standard one achieves higher validation accuracy, but it generalizes worse!
\item Humans are asked to pick the better classifier after seeing explanations from the two classifiers, with B = K = 6 (they see 6 explanations with 6 features each)
\item Repeated $100$ times
\item Clearly SP-LIME outperforms the other options
}
\begin{frame}
\frametitle{Can non-experts improve a classifier?}
\center
\includegraphics[scale=0.4]{graphics/picking_features_human.png}
\begin{itemize}
\item $200$ words were removed with SP, $157$ with RP
\item Out of the $200$ words removed, $174$ were selected by at least half the users, $68$ by all
\end{itemize}
\end{frame}
\note[itemize] {
\item Non-expert humans, without any knowledge of machine learning
\item Use newsgroup dataset
\item Ask Amazon Mechanical Turk users to select features to be removed (email headers), before the classifier is retrained
\item B = K = 10
\item Accuracy shown in the graph is on the authors' own religion dataset
\item Without cleaning, the classifiers achieve roughly $58\%$ accuracy, so the cleaning helps a lot!
\item On average it took only 11 minutes to remove all the words across the 3 rounds, so a small time investment yields much better accuracy
\item SP-LIME outperforms RP-LIME, suggesting that selection of the instances to show the users is crucial for efficient feature engineering.
}
\begin{frame}
\frametitle{Can we learn something from the explanations?}
\note{Hand-picked images to create the correlation between wolf and snow, s.t. the classifier mispredicts whenever a husky is in snow or a wolf is without snow}
\center
\includegraphics[scale=0.2]{graphics/husky_wolf_img.png}
\begin{itemize}
\item Images picked to create a spurious correlation between wolf and snow
\item Use Logistic Regression classifier
\item Features come from Google's pre-trained \emph{Inception} \textbf{NN}
\end{itemize}
\end{frame}
\note[itemize] {
\item Use graduate students who have taken at least one course in machine learning.
\item Intentionally train a bad classifier by having snow in all wolf images during training.
}
\begin{frame}
\frametitle{Can we learn something from the explanations?}
\begin{itemize}
\item Present 10 predictions without explanations % Such as the previous image (a)
\begin{itemize}
\item 2 are mispredictions (a husky in snow and a wolf without snow); the rest are correct
\end{itemize}
\item Ask three questions:
\begin{enumerate}
\item Do you trust this algorithm to generalize?
\item Why?
\item How do you think the algorithm distinguishes between huskies and wolves?
\end{enumerate}
\item Results are shown in the table, before and after having seen the explanations.
\end{itemize}
\center
\includegraphics[scale=0.3]{graphics/husky_wolf_expla.png}
\end{frame}
%\subsection{Human Subjects}
\note[itemize] {
\item Clearly shows that seeing the explanations leads to insight, consistently changing the subjects' answers.
}
\section{Conclusion}
\begin{frame}
\frametitle{Conclusion}
\begin{itemize}
\item They argue that trust is crucial for effective human interaction with machine learning systems
\item Explaining individual predictions is important in assessing trust
\item They proposed LIME, a modular and extensible approach to faithfully explain the predictions of any model in an interpretable manner
\item They introduced SP-LIME, a method to select representative and non-redundant predictions, providing a global view of the model to users.
\item Experiments demonstrated that explanations are useful for a variety of models in trust-related tasks in the text and image domains
\end{itemize}
\end{frame}
\note[itemize] {
\item Establishing trust in machine learning models requires that the system can explain its behaviour.
\begin{itemize}
\item Both individual predictions,
\item As well as the entire model.
\end{itemize}
\item To this end, they introduce (submodular pick) SP-LIME, which selects a small number of explanations that together (hopefully) explain the entire model.
\item Experiments show that this is indeed the case.
}
\begin{frame}
\frametitle{Future work}
\begin{itemize}
\item Explanation families beyond sparse linear models.
\item One issue that they do not address in this work is how to perform the pick step for images.
\item They would like to investigate potential uses in speech, video, and medical domains, as well as recommendation systems.
\item They would like to explore theoretical properties (such as the appropriate number of samples) and computational optimizations (such as using parallelization and GPU processing)
\end{itemize}
\end{frame}
\note[itemize] {
\item The paper only describes sparse linear models as explanations, but the framework supports other explanation families, such as decision trees.
\item They envision adapting the explanation family based on the dataset and classifier.
\item Extend the framework to support images (better), speech, video, etc.
\item The LIME framework is ready for production and available on GitHub.
\item They would therefore like to optimise computation using parallelisation and GPU processing.
}
\section{Recap}
\begin{frame}
\frametitle{Recap}
\begin{itemize}
\item LIME is a framework for explaining predictions made by machine learning algorithms.
\item It explains models by intelligently picking a limited number of individual explanations.
\item Only uses linear models at the moment.
\item It is shown to make it significantly easier for people, even non-experts, to improve classifiers.
\end{itemize}
\end{frame}
\note[itemize] {
\item LIME is able to explain entire ML models by presenting the user with a limited number of individual, non-redundant explanations that describe the model well enough without overwhelming them.
}
\begin{frame}
\frametitle{Discussion}
\begin{itemize}
\item Is it fair that the authors create their data in such a way that \emph{Parzen} becomes unusable in their tests?
\item What do you expect to happen if the data is highly non-linear, even locally around the prediction?
\end{itemize}
\end{frame}
\end{document}