-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
2024-05-07-unraveling-the-impact-of-training-samples (#144)
* add author info * add ref link info * add authors and adjust background color for proof * attempt to resolve conflicts * add imgs folder back --------- Co-authored-by: ChenDaiwei-99 <[email protected]> Co-authored-by: Jane Zhang <[email protected]>
- Loading branch information
1 parent
8a5172b
commit 03199f5
Showing
19 changed files
with
415 additions
and
0 deletions.
There are no files selected for viewing
335 changes: 335 additions & 0 deletions
335
_posts/2024-05-07-unraveling-the-impact-of-training-samples.md
Large diffs are not rendered by default.
Oops, something went wrong.
80 changes: 80 additions & 0 deletions
80
assets/bibliography/2024-05-07-unraveling-the-impact-of-training-samples.bib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
% Koh and Liang, influence functions; arXiv preprint 1703.04730 (stat.ML). | ||
% NOTE(review): the eprint id 1703.* suggests a 2017 submission while year is 2020 -- confirm which version is cited. | ||
@misc{koh2020understanding, | ||
  author        = {Koh, Pang Wei and Liang, Percy}, | ||
  title         = {Understanding Black-box Predictions via Influence Functions}, | ||
  year          = {2020}, | ||
  eprint        = {1703.04730}, | ||
  archivePrefix = {arXiv}, | ||
  primaryClass  = {stat.ML}, | ||
  url           = {https://arxiv.org/abs/1703.04730} | ||
} | ||
|
||
% Hampel (1974), JASA 69(346): the influence curve in robust estimation. JSTOR export. | ||
% The ampersand in the publisher field is escaped so the entry typesets under LaTeX. | ||
@article{bd831960-ac2b-396a-8c8f-de3944255f11, | ||
  author    = {Hampel, Frank R.}, | ||
  title     = {The Influence Curve and Its Role in Robust Estimation}, | ||
  journal   = {Journal of the American Statistical Association}, | ||
  volume    = {69}, | ||
  number    = {346}, | ||
  pages     = {383--393}, | ||
  year      = {1974}, | ||
  publisher = {[American Statistical Association, Taylor \& Francis, Ltd.]}, | ||
  issn      = {01621459}, | ||
  url       = {http://www.jstor.org/stable/2285666}, | ||
  urldate   = {2023-12-09}, | ||
  abstract  = {This paper treats essentially the first derivative of an estimator viewed as functional and the ways in which it can be used to study local robustness properties. A theory of robust estimation "near" strict parametric models is briefly sketched and applied to some classical situations. Relations between von Mises functionals, the jackknife and U-statistics are indicated. A number of classical and new estimators are discussed, including trimmed and Winsorized means, Huber-estimators, and more generally maximum likelihood and M-estimators. Finally, a table with some numerical robustness properties is given.} | ||
} | ||
|
||
% Ilyas et al. (2022), datamodels; arXiv preprint 2202.00622 (stat.ML). | ||
@misc{ilyas2022datamodels, | ||
  author        = {Ilyas, Andrew and Park, Sung Min and Engstrom, Logan and Leclerc, Guillaume and Madry, Aleksander}, | ||
  title         = {Datamodels: Predicting Predictions from Training Data}, | ||
  year          = {2022}, | ||
  eprint        = {2202.00622}, | ||
  archivePrefix = {arXiv}, | ||
  primaryClass  = {stat.ML}, | ||
  url           = {https://arxiv.org/abs/2202.00622} | ||
} | ||
|
||
% Park et al. (2023), TRAK; arXiv preprint 2303.14186 (stat.ML). | ||
@misc{park2023trak, | ||
  author        = {Park, Sung Min and Georgiev, Kristian and Ilyas, Andrew and Leclerc, Guillaume and Madry, Aleksander}, | ||
  title         = {TRAK: Attributing Model Behavior at Scale}, | ||
  year          = {2023}, | ||
  eprint        = {2303.14186}, | ||
  archivePrefix = {arXiv}, | ||
  primaryClass  = {stat.ML}, | ||
  url           = {https://arxiv.org/abs/2303.14186} | ||
} | ||
|
||
% Pregibon (1981), Annals of Statistics 9(4): logistic regression diagnostics. JSTOR export. | ||
@article{683a899e-5c03-3862-9059-357c21f7b5da, | ||
  author    = {Pregibon, Daryl}, | ||
  title     = {Logistic Regression Diagnostics}, | ||
  journal   = {The Annals of Statistics}, | ||
  volume    = {9}, | ||
  number    = {4}, | ||
  pages     = {705--724}, | ||
  year      = {1981}, | ||
  publisher = {Institute of Mathematical Statistics}, | ||
  issn      = {00905364}, | ||
  url       = {http://www.jstor.org/stable/2240841}, | ||
  urldate   = {2023-12-10}, | ||
  abstract  = {A maximum likelihood fit of a logistic regression model (and other similar models) is extremely sensitive to outlying responses and extreme points in the design space. We develop diagnostic measures to aid the analyst in detecting such observations and in quantifying their effect on various aspects of the maximum likelihood fit. The elements of the fitting process which constitute the usual output (parameter estimates, standard errors, residuals, etc.) will be used for this purpose. With a properly designed computing package for fitting the usual maximum-likelihood model, the diagnostics are essentially "free for the asking." In particular, good data analysis for logistic regression models need not be expensive or time-consuming.} | ||
} | ||
|
||
% Johnson and Lindenstrauss (1982): extensions of Lipschitz mappings into a Hilbert space. | ||
% Page range uses the double-hyphen form, matching the other entries in this file; month uses the standard macro. | ||
% NOTE(review): the journal field holds a conference name -- confirm whether this should be an inproceedings entry with booktitle. | ||
@article{johnsonLindenstrauss, | ||
  author  = {Johnson, William and Lindenstrauss, J.}, | ||
  title   = {Extensions of Lipschitz mappings into a Hilbert space}, | ||
  journal = {Conference in Modern Analysis and Probability}, | ||
  volume  = {26}, | ||
  pages   = {189--206}, | ||
  year    = {1982}, | ||
  month   = jan, | ||
  url     = {https://stanford.edu/class/cs114/readings/JL-Johnson.pdf} | ||
} | ||
|
||
% Shah et al. (2022), ModelDiff; arXiv preprint 2211.12491 (cs.LG). | ||
@misc{shah2022modeldiff, | ||
  author        = {Shah, Harshay and Park, Sung Min and Ilyas, Andrew and Madry, Aleksander}, | ||
  title         = {ModelDiff: A Framework for Comparing Learning Algorithms}, | ||
  year          = {2022}, | ||
  eprint        = {2211.12491}, | ||
  archivePrefix = {arXiv}, | ||
  primaryClass  = {cs.LG}, | ||
  url           = {https://arxiv.org/abs/2211.12491} | ||
} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+326 KB
assets/img/2024-05-07-unraveling-the-impact-of-training-samples/animation.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+221 KB
...s/img/2024-05-07-unraveling-the-impact-of-training-samples/cat_data_leakage.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+212 KB
...024-05-07-unraveling-the-impact-of-training-samples/data_attribution_simply.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+242 KB
.../img/2024-05-07-unraveling-the-impact-of-training-samples/datamodel_our_exp.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+422 KB
assets/img/2024-05-07-unraveling-the-impact-of-training-samples/model_diff_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+476 KB
assets/img/2024-05-07-unraveling-the-impact-of-training-samples/model_diff_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+296 KB
assets/img/2024-05-07-unraveling-the-impact-of-training-samples/model_diff_3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+448 KB
assets/img/2024-05-07-unraveling-the-impact-of-training-samples/model_diff_4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+226 KB
assets/img/2024-05-07-unraveling-the-impact-of-training-samples/trak_exp_fig.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+271 KB
.../img/2024-05-07-unraveling-the-impact-of-training-samples/trak_scatter_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.