@inproceedings{arya2024_nn_mpe_nips,
title = {A Neural Network Approach for Efficiently Answering Most Probable Explanation Queries in Probabilistic Models},
author = {Arya, Shivvrat and Rahman, Tahrima and Gogate, Vibhav Giridhar},
booktitle = {The Thirty-eighth Annual Conference on Neural Information Processing Systems (NeurIPS)},
month = nov,
year = {2024},
comment = {Spotlight presentation (top 3% of papers)},
url = {https://openreview.net/forum?id=ufPPf9ghzP},
file = {NeurIPS_12727_A_Neural_Network_Approac.pdf}
}
@inproceedings{2024captaincookdnips,
title = {CaptainCook4D: A Dataset for Understanding Errors in Procedural Activities},
author = {Peddi, Rohith and Arya, Shivvrat and Challa, Bharath and Pallapothula, Likhitha and Vyas, Akshay and Gouripeddi, Bhavya and Zhang, Qifan and Wang, Jikai and Komaragiri, Vasundhara and Ragan, Eric and Ruozzi, Nicholas and Xiang, Yu and Gogate, Vibhav},
booktitle = {The Thirty-eighth Conference on Neural Information Processing Systems (NeurIPS), Datasets and Benchmarks Track},
month = nov,
year = {2024},
url = {https://openreview.net/forum?id=YFUp7zMrM9},
website = {https://captaincook4d.github.io/captain-cook/},
code = {https://github.com/CaptainCook4D/},
file = {NeurIPS_1088_CaptainCook4D_A_Dataset_f.pdf}
}
@workshop{Rheault_VR2024,
title = {Predictive Task Guidance with Artificial Intelligence in Augmented Reality},
author = {Rheault, Benjamin and Arya, Shivvrat and Vyas, Akshay and Wang, Jikai and Peddi, Rohith and Benda, Brett and Gogate, Vibhav and Ruozzi, Nicholas and Xiang, Yu and Ragan, Eric D},
year = {2024},
month = jun,
file = {Rheault_VR2024.pdf},
booktitle = {IEEE Virtual Reality (IEEE VR)}
}
@inproceedings{arya2024_nn_mmap_pc,
title = {Neural Network Approximators for Marginal {MAP} in Probabilistic Circuits},
author = {Arya, Shivvrat and Rahman, Tahrima and Gogate, Vibhav Giridhar},
booktitle = {The 7th Workshop on Tractable Probabilistic Modeling (TPM)},
year = {2024},
month = may,
url = {https://openreview.net/forum?id=ZgGg7ETVtZ},
file = {TPM_9_neural_network_approximators_f.pdf}
}
@inproceedings{arya2024_nn_mpe,
title = {A Neural Network Approach for Efficiently Answering Most Probable Explanation Queries in Probabilistic Models},
author = {Arya, Shivvrat and Rahman, Tahrima and Gogate, Vibhav Giridhar},
booktitle = {The 7th Workshop on Tractable Probabilistic Modeling (TPM)},
year = {2024},
month = may,
url = {https://openreview.net/forum?id=Gn7mpdBqqf},
comment = {Best Paper Award},
certificate = {/certificates/tpm_certificate.jpg},
file = {TPM_11_a_neural_network_approach_for_.pdf}
}
@inproceedings{arya_2024_solveconstraineda,
title = {Learning to {{Solve}} the {{Constrained Most Probable Explanation Task}} in {{Probabilistic Graphical Models}}},
booktitle = {Proceedings of {{The}} 27th {{International Conference}} on {{Artificial Intelligence}} and {{Statistics}} (AISTATS)},
author = {Arya, Shivvrat and Rahman, Tahrima and Gogate, Vibhav},
year = {2024},
month = apr,
pages = {2791--2799},
publisher = {PMLR},
issn = {2640-3498},
urldate = {2024-04-21},
langid = {english},
volume = {238},
series = {Proceedings of Machine Learning Research},
url = {https://proceedings.mlr.press/v238/arya24b.html},
file = {SS_CMPE_AISTATS2024CameraReady.pdf},
code = {https://github.com/Shivvrat/SS-CMPE}
}
We propose a self-supervised learning approach for solving the following constrained optimization task in log-linear models or Markov networks. Let f and g be two log-linear models defined over the sets X and Y of random variables respectively. Given an assignment x to all variables in X (evidence) and a real number q, the constrained most-probable explanation (CMPE) task seeks to find an assignment y to all variables in Y such that f(x,y) is maximized and g(x,y)≤q. In our proposed self-supervised approach, given assignments x to X (data), we train a deep neural network that learns to output near-optimal solutions to the CMPE problem without requiring access to any pre-computed solutions. The key idea in our approach is to use first principles and approximate inference methods for CMPE to derive novel loss functions that seek to push infeasible solutions towards feasible ones and feasible solutions towards optimal ones. We analyze the properties of our proposed method and experimentally demonstrate its efficacy on several benchmark problems.
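As a rough illustration of the self-supervised idea described above, the sketch below (PyTorch; all class and function names are hypothetical) trains a network that maps evidence x to a relaxed assignment y_hat in [0,1]^|Y| and minimizes a penalty-style loss that rewards large f(x, y_hat) and penalizes constraint violations g(x, y_hat) > q. It assumes f and g are available as differentiable relaxations and is a simplified stand-in for the loss functions actually derived in the paper.

import torch
import torch.nn as nn

class CMPESolver(nn.Module):
    def __init__(self, dim_x, dim_y, hidden=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim_x, hidden), nn.ReLU(),
            nn.Linear(hidden, dim_y), nn.Sigmoid(),  # relaxed assignment in [0, 1]^|Y|
        )

    def forward(self, x):
        return self.net(x)

def cmpe_penalty_loss(f, g, x, y_hat, q, lam=10.0):
    # f, g: differentiable relaxations of the two log-linear models, each
    # mapping (x, y_hat) to one scalar per example (illustrative interface).
    objective = f(x, y_hat)                   # push feasible solutions toward optimality
    violation = torch.relu(g(x, y_hat) - q)   # active only when the constraint is violated
    return (-objective + lam * violation).mean()

# Self-supervised training loop: only evidence assignments x are required;
# no pre-computed CMPE solutions are used.
# solver = CMPESolver(dim_x, dim_y)
# optimizer = torch.optim.Adam(solver.parameters(), lr=1e-3)
# for x in data_loader:
#     loss = cmpe_penalty_loss(f, g, x, solver(x), q)
#     optimizer.zero_grad(); loss.backward(); optimizer.step()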
@inproceedings{arya_2024_dependencynetworksa,
title = {Deep {{Dependency Networks}} and {{Advanced Inference Schemes}} for {{Multi-Label Classification}}},
booktitle = {Proceedings of {{The}} 27th {{International Conference}} on {{Artificial Intelligence}} and {{Statistics}} (AISTATS)},
author = {Arya, Shivvrat and Xiang, Yu and Gogate, Vibhav},
year = {2024},
month = apr,
pages = {2818--2826},
publisher = {PMLR},
issn = {2640-3498},
urldate = {2024-04-21},
langid = {english},
code = {https://github.com/Shivvrat/Advanced-Inference-Schemes-for-DDNs},
file = {DDN_AISTATS2024CameraReady.pdf}
}
We present a unified framework called deep dependency networks (DDNs) that combines dependency networks and deep learning architectures for multi-label classification, with a particular emphasis on image and video data. The primary advantage of dependency networks is their ease of training, in contrast to other probabilistic graphical models like Markov networks. In particular, when combined with deep learning architectures, they provide an intuitive, easy-to-use loss function for multi-label classification. A drawback of DDNs compared to Markov networks is their lack of advanced inference schemes, necessitating the use of Gibbs sampling. To address this challenge, we propose novel inference schemes based on local search and integer linear programming for computing the most likely assignment to the labels given observations. We evaluate our novel methods on three video datasets (Charades, TACoS, Wetlab) and three image datasets (MS-COCO, PASCAL VOC, NUS-WIDE), comparing their performance with (a) basic neural architectures and (b) neural architectures combined with Markov networks equipped with advanced inference and learning techniques. Our results demonstrate the superiority of our new DDN methods over the two competing approaches.
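The sketch below (PyTorch; names hypothetical) illustrates the kind of dependency-network head described above: each label gets a conditional distribution given the extracted image or video features and all other labels, training uses a simple per-label cross-entropy loss, and Gibbs sampling is shown as the baseline decoder that the paper's local-search and integer linear programming schemes are designed to replace. It is an illustration of the general construction, not the authors' implementation (see the linked repository for that).

import torch
import torch.nn as nn

class DependencyNetworkHead(nn.Module):
    def __init__(self, feat_dim, num_labels):
        super().__init__()
        # One linear conditional per label, conditioned on the features
        # produced by the neural backbone and on all the other labels.
        self.cond = nn.Linear(feat_dim + num_labels, num_labels)
        self.num_labels = num_labels

    def logits(self, feats, labels):
        # Zero out label i in its own conditioning set so that row i of the
        # output corresponds to P(y_i | features, y_{-i}).
        mask = 1 - torch.eye(self.num_labels, device=labels.device)
        masked = labels.unsqueeze(1) * mask                       # (B, L, L)
        out = [self.cond(torch.cat([feats, masked[:, i, :]], dim=-1))[:, i]
               for i in range(self.num_labels)]
        return torch.stack(out, dim=-1)

    def loss(self, feats, labels):
        # Per-label binary cross-entropy on the conditionals: the simple,
        # pseudo-likelihood-style training objective of dependency networks.
        return nn.functional.binary_cross_entropy_with_logits(
            self.logits(feats, labels), labels)

    def gibbs_decode(self, feats, sweeps=50):
        # Baseline Gibbs-sampling inference; the advanced inference schemes in
        # the paper replace this step.
        with torch.no_grad():
            y = torch.zeros(feats.shape[0], self.num_labels, device=feats.device)
            for _ in range(sweeps):
                for i in range(self.num_labels):
                    p = torch.sigmoid(self.logits(feats, y)[:, i])
                    y[:, i] = torch.bernoulli(p)
        return y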
@inproceedings{arya_2024_networkapproximatorsa,
title = {Neural {{Network Approximators}} for {{Marginal MAP}} in {{Probabilistic Circuits}}},
author = {Arya, Shivvrat and Rahman, Tahrima and Gogate, Vibhav},
year = {2024},
month = mar,
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
volume = {38},
number = {10},
pages = {10918--10926},
issn = {2374-3468},
doi = {10.1609/aaai.v38i10.28966},
urldate = {2024-03-27},
copyright = {Copyright (c) 2024 Association for the Advancement of Artificial Intelligence},
file = {AAAI_arya_2024_networkapproximatorsa.pdf},
langid = {english},
comment = {Oral presentation (top 6% of papers)}
}
Probabilistic circuits (PCs) such as sum-product networks efficiently represent large multi-variate probability distributions. They are preferred in practice over other probabilistic representations, such as Bayesian and Markov networks, because PCs can solve marginal inference (MAR) tasks in time that scales linearly in the size of the network. Unfortunately, the most probable explanation (MPE) task and its generalization, the marginal maximum-a-posteriori (MMAP) inference task remain NP-hard in these models. Inspired by the recent work on using neural networks for generating near-optimal solutions to optimization problems such as integer linear programming, we propose an approach that uses neural networks to approximate MMAP inference in PCs. The key idea in our approach is to approximate the cost of an assignment to the query variables using a continuous multilinear function and then use the latter as a loss function. The two main benefits of our new method are that it is self-supervised, and after the neural network is learned, it requires only linear time to output a solution. We evaluate our new approach on several benchmark datasets and show that it outperforms three competing linear time approximations: max-product inference, max-marginal inference, and sequential estimation, which are used in practice to solve MMAP tasks in PCs.
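The sketch below (Python/PyTorch; node and function names hypothetical) illustrates the loss construction described above: evaluating the circuit bottom-up with soft query values in [0,1] yields a continuous multilinear surrogate of the MMAP objective, which can serve as a self-supervised loss for a neural predictor; at test time the predictor's sigmoid outputs are simply thresholded, so producing a solution takes time linear in the sizes of the circuit and the network. This is a simplified stand-in for the paper's construction.

import torch

class Leaf:                          # indicator leaf over `var` with a polarity
    def __init__(self, var, positive):
        self.var, self.positive = var, positive
    def value(self, assign):         # assign[var] is a value in [0, 1]
        return assign[self.var] if self.positive else 1 - assign[self.var]

class Product:
    def __init__(self, children):
        self.children = children
    def value(self, assign):
        v = 1.0
        for c in self.children:
            v = v * c.value(assign)
        return v

class Sum:
    def __init__(self, children, weights):
        self.children, self.weights = children, weights
    def value(self, assign):
        return sum(w * c.value(assign) for w, c in zip(self.weights, self.children))

def mmap_surrogate_loss(root, evidence, query_probs):
    # evidence: dict var -> observed 0/1 value; query_probs: dict var -> sigmoid
    # output (tensor) of the neural predictor. The circuit value is multilinear
    # in the query probabilities, hence differentiable and usable as a loss.
    assign = {**evidence, **query_probs}
    return -torch.log(root.value(assign) + 1e-12)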
@article{Explainable_AI_TiiS,
author = {Roy, Chiradeep* and Nourani, Mahsan* and Arya, Shivvrat* and Shanbhag, Mahesh and Rahman, Tahrima and Ragan, Eric D. and Ruozzi, Nicholas and Gogate, Vibhav},
comment = {*Equal contribution},
title = {Explainable Activity Recognition in Videos Using Deep Learning and Tractable Probabilistic Models},
year = {2023},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
issn = {2160-6455},
url = {https://doi.org/10.1145/3626961},
doi = {10.1145/3626961},
note = {Just Accepted},
journal = {ACM Transactions on Interactive Intelligent Systems (TiiS)},
month = oct,
keywords = {Tractable Probabilistic Models, Temporal Models, Dynamic Bayesian Networks, Cutset Networks}
}
We consider the following video activity recognition (VAR) task: given a video, infer the set of activities being performed in the video and assign each frame to an activity. Although VAR can be solved accurately using existing deep learning techniques, deep networks are neither interpretable nor explainable, and as a result their use is problematic in high-stakes decision-making applications (e.g., in healthcare, experimental biology, aviation, and law). In such applications, failure may lead to disastrous consequences, and therefore it is necessary that the user be able to either understand the inner workings of the model or probe it to understand its reasoning patterns for a given decision. We address these limitations of deep networks by proposing a new approach that feeds the output of a deep model into a tractable, interpretable probabilistic model called a dynamic conditional cutset network, defined over the explanatory and output variables, and then performs joint inference over the combined model. The two key benefits of using cutset networks are: (a) they explicitly model the relationship between the output and explanatory variables, and as a result the combined model is likely to be more accurate than the vanilla deep model, and (b) they can answer reasoning queries in polynomial time, and as a result they can derive meaningful explanations by efficiently answering explanation queries. We demonstrate the efficacy of our approach on two datasets, Textually Annotated Cooking Scenes (TACoS) and Wetlab, using conventional evaluation measures such as the Jaccard Index and Hamming Loss, as well as a human-subjects study.
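As a rough, hypothetical stand-in for the combined model described above (the paper uses a dynamic conditional cutset network, not the simple transition model shown here), the sketch below illustrates the general pipeline of joint inference over deep per-frame outputs and a tractable temporal model: per-frame activity log-probabilities from the deep network are decoded jointly with a first-order transition matrix via Viterbi decoding.

import numpy as np

def joint_decode(frame_logprobs, log_trans):
    # frame_logprobs: (T, K) per-frame activity log-probabilities from the deep model.
    # log_trans: (K, K) log transition matrix of the temporal model (assumed given).
    T, K = frame_logprobs.shape
    score = np.full((T, K), -np.inf)
    back = np.zeros((T, K), dtype=int)
    score[0] = frame_logprobs[0]
    for t in range(1, T):
        prev = score[t - 1][:, None] + log_trans   # (K, K): previous state -> current state
        back[t] = prev.argmax(axis=0)
        score[t] = prev.max(axis=0) + frame_logprobs[t]
    path = [int(score[-1].argmax())]
    for t in range(T - 1, 0, -1):
        path.append(int(back[t, path[-1]]))
    return path[::-1]                              # one activity label per frame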
@workshop{peddi2023captaincook4dworkshop,
title = {Put on your detective hat: What's wrong in this video?},
author = {Peddi, Rohith and Arya, Shivvrat and Challa, Bharath and Pallapothula, Likhitha and Vyas, Akshay and Wang, Jikai and Zhang, Qifan and Komaragiri, Vasundhara and Ragan, Eric and Ruozzi, Nicholas and Xiang, Yu and Gogate, Vibhav},
year = {2023},
month = jun,
file = {DMLR_ICML_2023_1.pdf},
booktitle = {DMLR Workshop: Data-centric Machine Learning Research}
}
@unpublished{arya2023deep,
title = {Deep Dependency Networks for Multi-Label Classification},
author = {Arya, Shivvrat and Xiang, Yu and Gogate, Vibhav},
year = {2023},
month = jan,
eprint = {2302.00633},
archiveprefix = {arXiv},
primaryclass = {cs.LG},
file = {arxiv_arya2023deep.pdf},
booktitle = {arXiv}
}
We propose a simple approach which combines the strengths of probabilistic graphical models and deep learning architectures for solving the multi-label classification task, focusing specifically on image and video data. First, we show that the performance of previous approaches that combine Markov Random Fields with neural networks can be modestly improved by leveraging more powerful methods such as iterative join graph propagation, integer linear programming, and regularization-based structure learning. Then we propose a new modeling framework called deep dependency networks, which attaches a dependency network, a model that is easy to train and learns more accurate dependencies but is limited to Gibbs sampling for inference, to the output layer of a neural network. We show that despite its simplicity, jointly learning this new architecture yields significant improvements in performance over the baseline neural network. In particular, our experimental evaluation on three video activity classification datasets (Charades, Textually Annotated Cooking Scenes (TACoS), and Wetlab) and three multi-label image classification datasets (MS-COCO, PASCAL VOC, and NUS-WIDE) shows that deep dependency networks are almost always superior to pure neural architectures that do not use dependency networks.
@unpublished{peddi2023captaincook4d,
title = {CaptainCook4D: A dataset for understanding errors in procedural activities},
author = {Peddi, Rohith and Arya, Shivvrat and Challa, Bharath and Pallapothula, Likhitha and Vyas, Akshay and Wang, Jikai and Zhang, Qifan and Komaragiri, Vasundhara and Ragan, Eric and Ruozzi, Nicholas and Xiang, Yu and Gogate, Vibhav},
year = {2023},
eprint = {2312.14556},
archiveprefix = {arXiv},
website = {https://captaincook4d.github.io/captain-cook/},
primaryclass = {cs.CV},
booktitle = {arXiv}
}
Following step-by-step procedures is an essential component of various activities carried out by individuals in their daily lives. These procedures serve as a guiding framework that helps to achieve goals efficiently, whether it is assembling furniture or preparing a recipe. However, the complexity and duration of procedural activities inherently increase the likelihood of making errors. Understanding such procedural activities from a sequence of frames is a challenging task that demands an accurate interpretation of visual information and the ability to reason about the structure of the activity. To this end, we collect a new egocentric 4D dataset, CaptainCook4D, comprising 384 recordings (94.5 hours) of people performing recipes in real kitchen environments. This dataset consists of two distinct types of activity: one in which participants adhere to the provided recipe instructions and another in which they deviate and induce errors. We provide 5.3K step annotations and 10K fine-grained action annotations and benchmark the dataset for the following tasks: supervised error recognition, multi-step localization, and procedure learning.
@inproceedings{9207436,
author = {Chauhan, Vikas and Tiwari, Aruna and Arya, Shivvrat},
booktitle = {2020 International Joint Conference on Neural Networks (IJCNN)},
title = {Multi-Label classifier based on Kernel Random Vector Functional Link Network},
year = {2020},
pages = {1--7},
doi = {10.1109/IJCNN48605.2020.9207436},
issn = {2161-4407},
month = jul
}
In this paper, a kernelized version of the random vector functional link (RVFL) network is proposed for multi-label classification. The classifier uses the pseudo-inverse to find the output weights of the network. Because the pseudo-inverse solution is non-iterative, the network requires little fine-tuning to train. Kernelization makes the RVFL robust and stable, since there is no need to tune the number of neurons in the enhancement layer. A threshold function is applied to the kernelized RVFL outputs to make it suitable for multi-label learning problems. Experiments performed on three benchmark multi-label datasets (bibtex, emotions, and scene) show that the proposed classifier outperforms various existing multi-label classifiers.
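A minimal sketch of the non-iterative, kernelized training described above (NumPy; the kernel choice, regularization, and threshold are illustrative assumptions, and the exact kernel RVFL formulation in the paper may differ): the output weights come from a single regularized linear solve rather than iterative training, and a fixed threshold converts real-valued outputs into multi-label predictions.

import numpy as np

def rbf_kernel(A, B, gamma=0.1):
    # Pairwise RBF kernel between row vectors of A and B.
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-gamma * d2)

def train_kernel_mlc(X, Y, C=1.0, gamma=0.1):
    # X: (n, d) training features; Y: (n, L) binary label matrix.
    K = rbf_kernel(X, X, gamma)
    # Closed-form output weights: a regularized solve replaces iterative
    # tuning of enhancement-layer neurons.
    beta = np.linalg.solve(K + np.eye(len(X)) / C, Y)
    return beta

def predict(X_train, beta, X_test, gamma=0.1, threshold=0.5):
    scores = rbf_kernel(X_test, X_train, gamma) @ beta
    return (scores >= threshold).astype(int)      # multi-label decisions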
@underreview{unpub_1,
author = {Chauhan, Vikas and Tiwari, Aruna and Arya, Shivvrat},
title = {Multi-label Classification based on Broad Learning System},
organization = {Neural Computing and Applications Journal},
year = {0}
}
@underreview{unpub_2,
author = {Chauhan, Vikas and Tiwari, Aruna and Arya, Shivvrat},
title = {Multi-label Classification based on Random Vector Functional Link Neural Networks},
organization = {Soft Computing Journal},
year = {0}
}