
Publications
2020
Journal Articles
Ecem Sogancioglu, Keelin Murphy, Erdi Calli, Ernst Scholten, Steven Schalekamp, Bram van Ginneken. "Cardiomegaly Detection on Chest Radiographs: Segmentation Versus Classification." IEEE Access, 8, pp. 94631-94642, 2020. ISSN: 2169-3536. doi: 10.1109/ACCESS.2020.2995567. https://ieeexplore.ieee.org/document/9096290
C. González-Gonzalo, B. Liefers, B. van Ginneken, C. I. Sánchez. "Iterative augmentation of visual evidence for weakly-supervised lesion localization in deep interpretability frameworks: application to color fundus images." IEEE Transactions on Medical Imaging, 39(11), pp. 3499-3511, 2020. ISSN: 1558-254X. doi: 10.1109/TMI.2020.2994463. https://ieeexplore.ieee.org/abstract/document/9103111, https://arxiv.org/abs/1910.07373
Interpretability of deep learning (DL) systems is gaining attention in medical imaging to increase experts' trust in the obtained predictions and facilitate their integration in clinical settings. We propose a deep visualization method to generate interpretability of DL classification tasks in medical imaging by means of visual evidence augmentation. The proposed method iteratively unveils abnormalities based on the prediction of a classifier trained only with image-level labels. For each image, initial visual evidence of the prediction is extracted with a given visual attribution technique. This provides localization of abnormalities that are then removed through selective inpainting. We iteratively apply this procedure until the system considers the image as normal. This yields augmented visual evidence, including less discriminative lesions which were not detected at first but should be considered for final diagnosis. We apply the method to grading of two retinal diseases in color fundus images: diabetic retinopathy (DR) and age-related macular degeneration (AMD). We evaluate the generated visual evidence and the performance of weakly-supervised localization of different types of DR and AMD abnormalities, both qualitatively and quantitatively. We show that the augmented visual evidence of the predictions highlights the biomarkers considered by experts for diagnosis and improves the final localization performance, yielding a relative increase of 11.2±2.0% in per-image sensitivity averaged at 10 false positives/image, when applied to different classification tasks, visual attribution techniques and network architectures. This makes the proposed method a useful tool for exhaustive visual support of DL classifiers in medical imaging.
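A minimal sketch of the iterative evidence-augmentation loop described in the abstract above. The functions `classify`, `attribute` and `inpaint` are hypothetical placeholders for a trained image-level classifier, a visual attribution technique and a selective inpainting model; the thresholds are illustrative and not taken from the paper.

```python
import numpy as np

def augment_visual_evidence(image, classify, attribute, inpaint,
                            normal_threshold=0.5, attribution_threshold=0.5,
                            max_iterations=10):
    """Iteratively unveil abnormalities until the classifier considers the image normal."""
    evidence = np.zeros(image.shape[:2], dtype=bool)   # accumulated localization of abnormalities
    current = image.copy()
    for _ in range(max_iterations):
        p_abnormal = classify(current)                  # image-level prediction in [0, 1]
        if p_abnormal < normal_threshold:               # the system considers the image normal
            break
        attribution = attribute(current)                # per-pixel visual evidence of the prediction
        mask = attribution > attribution_threshold * attribution.max()
        evidence |= mask                                # augment the accumulated visual evidence
        current = inpaint(current, mask)                # remove the detected abnormalities
    return evidence
```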
Suzanne C. Wetstein, Allison M. Onken, Christina Luffman, Gabrielle M. Baker, Michael E. Pyle, Kevin H. Kensler, Ying Liu, Bart Bakker, Ruud Vlutters, Marinus B. van Leeuwen, Laura C. Collins, Stuart J. Schnitt, Josien P. W. Pluim, Rulla M. Tamimi, Yujing J. Heng, Mitko Veta. "Deep learning assessment of breast terminal duct lobular unit involution: Towards automated prediction of breast cancer risk." PLoS ONE, 15(4), e0231653, 2020. doi: 10.1371/journal.pone.0231653. https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0231653
Terminal duct lobular unit (TDLU) involution is the regression of milk-producing structures in the breast. Women with less TDLU involution are more likely to develop breast cancer. A major bottleneck in studying TDLU involution in large cohort studies is the need for labor-intensive manual assessment of TDLUs. We developed a computational pathology solution to automatically capture TDLU involution measures. Whole slide images (WSIs) of benign breast biopsies were obtained from the Nurses' Health Study. A set of 92 WSIs was annotated for acini, TDLUs and adipose tissue to train deep convolutional neural network (CNN) models for detection of acini, and segmentation of TDLUs and adipose tissue. These networks were integrated into a single computational method to capture TDLU involution measures including number of TDLUs per tissue area, median TDLU span and median number of acini per TDLU. We validated our method on 40 additional WSIs by comparing with manually acquired measures. Our CNN models detected acini with an F1 score of 0.73±0.07, and segmented TDLUs and adipose tissue with Dice scores of 0.84±0.13 and 0.87±0.04, respectively. The inter-observer ICC scores for manual assessments on 40 WSIs of number of TDLUs per tissue area, median TDLU span, and median acini count per TDLU were 0.71, 0.81 and 0.73, respectively. Intra-observer reliability was evaluated on 10/40 WSIs with ICC scores of >0.8. Inter-observer ICC scores between automated results and the mean of the two observers were: 0.80 for number of TDLUs per tissue area, 0.57 for median TDLU span, and 0.80 for median acini count per TDLU. TDLU involution measures evaluated by manual and automated assessment were inversely associated with age and menopausal status. We developed a computational pathology method to measure TDLU involution. This technology eliminates the labor-intensiveness and subjectivity of manual TDLU assessment, and can be applied to future breast cancer risk studies.
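For intuition, a minimal sketch of how the three involution measures named in the abstract could be derived from model outputs. The inputs (a labelled TDLU mask, acinus coordinates, a tissue mask, the pixel size) and the bounding-box approximation of TDLU span are assumptions, not the authors' pipeline.

```python
import numpy as np

def tdlu_involution_measures(tdlu_labels, acini_xy, tissue_mask, mm_per_pixel):
    """tdlu_labels: int array with one label per TDLU (0 = background);
    acini_xy: (N, 2) integer array of detected acinus (x, y) positions;
    tissue_mask: boolean array of tissue pixels."""
    tissue_area_mm2 = tissue_mask.sum() * mm_per_pixel ** 2
    spans_mm, acini_counts = [], []
    for lab in range(1, int(tdlu_labels.max()) + 1):
        ys, xs = np.nonzero(tdlu_labels == lab)
        if ys.size == 0:
            continue
        # approximate the TDLU span by the bounding-box diagonal (a simplification)
        spans_mm.append(np.hypot(ys.max() - ys.min(), xs.max() - xs.min()) * mm_per_pixel)
        acini_counts.append(int((tdlu_labels[acini_xy[:, 1], acini_xy[:, 0]] == lab).sum()))
    return {
        "tdlus_per_mm2": len(spans_mm) / tissue_area_mm2,
        "median_tdlu_span_mm": float(np.median(spans_mm)) if spans_mm else 0.0,
        "median_acini_per_tdlu": float(np.median(acini_counts)) if acini_counts else 0.0,
    }
```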
Coen de Vente, Pieter Vos, Matin Hosseinzadeh, Josien Pluim, Mitko Veta. "Deep Learning Regression for Prostate Cancer Detection and Grading in Bi-parametric MRI." IEEE Transactions on Biomedical Engineering, 2020 (forthcoming). https://ieeexplore.ieee.org/abstract/document/9090311
One of the most common types of cancer in men is prostate cancer (PCa). Biopsies guided by bi-parametric magnetic resonance imaging (MRI) can aid PCa diagnosis. Previous works have mostly focused on either detection or classification of PCa from MRI. In this work, however, we present a neural network that simultaneously detects and grades cancer tissue in an end-to-end fashion. This is more clinically relevant than the classification goal of the ProstateX-2 challenge. We used the dataset of this challenge for training and testing. We use a 2D U-Net with MRI slices as input and lesion segmentation maps that encode the Gleason Grade Group (GGG), a measure for cancer aggressiveness, as output. We propose a method for encoding the GGG in the model target that takes advantage of the fact that the classes are ordinal. Furthermore, we evaluate methods for incorporating prostate zone segmentations as prior information, and ensembling techniques. The model scored a voxel-wise weighted kappa of 0.446 ± 0.082 and a Dice similarity coefficient for segmenting clinically significant cancer of 0.370 ± 0.046, obtained using 5-fold cross-validation. The lesion-wise weighted kappa on the ProstateX-2 challenge test set was 0.13 ± 0.27. We show that our proposed model target outperforms standard multiclass classification and multi-label ordinal regression. Additionally, we present a comparison of methods for further improvement of the model performance.
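A small sketch of one common way to encode an ordinal target such as the Gleason Grade Group: each grade is represented by the set of thresholds it exceeds. This is offered as an illustration of the idea of exploiting class ordinality, not as the exact encoding used in the paper.

```python
import numpy as np

def ordinal_encode(grade, n_classes=6):
    """Encode an integer grade (0 .. n_classes-1) as a vector of exceeded thresholds."""
    return (np.arange(1, n_classes) <= grade).astype(np.float32)

def ordinal_decode(threshold_probabilities, threshold=0.5):
    """Decode per-threshold probabilities back to a grade by counting exceeded thresholds."""
    return int((np.asarray(threshold_probabilities) > threshold).sum())

print(ordinal_encode(3))                          # [1. 1. 1. 0. 0.]
print(ordinal_decode([0.9, 0.8, 0.6, 0.3, 0.1]))  # 3
```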
Gerda Bortsova, Daniel Bos, Florian Dubost, Meike W. Vernooij, M. Kamran Ikram, Gijs van Tulder, Marleen de Bruijne. "Automated Assessment of Intracranial Carotid Artery Calcification in Non-Contrast CT Using Deep Learning." 2020 (forthcoming).
Friso G. Heslinga, Mark Alberti, Josien P.W. Pluim, Javier Cabrerizo, Mitko Veta. "Quantifying Graft Detachment after Descemet's Membrane Endothelial Keratoplasty with Deep Convolutional Neural Networks." Translational Vision Science & Technology, 9(48), 2020. https://tvst.arvojournals.org/article.aspx?articleid=2770687
Purpose: We developed a method to automatically locate and quantify graft detachment after Descemet's Membrane Endothelial Keratoplasty (DMEK) in Anterior Segment Optical Coherence Tomography (AS-OCT) scans. Methods: 1280 AS-OCT B-scans were annotated by a DMEK expert. Using the annotations, a deep learning pipeline was developed to localize the scleral spur, center the AS-OCT B-scans and segment the detached graft sections. Detachment segmentation model performance was evaluated per B-scan by comparing (1) length of detachment and (2) horizontal projection of the detached sections with the expert annotations. Horizontal projections were used to construct graft detachment maps. All final evaluations were done on a test set that was set apart during training of the models. A second DMEK expert annotated the test set to determine inter-rater performance. Results: Mean scleral spur localization error was 0.155 mm, whereas the inter-rater difference was 0.090 mm. The estimated graft detachment lengths were in 69% of the cases within a 10-pixel (~150 µm) difference from the ground truth (77% for the second DMEK expert). Dice scores for the horizontal projections of all B-scans with detachments were 0.896 and 0.880 for our model and the second DMEK expert, respectively. Conclusion: Our deep learning model can be used to automatically and instantly localize graft detachment in AS-OCT B-scans. Horizontal detachment projections can be determined with the same accuracy as a human DMEK expert, allowing for the construction of accurate graft detachment maps. Translational Relevance: Automated localization and quantification of graft detachment can support DMEK research and standardize clinical decision making.
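An illustrative sketch (not the authors' code) of how a per-B-scan detachment segmentation could be reduced to the two quantities evaluated above: the detachment length and a horizontal projection for building a graft detachment map.

```python
import numpy as np

def detachment_summary(detachment_mask, mm_per_pixel_x):
    """detachment_mask: boolean array (depth, width) for one AS-OCT B-scan."""
    projection = detachment_mask.any(axis=0)          # detached yes/no per A-scan column
    length_mm = projection.sum() * mm_per_pixel_x     # horizontal detachment length
    return projection, length_mm

# Stacking the projections of all B-scans of one eye yields a 2D graft detachment map.
```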
S. Bruns, J.M. Wolterink, R.A.P. Takx, R.W. van Hamersvelt, D. Suchá, M.A. Viergever, T. Leiner, I. Išgum. "Deep learning from dual-energy information for whole-heart segmentation in dual-energy and single-energy non-contrast-enhanced cardiac CT." Medical Physics (in press), 2020. doi: 10.1002/mp.14451. https://aapm.onlinelibrary.wiley.com/doi/abs/10.1002/mp.14451
Purpose: Deep learning-based whole-heart segmentation in coronary CT angiography (CCTA) allows the extraction of quantitative imaging measures for cardiovascular risk prediction. Automatic extraction of these measures in patients undergoing only non-contrast-enhanced CT (NCCT) scanning would be valuable, but defining a manual reference standard that would allow training a deep learning-based method for whole-heart segmentation in NCCT is challenging, if not impossible. In this work, we leverage dual-energy information provided by a dual-layer detector CT scanner to obtain a reference standard in virtual non-contrast (VNC) CT images mimicking NCCT images, and train a 3D convolutional neural network (CNN) for the segmentation of VNC as well as NCCT images. Methods: Eighteen patients were scanned with and without contrast enhancement on a dual-layer detector CT scanner. Contrast-enhanced acquisitions were reconstructed into a CCTA and a perfectly aligned VNC image. In each CCTA image, manual reference segmentations of the left ventricular (LV) myocardium, LV cavity, right ventricle, left atrium, right atrium, ascending aorta, and pulmonary artery trunk were obtained and propagated to the corresponding VNC image. These VNC images and reference segmentations were used to train 3D CNNs in a six-fold cross-validation for automatic segmentation in either VNC images or NCCT images reconstructed from the non-contrast-enhanced acquisition. Automatic segmentation in VNC images was evaluated using the Dice similarity coefficient (DSC) and average symmetric surface distance (ASSD). Automatically determined volumes of the cardiac chambers and LV myocardium in NCCT were compared to reference volumes of the same patient in CCTA by Bland-Altman analysis. An additional independent multi-vendor multi-center set of single-energy NCCT images from 290 patients was used for qualitative analysis, in which two observers graded segmentations on a five-point scale. Results: Automatic segmentations in VNC images showed good agreement with reference segmentations, with an average DSC of 0.897 ± 0.034 and an average ASSD of 1.42 ± 0.45 mm. Volume differences [95% confidence interval] between automatic NCCT and reference CCTA segmentations were -19 [-67; 30] mL for LV myocardium, -25 [-78; 29] mL for LV cavity, -29 [-73; 14] mL for right ventricle, -20 [-62; 21] mL for left atrium, and -19 [-73; 34] mL for right atrium, respectively. In 214 (74%) NCCT images from the independent multi-vendor multi-center set, both observers agreed that the automatic segmentation was mostly accurate (grade 3) or better. Conclusion: Our automatic method produced accurate whole-heart segmentations in NCCT images using a CNN trained with VNC images from a dual-layer detector CT scanner. This method might enable quantification of additional cardiac measures from NCCT images for improved cardiovascular risk prediction.
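The volume comparison above uses Bland-Altman analysis; below is a minimal sketch of computing the bias and 95% limits of agreement for paired per-patient volume measurements (the input arrays are assumed, one value per patient in each modality).

```python
import numpy as np

def bland_altman(automatic_volumes_ml, reference_volumes_ml):
    differences = (np.asarray(automatic_volumes_ml, dtype=float)
                   - np.asarray(reference_volumes_ml, dtype=float))
    bias = differences.mean()
    sd = differences.std(ddof=1)
    return bias, (bias - 1.96 * sd, bias + 1.96 * sd)   # bias and 95% limits of agreement
```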
J.M.H. Noothout, B.D. de Vos, J.M. Wolterink, E.M. Postma, P.A.M. Smeets, R.A.P. Takx, T. Leiner, M.A. Viergever, I. Išgum. "Deep learning-based regression and classification for automatic landmark localization in medical images." IEEE Transactions on Medical Imaging (in press), 2020. doi: 10.1109/TMI.2020.3009002. https://arxiv.org/pdf/2007.05295.pdf
In this study, we propose a fast and accurate method to automatically localize anatomical landmarks in medical images. We employ a global-to-local localization approach using fully convolutional neural networks (FCNNs). First, a global FCNN localizes multiple landmarks through the analysis of image patches, performing regression and classification simultaneously. In regression, displacement vectors pointing from the center of image patches towards landmark locations are determined. In classification, presence of landmarks of interest in the patch is established. Global landmark locations are obtained by averaging the predicted displacement vectors, where the contribution of each displacement vector is weighted by the posterior classification probability of the patch that it is pointing from. Subsequently, for each landmark localized with global localization, local analysis is performed. Specialized FCNNs refine the global landmark locations by analyzing local sub-images in a similar manner, i.e. by performing regression and classification simultaneously and combining the results. Evaluation was performed through localization of 8 anatomical landmarks in CCTA scans, 2 landmarks in olfactory MR scans, and 19 landmarks in cephalometric X-rays. We demonstrate that the method performs similarly to a second observer and is able to localize landmarks in a diverse set of medical images, differing in image modality, image dimensionality, and anatomical coverage.
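A sketch of the weighted-averaging step described in the abstract: every analyzed patch votes for a landmark location with its centre plus the predicted displacement vector, weighted by the patch's posterior probability of containing the landmark. Variable names are illustrative, not taken from the paper's implementation.

```python
import numpy as np

def combine_patch_predictions(patch_centers, displacements, posteriors):
    """patch_centers, displacements: (N, D) arrays; posteriors: (N,) classification probabilities."""
    votes = patch_centers + displacements              # each patch points towards the landmark
    weights = posteriors / (posteriors.sum() + 1e-8)   # contribution weighted by posterior probability
    return (weights[:, None] * votes).sum(axis=0)      # weighted-average landmark location
```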
Kevin H. Kensler, Emily Z.F. Liu, Suzanne C. Wetstein, Allison M. Onken, Christina I. Luffman, Gabrielle M. Baker, Laura C. Collins, Stuart J. Schnitt, Vanessa C. Bret-Mounet, Mitko Veta, Josien P.W. Pluim, Ying Liu, Graham A. Colditz, A. Heather Eliassen, Susan E. Hankinson, Rulla M. Tamimi, Yujing J. Heng. "Automated quantitative measures of terminal duct lobular unit involution and breast cancer risk." Cancer Epidemiology, Biomarkers & Prevention, 29(11), 2020. https://cebp.aacrjournals.org/content/29/11/2358.abstract
Background: Manual qualitative and quantitative measures of terminal duct lobular unit (TDLU) involution were previously reported to be inversely associated with breast cancer risk. We developed and applied a deep learning method to yield quantitative measures of TDLU involution in normal breast tissue. We assessed the associations of these automated measures with breast cancer risk factors and risk. Methods: We obtained eight quantitative measures from whole slide images from a benign breast disease (BBD) nested case–control study within the Nurses' Health Studies (287 breast cancer cases and 1,083 controls). Qualitative assessments of TDLU involution were available for 177 cases and 857 controls. The associations between risk factors and quantitative measures among controls were assessed using analysis of covariance adjusting for age. The relationship between each measure and risk was evaluated using unconditional logistic regression, adjusting for the matching factors, BBD subtypes, parity, and menopausal status. Qualitative measures and breast cancer risk were evaluated accounting for matching factors and BBD subtypes. Results: Menopausal status and parity were significantly associated with all eight measures; select TDLU measures were associated with BBD histologic subtype, body mass index, and birth index (P < 0.05). No measure was correlated with body size at ages 5–10 years, age at menarche, age at first birth, or breastfeeding history (P > 0.05). Neither quantitative nor qualitative measures were associated with breast cancer risk. Conclusions: Among Nurses' Health Studies women diagnosed with BBD, TDLU involution is not a biomarker of subsequent breast cancer. Impact: TDLU involution may not impact breast cancer risk as previously thought.
Suzanne C. Wetstein, Nikolas Stathonikos, Josien P.W. Pluim, Yujing J. Heng, Natalie D. ter Hoeve, Celien P.H. Vreuls, Paul J. van Diest, Mitko Veta. "Deep Learning-Based Grading of Ductal Carcinoma In Situ in Breast Histopathology Images." arXiv, 2020 (forthcoming). https://arxiv.org/abs/2010.03244
Ductal carcinoma in situ (DCIS) is a non-invasive breast cancer that can progress into invasive ductal carcinoma (IDC). Studies suggest DCIS is often overtreated since a considerable part of DCIS lesions may never progress into IDC. Lower grade lesions have a lower progression speed and risk, possibly allowing treatment de-escalation. However, studies show significant inter-observer variation in DCIS grading. Automated image analysis may provide an objective solution to address the high subjectivity of DCIS grading by pathologists. In this study, we developed a deep learning-based DCIS grading system. It was developed using the consensus DCIS grade of three expert observers on a dataset of 1186 DCIS lesions from 59 patients. The inter-observer agreement, measured by quadratic weighted Cohen's kappa, was used to evaluate the system and compare its performance to that of expert observers. We present an analysis of the lesion-level and patient-level inter-observer agreement on an independent test set of 1001 lesions from 50 patients. At the lesion level, the deep learning system (dl) achieved on average slightly higher agreement with the observers (o1, o2 and o3) (κ(o1,dl) = 0.81, κ(o2,dl) = 0.53, κ(o3,dl) = 0.40) than the observers amongst each other (κ(o1,o2) = 0.58, κ(o1,o3) = 0.50, κ(o2,o3) = 0.42). At the patient level, the deep learning system achieved agreement with the observers (κ(o1,dl) = 0.77, κ(o2,dl) = 0.75, κ(o3,dl) = 0.70) similar to that of the observers amongst each other (κ(o1,o2) = 0.77, κ(o1,o3) = 0.75, κ(o2,o3) = 0.72). In conclusion, we developed a deep learning-based DCIS grading system that achieved a performance similar to expert observers. We believe this is the first automated system that could assist pathologists by providing robust and reproducible second opinions on DCIS grade.
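The agreement metric quoted above is the quadratic weighted Cohen's kappa; a short sketch of computing it for two graders with scikit-learn (the lesion grades here are made-up illustration data).

```python
from sklearn.metrics import cohen_kappa_score

observer_1 = [1, 2, 3, 2, 1, 3, 2]   # illustrative DCIS grades from one observer
observer_2 = [1, 2, 2, 2, 1, 3, 3]   # illustrative grades from a second observer (or the model)
kappa = cohen_kappa_score(observer_1, observer_2, weights="quadratic")
print(f"quadratic weighted kappa: {kappa:.2f}")
```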
Inproceedings
B.D. de Vos, B.H.M. van der Velden, J. Sander, K.G.A. Gilhuijs, M. Staring, I. Išgum. "Mutual information for unsupervised deep learning image registration." SPIE Medical Imaging (in press), 2020. https://spie.org/MI/conferencedetails/medical-image-processing#2549729
Current unsupervised deep learning-based image registration methods are trained with mean squares or normalized cross correlation as a similarity metric. These metrics are suitable for registration of images where a linear relation between image intensities exists. When such a relation is absent, knowledge from the conventional image registration literature suggests the use of mutual information. In this work we investigate whether mutual information can be used as a loss for unsupervised deep learning image registration by evaluating it on two datasets: breast dynamic contrast-enhanced MR and cardiac MR images. The results show that training with mutual information as a loss gives performance on par with conventional image registration in contrast-enhanced images, and that the loss is generally applicable, performing on par with normalized cross correlation in single-modality registration.
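For intuition, a plain NumPy sketch of histogram-based mutual information between a fixed and a moving image. Training a registration network end-to-end requires a differentiable (e.g. Parzen-window) estimate, so this non-differentiable version only illustrates the similarity measure itself.

```python
import numpy as np

def mutual_information(fixed, moving, bins=32):
    joint, _, _ = np.histogram2d(fixed.ravel(), moving.ravel(), bins=bins)
    pxy = joint / joint.sum()                       # joint intensity distribution
    px = pxy.sum(axis=1, keepdims=True)             # marginal of the fixed image
    py = pxy.sum(axis=0, keepdims=True)             # marginal of the moving image
    nonzero = pxy > 0                               # avoid log(0)
    return float((pxy[nonzero] * np.log(pxy[nonzero] / (px @ py)[nonzero])).sum())
```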
F.G. Heslinga, J.P.W. Pluim, A.J. Houben, M.T. Schram, R.M. Henry, C.D. Stehouwer, M.J. Van Greevenbroek, T.T. Berendschot, M. Veta. "Direct Classification of Type 2 Diabetes From Retinal Fundus Images in a Population-based Sample From The Maastricht Study." Medical Imaging 2020: Computer-Aided Diagnosis, vol. 11314, pp. 113141N, International Society for Optics and Photonics, 2020. https://www.spiedigitallibrary.org/conference-proceedings-of-spie/11314/113141N/Direct-classification-of-type-2-diabetes-from-retinal-fundus-images/10.1117/12.2549574.full?SSO=1
Type 2 Diabetes (T2D) is a chronic metabolic disorder that can lead to blindness and cardiovascular disease. Information about early stage T2D might be present in retinal fundus images, but to what extent these images can be used for a screening setting is still unknown. In this study, deep neural networks were employed to differentiate between fundus images from individuals with and without T2D. We investigated three methods to achieve high classification performance, measured by the area under the receiver operating characteristic curve (ROC-AUC). A multi-target learning approach to simultaneously output retinal biomarkers as well as T2D works best (AUC = 0.746 [±0.001]). Furthermore, the classification performance can be improved when images with high prediction uncertainty are referred to a specialist. We also show that the combination of images of the left and right eye per individual can further improve the classification performance (AUC = 0.758 [±0.003]), using a simple averaging approach. The results are promising, suggesting the feasibility of screening for T2D from retinal fundus images.
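A sketch of the per-individual combination described above: the left- and right-eye probabilities are simply averaged, and highly uncertain cases are referred to a specialist. `predict_t2d` is a placeholder for the trained fundus classifier, and the referral band is an invented threshold, not the rule used in the paper.

```python
def predict_individual(left_eye_image, right_eye_image, predict_t2d):
    """Average the T2D probabilities of both eyes of one individual."""
    return 0.5 * (predict_t2d(left_eye_image) + predict_t2d(right_eye_image))

def classify_with_referral(p_t2d, uncertainty_band=0.1):
    """Refer the case when the combined prediction is too close to the decision boundary."""
    if abs(p_t2d - 0.5) < uncertainty_band:
        return "refer to specialist"
    return "T2D" if p_t2d >= 0.5 else "no T2D"
```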
J.M.H. Noothout, E.M. Postma, S. Boesveldt, B.D. de Vos, P.A.M. Smeets, I. Išgum. "Automatic segmentation of the olfactory bulbs in MRI." SPIE Medical Imaging (in press), 2020.
A decrease in volume of the olfactory bulbs (OBs) is an early marker for neurodegenerative diseases, such as Parkinson's and Alzheimer's disease. Recently, asymmetric volumes of olfactory bulbs present in postmortem MRIs of COVID-19 patients indicate that the olfactory bulbs might play an important role in the entrance of the disease in the central nervous system. Hence, volumetric assessment of the olfactory bulbs can be valuable for various conditions. Given that manual annotation of the olfactory bulbs in MRI to determine their volume is tedious, we propose a method for their automatic segmentation. To mitigate the class imbalance caused by the small volume of the olfactory bulbs, we first localize the center of each OB in a scan using convolutional neural networks (CNNs). We use these center locations to extract a bounding box containing both olfactory bulbs. Subsequently, the slices present in the bounding box are analyzed by a segmentation CNN that classifies each voxel as left OB, right OB, or background. The method achieved median (IQR) Dice coefficients of 0.84 (0.08) and 0.83 (0.08), and Average Symmetrical Surface Distances of 0.12 (0.08) and 0.13 (0.08) mm for the left and the right OB, respectively. Wilcoxon Signed Rank tests showed no significant difference between the volumes computed from the reference annotation and the automatic segmentations. Analysis took only 0.20 seconds per scan and the results indicate that the proposed method could be a first step towards large-scale studies analyzing pathology and morphology of the olfactory bulbs.
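An illustrative sketch of the two-stage pipeline summarized above: localize the two olfactory bulb centres, crop a bounding box around them, and segment left OB / right OB / background inside the crop. `localize_centers` and `segment_crop` stand in for the trained CNNs, and the margin is an invented value.

```python
import numpy as np

def segment_olfactory_bulbs(scan, localize_centers, segment_crop, margin=(8, 16, 16)):
    centers = np.asarray(localize_centers(scan))                 # (2, 3): left and right OB centres
    lo = np.maximum(centers.min(axis=0).astype(int) - margin, 0)
    hi = np.minimum(centers.max(axis=0).astype(int) + margin, scan.shape)
    crop = scan[lo[0]:hi[0], lo[1]:hi[1], lo[2]:hi[2]]
    labels = np.zeros(scan.shape, dtype=np.uint8)
    labels[lo[0]:hi[0], lo[1]:hi[1], lo[2]:hi[2]] = segment_crop(crop)  # 0 = background, 1 = left OB, 2 = right OB
    return labels
```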
S. Bruns, J.M. Wolterink, T.P.W. van den Boogert, J.P. Henriques, J. Baan, R.N. Planken, I. Išgum. "Automatic whole-heart segmentation in 4D TAVI treatment planning CT." SPIE Medical Imaging (in press), 2020.
4D cardiac CT angiography (CCTA) images acquired for transcatheter aortic valve implantation (TAVI) planning provide a wealth of information about the morphology of the heart throughout the cardiac cycle. We propose a deep learning method to automatically segment the cardiac chambers and myocardium in 4D CCTA. We obtain automatic segmentations in 472 patients and use these to automatically identify end-systolic (ES) and end-diastolic (ED) phases, and to determine the left ventricular ejection fraction (LVEF). Our results show that automatic segmentation of cardiac structures through the cardiac cycle is feasible (median Dice similarity coefficient 0.908, median average symmetric surface distance 1.59 mm). Moreover, we demonstrate that these segmentations can be used to accurately identify ES and ED phases (bias [limits of agreement] of 1.81 [-11.0; 14.7]% and -0.02 [-14.1; 14.1]%). Finally, we show that there is correspondence between LVEF values determined from CCTA and echocardiography (-1.71 [-25.0; 21.6]%). Our automatic deep learning approach to segmentation has the potential to routinely extract functional information from 4D CCTA.
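A minimal sketch of how the ES/ED phases and the LVEF can be derived from per-phase left-ventricular blood-pool volumes obtained from the segmentations, as described above; `lv_volumes_ml` is an assumed input with one volume per reconstructed cardiac phase.

```python
import numpy as np

def es_ed_and_lvef(lv_volumes_ml):
    volumes = np.asarray(lv_volumes_ml, dtype=float)
    es_phase = int(volumes.argmin())                  # end-systole: smallest LV cavity volume
    ed_phase = int(volumes.argmax())                  # end-diastole: largest LV cavity volume
    lvef = 100.0 * (volumes[ed_phase] - volumes[es_phase]) / volumes[ed_phase]
    return es_phase, ed_phase, lvef
```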
J. Sander, B.D. de Vos, I. Išgum. "Unsupervised super-resolution: creating high-resolution medical images from low-resolution anisotropic examples." SPIE Medical Imaging (in press), 2020.
Although high-resolution isotropic 3D medical images are desired in clinical practice, their acquisition is not always feasible. Instead, lower resolution images are upsampled to higher resolution using conventional interpolation methods. Sophisticated learning-based super-resolution approaches are frequently unavailable in the clinical setting, because such methods require training with high-resolution isotropic examples. To address this issue, we propose a learning-based super-resolution approach that can be trained using solely anisotropic images, i.e. without high-resolution ground truth data. The method exploits the latent space, generated by autoencoders trained on anisotropic images, to increase spatial resolution in low-resolution images. The method was trained and evaluated using 100 publicly available cardiac cine MR scans from the Automated Cardiac Diagnosis Challenge (ACDC). The quantitative results show that the proposed method performs better than conventional interpolation methods. Furthermore, the qualitative results indicate that especially finer cardiac structures are synthesized with high quality. The method has the potential to be applied to other anatomies and modalities and can be easily applied to any 3D anisotropic medical image dataset.
Conferences
Suzanne C. Wetstein, Cristina González-Gonzalo, Gerda Bortsova, Bart Liefers, Florian Dubost, Ioannis Katramados, Laurens Hogeweg, Bram van Ginneken, Josien P.W. Pluim, Marleen de Bruijne, Clara I. Sánchez, Mitko Veta. "Adversarial Attack Vulnerability of Medical Image Analysis Systems: Unexplored Factors." Conference paper, 2020. https://arxiv.org/abs/2006.06356
Adversarial attacks are considered a potentially serious security threat for machine learning systems. Medical image analysis (MedIA) systems have recently been argued to be particularly vulnerable to adversarial attacks due to strong financial incentives. In this paper, we study several previously unexplored factors affecting adversarial attack vulnerability of deep learning MedIA systems in three medical domains: ophthalmology, radiology and pathology. Firstly, we study the effect of varying the degree of adversarial perturbation on the attack performance and its visual perceptibility. Secondly, we study how pre-training on a public dataset (ImageNet) affects the models' vulnerability to attacks. Thirdly, we study the influence of data and model architecture disparity between target and attacker models. Our experiments show that the degree of perturbation significantly affects both performance and human perceptibility of attacks. Pre-training may dramatically increase the transfer of adversarial examples; the larger the performance gain achieved by pre-training, the larger the transfer. Finally, disparity in data and/or model architecture between target and attacker models substantially decreases the success of attacks. We believe that these factors should be considered when designing cybersecurity-critical MedIA systems, as well as kept in mind when evaluating their vulnerability to adversarial attacks.
C. González-Gonzalo, S. C. Wetstein, G. Bortsova, B. Liefers, B. van Ginneken, C. I. Sánchez European Society of Retina Specialists, 2020. @conference{Gonz20c, title = {Are adversarial attacks an actual threat for deep learning systems in real-world eye disease screening settings?}, author = {C. González-Gonzalo, S. C. Wetstein, G. Bortsova, B. Liefers, B. van Ginneken, C. I. Sánchez}, url = {https://www.euretina.org/congress/amsterdam-2020/virtual-2020-freepapers/}, year = {2020}, date = {2020-10-02}, booktitle = {European Society of Retina Specialists}, abstract = {Purpose: Deep learning (DL) systems that perform image-level classification with convolutional neural networks (CNNs) have been shown to provide high-performance solutions for automated screening of eye diseases. Nevertheless, adversarial attacks have recently been presented as a potential threat to these systems, and it remains unclear whether they are an actual threat in real-world screening settings, where there is restricted access to the systems and limited knowledge about certain factors, such as their CNN architecture or the data used for development. Setting: Deep learning for automated screening of eye diseases. Methods: We used the Kaggle dataset for diabetic retinopathy detection. It contains 88,702 manually-labelled color fundus images, which we split into test (12%) and development (88%). Development data were split into two equally-sized sets (d1 and d2); a third set (d3) was generated using half of the images in d2. In each development set, 80%/20% of the images were used for training/validation. All splits were done randomly at patient-level. As attacked system, we developed a randomly-initialized CNN based on the Inception-v3 architecture using d1. We performed the attacks (1) in a white-box (WB) setting, with full access to the attacked system to generate the adversarial images, and (2) in black-box (BB) settings, without access to the attacked system and using a surrogate system to craft the attacks. We simulated different BB settings, sequentially decreasing the available knowledge about the attacked system: same architecture, using d1 (BB-1); different architecture (randomly-initialized DenseNet-121), using d1 (BB-2); same architecture, using d2 (BB-3); different architecture, using d2 (BB-4); different architecture, using d3 (BB-5). In each setting, adversarial images containing non-perceptible noise were generated by applying the fast gradient sign method to each image of the test set and processed by the attacked system. Results: The performance of the attacked system to detect referable diabetic retinopathy without attacks and under the different attack settings was measured on the test set using the area under the receiver operating characteristic curve (AUC). Without attacks, the system achieved an AUC of 0.88. In each attack setting, the relative decrease in AUC with respect to the original performance was computed. In the WB setting, there was a 99.9% relative decrease in performance. In the BB-1 setting, the relative decrease in AUC was 67.3%. In the BB-2 setting, the AUC suffered a 40.2% relative decrease. In the BB-3 setting, the relative decrease was 37.9%. In the BB-4 setting, the relative decrease in AUC was 34.1%. Lastly, in the BB-5 setting, the performance of the attacked system decreased 3.8% regarding its original performance. Conclusions: The results obtained in the different settings show a drastic decrease of the attacked DL system's vulnerability to adversarial attacks when the access and knowledge about it are limited. 
The impact on performance is extremely reduced when restricting the direct access to the system (from the WB to the BB-1 setting). The attacks become slightly less effective when not having access to the same development data (BB-3), compared to not using the same CNN architecture (BB-2). Attacks' effectiveness further decreases when both factors are unknown (BB-4). If the amount of development data is additionally reduced (BB-5), the original performance barely deteriorates. This last setting is the most similar to realistic screening settings, since most systems are currently closed source and use additional large private datasets for development. In conclusion, these factors should be acknowledged for future development of robust DL systems, as well as considered when evaluating the vulnerability of currently-available systems to adversarial attacks. Having limited access and knowledge about the systems determines the actual threat these attacks pose. We believe awareness about this matter will increase experts' trust and facilitate the integration of DL systems in real-world settings.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Purpose: Deep learning (DL) systems that perform image-level classification with convolutional neural networks (CNNs) have been shown to provide high-performance solutions for automated screening of eye diseases. Nevertheless, adversarial attacks have recently been presented as a potential threat to these systems, and it remains unclear whether they are an actual threat in real-world screening settings, where there is restricted access to the systems and limited knowledge about certain factors, such as their CNN architecture or the data used for development. Setting: Deep learning for automated screening of eye diseases. Methods: We used the Kaggle dataset for diabetic retinopathy detection. It contains 88,702 manually-labelled color fundus images, which we split into test (12%) and development (88%). Development data were split into two equally-sized sets (d1 and d2); a third set (d3) was generated using half of the images in d2. In each development set, 80%/20% of the images were used for training/validation. All splits were done randomly at patient-level. As attacked system, we developed a randomly-initialized CNN based on the Inception-v3 architecture using d1. We performed the attacks (1) in a white-box (WB) setting, with full access to the attacked system to generate the adversarial images, and (2) in black-box (BB) settings, without access to the attacked system and using a surrogate system to craft the attacks. We simulated different BB settings, sequentially decreasing the available knowledge about the attacked system: same architecture, using d1 (BB-1); different architecture (randomly-initialized DenseNet-121), using d1 (BB-2); same architecture, using d2 (BB-3); different architecture, using d2 (BB-4); different architecture, using d3 (BB-5). In each setting, adversarial images containing non-perceptible noise were generated by applying the fast gradient sign method to each image of the test set and processed by the attacked system. Results: The performance of the attacked system to detect referable diabetic retinopathy without attacks and under the different attack settings was measured on the test set using the area under the receiver operating characteristic curve (AUC). Without attacks, the system achieved an AUC of 0.88. In each attack setting, the relative decrease in AUC with respect to the original performance was computed. In the WB setting, there was a 99.9% relative decrease in performance. 
In the BB-1 setting, the relative decrease in AUC was 67.3%. In the BB-2 setting, the AUC suffered a 40.2% relative decrease. In the BB-3 setting, the relative decrease was 37.9%. In the BB-4 setting, the relative decrease in AUC was 34.1%. Lastly, in the BB-5 setting, the performance of the attacked system decreased 3.8% regarding its original performance. Conclusions: The results obtained in the different settings show a drastic decrease of the attacked DL system's vulnerability to adversarial attacks when the access and knowledge about it are limited. The impact on performance is extremely reduced when restricting the direct access to the system (from the WB to the BB-1 setting). The attacks become slightly less effective when not having access to the same development data (BB-3), compared to not using the same CNN architecture (BB-2). Attacks' effectiveness further decreases when both factors are unknown (BB-4). If the amount of development data is additionally reduced (BB-5), the original performance barely deteriorates. This last setting is the most similar to realistic screening settings, since most systems are currently closed source and use additional large private datasets for development. In conclusion, these factors should be acknowledged for future development of robust DL systems, as well as considered when evaluating the vulnerability of currently-available systems to adversarial attacks. Having limited access and knowledge about the systems determines the actual threat these attacks pose. We believe awareness about this matter will increase experts' trust and facilitate the integration of DL systems in real-world settings. |
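Since the abstract above explicitly names the fast gradient sign method, a minimal PyTorch sketch of that attack is given below for orientation; the epsilon value, loss, and model are placeholders rather than the settings used in the study.

```python
import torch
import torch.nn.functional as F

def fgsm_attack(model, images, labels, epsilon=0.01):
    """Fast gradient sign method: add an epsilon-sized step along the sign of the
    gradient of the loss with respect to the input images."""
    images = images.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(images), labels)
    loss.backward()
    adversarial = images + epsilon * images.grad.sign()
    return adversarial.clamp(0.0, 1.0).detach()  # keep pixel values in a valid range
```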
2019
Journal Articles |
R.W. van Hamersvelt, I. Išgum, P.A. de Jong, M.J. Cramer, G.E. Leenders, M.J. Willemink, M. Voskuil, T. Leiner Application of speCtraL computed tomogrAphy to impRove specIficity of cardiac compuTed tomographY (CLARITY study): Rationale and Design Journal Article BMJ Open, 9 (3), pp. e025793, 2019. @article{vanHamersvelt2019, title = {Application of speCtraL computed tomogrAphy to impRove specIficity of cardiac compuTed tomographY (CLARITY study): Rationale and Design}, author = {R.W. van Hamersvelt, I. Išgum, P.A. de Jong, M.J. Cramer, G.E. Leenders, M.J. Willemink, M. Voskuil, T. Leiner}, url = {https://www.ncbi.nlm.nih.gov/pubmed/30826767}, year = {2019}, date = {2019-01-25}, journal = {BMJ Open}, volume = {9}, number = {3}, pages = {e025793}, abstract = {INTRODUCTION: Anatomic stenosis evaluation on coronary CT angiography (CCTA) lacks specificity in indicating the functional significance of a stenosis. Recent developments in CT techniques (including dual-layer spectral detector CT [SDCT] and static stress CT perfusion [CTP]) and image analyses (including fractional flow reserve [FFR] derived from CCTA images [FFRCT] and deep learning analysis [DL]) are potential strategies to increase the specificity of CCTA by combining both anatomical and functional information in one investigation. The aim of the current study is to assess the diagnostic performance of (combinations of) SDCT, CTP, FFRCT and DL for the identification of functionally significant coronary artery stenosis. METHODS AND ANALYSIS: Seventy-five patients aged 18 years and older with stable angina and known coronary artery disease and scheduled to undergo clinically indicated invasive FFR will be enrolled. All subjects will undergo the following SDCT scans: coronary calcium scoring, static stress CTP, rest CCTA and if indicated (history of myocardial infarction) a delayed enhancement acquisition. Invasive FFR of ≤0.80, measured within 30 days after the SDCT scans, will be used as reference to indicate a functionally significant stenosis. The primary study endpoint is the diagnostic performance of SDCT (including CTP) for the identification of functionally significant coronary artery stenosis. Secondary study endpoint is the diagnostic performance of SDCT, CTP, FFRCT and DL separately and combined for the identification of functionally significant coronary artery stenosis. ETHICS AND DISSEMINATION: Ethical approval was obtained. All subjects will provide written informed consent. Study findings will be disseminated through peer-reviewed conference presentations and journal publications.}, keywords = {}, pubstate = {published}, tppubtype = {article} } INTRODUCTION: Anatomic stenosis evaluation on coronary CT angiography (CCTA) lacks specificity in indicating the functional significance of a stenosis. Recent developments in CT techniques (including dual-layer spectral detector CT [SDCT] and static stress CT perfusion [CTP]) and image analyses (including fractional flow reserve [FFR] derived from CCTA images [FFRCT] and deep learning analysis [DL]) are potential strategies to increase the specificity of CCTA by combining both anatomical and functional information in one investigation. The aim of the current study is to assess the diagnostic performance of (combinations of) SDCT, CTP, FFRCT and DL for the identification of functionally significant coronary artery stenosis. 
METHODS AND ANALYSIS: Seventy-five patients aged 18 years and older with stable angina and known coronary artery disease and scheduled to undergo clinically indicated invasive FFR will be enrolled. All subjects will undergo the following SDCT scans: coronary calcium scoring, static stress CTP, rest CCTA and if indicated (history of myocardial infarction) a delayed enhancement acquisition. Invasive FFR of ≤0.80, measured within 30 days after the SDCT scans, will be used as reference to indicate a functionally significant stenosis. The primary study endpoint is the diagnostic performance of SDCT (including CTP) for the identification of functionally significant coronary artery stenosis. Secondary study endpoint is the diagnostic performance of SDCT, CTP, FFRCT and DL separately and combined for the identification of functionally significant coronary artery stenosis. ETHICS AND DISSEMINATION: Ethical approval was obtained. All subjects will provide written informed consent. Study findings will be disseminated through peer-reviewed conference presentations and journal publications. |
M.U. Dalmış, A. Gubern-Merida, S. Vreemann, P. Bult, N. Karssemeijer, R. Mann; J. Teuwen. Investigative Radiology, 2019. @article{dalm19, title = {Artificial Intelligence Based Classification of Breast Lesions Imaged With a Multi-Parametric Breast MRI Protocol With ultrafast DCE-MRI, T2 and DWI}, author = {M.U. Dalmış, A. Gubern-Merida, S. Vreemann, P. Bult, N. Karssemeijer, R. Mann and J. Teuwen.}, doi = {10.1097/RLI.0000000000000544}, year = {2019}, date = {2019-01-15}, journal = {Investigative Radiology}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
C. González-Gonzalo, V. Sánchez-Gutiérrez, P. Hernández-Martínez, I. Contreras, Y. T. Lechanteur, A. Domanian, B. van Ginneken, C. I. Sánchez Evaluation of a deep learning system for the joint automated detection of diabetic retinopathy and age-related macular degeneration Journal Article Acta Ophthalmologica, 2019. @article{Gonz2019, title = {Evaluation of a deep learning system for the joint automated detection of diabetic retinopathy and age-related macular degeneration}, author = {C. González-Gonzalo, V. Sánchez-Gutiérrez, P. Hernández-Martínez, I. Contreras, Y. T. Lechanteur, A. Domanian, B. van Ginneken, C. I. Sánchez}, url = {https://onlinelibrary.wiley.com/doi/full/10.1111/aos.14306 https://arxiv.org/abs/1903.09555}, doi = {https://doi.org/10.1111/aos.14306}, year = {2019}, date = {2019-11-26}, journal = {Acta Ophthalmologica}, abstract = {Purpose: To validate the performance of a commercially-available, CE-certified deep learning (DL) system, RetCAD v.1.3.0 (Thirona, Nijmegen, The Netherlands), for the joint automatic detection of diabetic retinopathy (DR) and age-related macular degeneration (AMD) in color fundus (CF) images on a dataset with mixed presence of eye diseases. Methods: Evaluation of joint detection of referable DR and AMD was performed on a DR-AMD dataset with 600 images acquired during routine clinical practice, containing referable and non-referable cases of both diseases. Each image was graded for DR and AMD by an experienced ophthalmologist to establish the reference standard (RS), and by four independent observers for comparison with human performance. Validation was further assessed on Messidor (1200 images) for individual identification of referable DR, and the Age-Related Eye Disease Study (AREDS) dataset (133821 images) for referable AMD, against the corresponding RS. Results: Regarding joint validation on the DR-AMD dataset, the system achieved an area under the ROC curve (AUC) of 95.1% for detection of referable DR (SE=90.1%, SP=90.6%). For referable AMD, the AUC was 94.9% (SE=91.8%, SP=87.5%). Average human performance for DR was SE=61.5% and SP=97.8%; for AMD, SE=76.5% and SP=96.1%. Regarding detection of referable DR in Messidor, AUC was 97.5% (SE=92.0%, SP=92.1%); for referable AMD in AREDS, AUC was 92.7% (SE=85.8%, SP=86.0%). Conclusions: The validated system performs comparably to human experts at simultaneous detection of DR and AMD. This shows that DL systems can facilitate access to joint screening of eye diseases and become a quick and reliable support for ophthalmological experts.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Purpose: To validate the performance of a commercially-available, CE-certified deep learning (DL) system, RetCAD v.1.3.0 (Thirona, Nijmegen, The Netherlands), for the joint automatic detection of diabetic retinopathy (DR) and age-related macular degeneration (AMD) in color fundus (CF) images on a dataset with mixed presence of eye diseases. Methods: Evaluation of joint detection of referable DR and AMD was performed on a DR-AMD dataset with 600 images acquired during routine clinical practice, containing referable and non-referable cases of both diseases. Each image was graded for DR and AMD by an experienced ophthalmologist to establish the reference standard (RS), and by four independent observers for comparison with human performance. 
Validation was further assessed on Messidor (1200 images) for individual identification of referable DR, and the Age-Related Eye Disease Study (AREDS) dataset (133821 images) for referable AMD, against the corresponding RS. Results: Regarding joint validation on the DR-AMD dataset, the system achieved an area under the ROC curve (AUC) of 95.1% for detection of referable DR (SE=90.1%, SP=90.6%). For referable AMD, the AUC was 94.9% (SE=91.8%, SP=87.5%). Average human performance for DR was SE=61.5% and SP=97.8%; for AMD, SE=76.5% and SP=96.1%. Regarding detection of referable DR in Messidor, AUC was 97.5% (SE=92.0%, SP=92.1%); for referable AMD in AREDS, AUC was 92.7% (SE=85.8%, SP=86.0%). Conclusions: The validated system performs comparably to human experts at simultaneous detection of DR and AMD. This shows that DL systems can facilitate access to joint screening of eye diseases and become a quick and reliable support for ophthalmological experts. |
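For reference, metrics of the kind reported above (AUC with sensitivity and specificity at an operating point) can be computed as in the sketch below; the decision threshold is a placeholder, not the operating point of the validated system.

```python
import numpy as np
from sklearn.metrics import roc_auc_score

def evaluate_referable_detection(y_true, y_score, threshold=0.5):
    """Area under the ROC curve plus sensitivity/specificity at a chosen threshold."""
    y_true = np.asarray(y_true).astype(bool)
    y_pred = np.asarray(y_score) >= threshold
    auc = roc_auc_score(y_true, y_score)
    sensitivity = (y_pred & y_true).sum() / y_true.sum()
    specificity = (~y_pred & ~y_true).sum() / (~y_true).sum()
    return auc, sensitivity, specificity
```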
Ruwan Tennakoon, Gerda Bortsova, Silas Ørting, Amirali K Gostar, Mathilde MW Wille, Zaigham Saghir, Reza Hoseinnezhad, Marleen de Bruijne, Alireza Bab-Hadiashar Classification of Volumetric Images Using Multi-Instance Learning and Extreme Value Theorem Journal Article IEEE Transactions on Medical Imaging, 39 (4), pp. 854-865, 2019. @article{Tennakoon2019, title = {Classification of Volumetric Images Using Multi-Instance Learning and Extreme Value Theorem}, author = {Ruwan Tennakoon, Gerda Bortsova, Silas Ørting, Amirali K Gostar, Mathilde MW Wille, Zaigham Saghir, Reza Hoseinnezhad, Marleen de Bruijne, Alireza Bab-Hadiashar}, doi = {10.1109/TMI.2019.2936244}, year = {2019}, date = {2019-08-19}, journal = {IEEE Transactions on Medical Imaging}, volume = {39}, number = {4}, pages = {854-865}, abstract = {Volumetric imaging is an essential diagnostic tool for medical practitioners. The use of popular techniques such as convolutional neural networks (CNN) for analysis of volumetric images is constrained by the availability of detailed (with local annotations) training data and GPU memory. In this paper, the volumetric image classification problem is posed as a multi-instance classification problem and a novel method is proposed to adaptively select positive instances from positive bags during the training phase. This method uses the extreme value theory to model the feature distribution of the images without a pathology and use it to identify positive instances of an imaged pathology. The experimental results, on three separate image classification tasks (i.e. classify retinal OCT images according to the presence or absence of fluid build-ups, emphysema detection in pulmonary 3D-CT images and detection of cancerous regions in 2D histopathology images) show that the proposed method produces classifiers that have similar performance to fully supervised methods and achieves the state of the art performance in all examined test cases.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Volumetric imaging is an essential diagnostic tool for medical practitioners. The use of popular techniques such as convolutional neural networks (CNN) for analysis of volumetric images is constrained by the availability of detailed (with local annotations) training data and GPU memory. In this paper, the volumetric image classification problem is posed as a multi-instance classification problem and a novel method is proposed to adaptively select positive instances from positive bags during the training phase. This method uses the extreme value theory to model the feature distribution of the images without a pathology and use it to identify positive instances of an imaged pathology. The experimental results, on three separate image classification tasks (i.e. classify retinal OCT images according to the presence or absence of fluid build-ups, emphysema detection in pulmonary 3D-CT images and detection of cancerous regions in 2D histopathology images) show that the proposed method produces classifiers that have similar performance to fully supervised methods and achieves the state of the art performance in all examined test cases. |
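As a rough illustration of the extreme-value idea described above, the sketch below fits a generalized extreme value distribution to the per-image maximum instance scores of pathology-free images and flags instances in a positive bag whose scores are improbable under that model; the exact formulation used in the paper may differ.

```python
import numpy as np
from scipy.stats import genextreme

def select_positive_instances(normal_max_scores, bag_instance_scores, alpha=0.05):
    """Fit a GEV distribution to the maximum instance scores of normal images and mark
    instances in a positive bag whose survival probability falls below alpha
    (alpha is a placeholder threshold)."""
    c, loc, scale = genextreme.fit(np.asarray(normal_max_scores))
    p_values = genextreme.sf(np.asarray(bag_instance_scores), c, loc=loc, scale=scale)
    return p_values < alpha  # boolean mask of likely-positive instances
```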
Inproceedings |
S. Bruns, J.M. Wolterink, R.W. van Hamersvelt, M. Zreik, T. Leiner, I. Išgum Improving myocardium segmentation in cardiac CT angiography using spectral information Inproceedings SPIE Medical Imaging, 2019. @inproceedings{Bruns2019, title = {Improving myocardium segmentation in cardiac CT angiography using spectral information}, author = {S. Bruns, J.M. Wolterink, R.W. van Hamersvelt, M. Zreik, T. Leiner, I. Išgum}, url = {https://arxiv.org/abs/1810.03968}, year = {2019}, date = {2019-02-17}, booktitle = {SPIE Medical Imaging}, abstract = {Left ventricle myocardium segmentation in cardiac CT angiography (CCTA) is essential for the assessment of myocardial perfusion. Since deep-learning methods for segmentation in CCTA suffer from differences in contrast-agent attenuation, we propose training a 3D CNN with augmentation using virtual mono-energetic reconstructions from a spectral CT scanner. We compare this with augmentation by linear intensity scaling, and combine both augmentations. We train a network with 10 conventional CCTA images and corresponding virtual mono-energetic images acquired on a spectral CT scanner and evaluate on 40 conventional CCTA images. We show that data augmentation with virtual mono-energetic images significantly improves the segmentation.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Left ventricle myocardium segmentation in cardiac CT angiography (CCTA) is essential for the assessment of myocardial perfusion. Since deep-learning methods for segmentation in CCTA suffer from differences in contrast-agent attenuation, we propose training a 3D CNN with augmentation using virtual mono-energetic reconstructions from a spectral CT scanner. We compare this with augmentation by linear intensity scaling, and combine both augmentations. We train a network with 10 conventional CCTA images and corresponding virtual mono-energetic images acquired on a spectral CT scanner and evaluate on 40 conventional CCTA images. We show that data augmentation with virtual mono-energetic images significantly improves the segmentation. |
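The two augmentation strategies compared above can be pictured roughly as follows: either rescale the intensities of a conventional CCTA image, or substitute one of the available virtual mono-energetic reconstructions of the same scan. The scaling range and random choice below are placeholders, not the paper's settings.

```python
import random

def augment_intensity_scaling(image, scale_range=(0.9, 1.1)):
    """Linear intensity scaling augmentation (scale range is a placeholder)."""
    return image * random.uniform(*scale_range)

def augment_mono_energetic(conventional_image, mono_energetic_images):
    """Randomly replace the conventional image with one of its virtual
    mono-energetic reconstructions from the spectral CT scanner."""
    return random.choice([conventional_image] + list(mono_energetic_images))
```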
J. Sander, B.D. de Vos, J.M. Wolterink, I. Išgum Towards increased trustworthiness of deep learning segmentation methods on cardiac MRI Inproceedings SPIE Medical Imaging, 2019. @inproceedings{Sander2019, title = {Towards increased trustworthiness of deep learning segmentation methods on cardiac MRI}, author = {J. Sander, B.D. de Vos, J.M. Wolterink, I. Išgum}, url = {https://arxiv.org/pdf/1809.10430.pdf}, year = {2019}, date = {2019-02-17}, booktitle = {SPIE Medical Imaging}, abstract = {Current state-of-the-art deep learning segmentation methods have not yet made a broad entrance into the clinical setting in spite of high demand for such automatic methods. One important reason is the lack of reliability caused by models that fail unnoticed and often locally produce anatomically implausible results that medical experts would not make. This paper presents an automatic image segmentation method based on (Bayesian) dilated convolutional networks (DCNN) that generate segmentation masks and spatial uncertainty maps for the input image at hand. The method was trained and evaluated using segmentation of the left ventricle (LV) cavity, right ventricle (RV) endocardium and myocardium (Myo) at end-diastole (ED) and end-systole (ES) in 100 cardiac 2D MR scans from the MICCAI 2017 Challenge (ACDC). Combining segmentations and uncertainty maps and employing a human-in-the-loop setting, we provide evidence that image areas indicated as highly uncertain regarding the obtained segmentation almost entirely cover regions of incorrect segmentations. The fused information can be harnessed to increase segmentation performance. Our results reveal that we can obtain valuable spatial uncertainty maps with low computational effort using DCNNs.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Current state-of-the-art deep learning segmentation methods have not yet made a broad entrance into the clinical setting in spite of high demand for such automatic methods. One important reason is the lack of reliability caused by models that fail unnoticed and often locally produce anatomically implausible results that medical experts would not make. This paper presents an automatic image segmentation method based on (Bayesian) dilated convolutional networks (DCNN) that generate segmentation masks and spatial uncertainty maps for the input image at hand. The method was trained and evaluated using segmentation of the left ventricle (LV) cavity, right ventricle (RV) endocardium and myocardium (Myo) at end-diastole (ED) and end-systole (ES) in 100 cardiac 2D MR scans from the MICCAI 2017 Challenge (ACDC). Combining segmentations and uncertainty maps and employing a human-in-the-loop setting, we provide evidence that image areas indicated as highly uncertain regarding the obtained segmentation almost entirely cover regions of incorrect segmentations. The fused information can be harnessed to increase segmentation performance. Our results reveal that we can obtain valuable spatial uncertainty maps with low computational effort using DCNNs. |
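One common way to obtain spatial uncertainty maps of the kind described above is Monte Carlo sampling with dropout kept active at test time; the sketch below illustrates that idea only and is not necessarily the exact Bayesian formulation used in the paper.

```python
import torch

@torch.no_grad()
def mc_dropout_uncertainty(model, image, n_samples=20):
    """Approximate per-voxel uncertainty with stochastic forward passes
    (dropout kept active). Returns the mean segmentation and an entropy map."""
    model.train()  # keep dropout layers stochastic during sampling
    probs = torch.stack([torch.softmax(model(image), dim=1) for _ in range(n_samples)])
    mean_p = probs.mean(dim=0)                                       # average class probabilities
    entropy = -(mean_p * mean_p.clamp_min(1e-8).log()).sum(dim=1)    # predictive entropy per voxel
    return mean_p.argmax(dim=1), entropy
```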
E. Calli, E. Sogancioglu, E.T. Scholten, K. Murphy; B. van Ginneken. Handling label noise through model confidence and uncertainty: application to chest radiograph classification Inproceedings Medical Imaging of Proceedings of the SPIE, 2019. @inproceedings{call19, title = {Handling label noise through model confidence and uncertainty: application to chest radiograph classification}, author = {E. Calli, E. Sogancioglu, E.T. Scholten, K. Murphy and B. van Ginneken.}, year = {2019}, date = {2019-02-14}, publisher = {Medical Imaging of Proceedings of the SPIE}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Suzanne C. Wetstein, Allison M. Onken, Gabrielle M. Baker, Michael E. Pyle, Josien P. W. Pluim, Rulla M. Tamimi, Yujing J. Heng, Mitko Veta Detection of acini in histopathology slides: towards automated prediction of breast cancer risk Inproceedings SPIE Medical Imaging, 2019. @inproceedings{Wetstein2019, title = {Detection of acini in histopathology slides: towards automated prediction of breast cancer risk}, author = {Suzanne C. Wetstein, Allison M. Onken, Gabrielle M. Baker, Michael E. Pyle, Josien P. W. Pluim, Rulla M. Tamimi, Yujing J. Heng, Mitko Veta}, url = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/10956/109560Q/Detection-of-acini-in-histopathology-slides--towards-automated-prediction/10.1117/12.2511408.full}, year = {2019}, date = {2019-03-18}, booktitle = {SPIE Medical Imaging}, abstract = {Terminal duct lobular units (TDLUs) are structures in the breast which involute with the completion of childbearing and physiological ageing. Women with less TDLU involution are more likely to develop breast cancer than those with more involution. Thus, TDLU involution may be utilized as a biomarker to predict invasive cancer risk. Manual assessment of TDLU involution is a cumbersome and subjective process. This makes it amenable for automated assessment by image analysis. In this study, we developed and evaluated an acini detection method as a first step towards automated assessment of TDLU involution using a dataset of histopathological whole-slide images (WSIs) from the Nurses’ Health Study (NHS) and NHSII. The NHS/NHSII is among the world's largest investigations of epidemiological risk factors for major chronic diseases in women. We compared three different approaches to detect acini in WSIs using the U-Net convolutional neural network architecture. The approaches differ in the target that is predicted by the network: circular mask labels, soft labels and distance maps. Our results showed that soft label targets lead to a better detection performance than the other methods. F1 scores of 0.65, 0.73 and 0.66 were obtained with circular mask labels, soft labels and distance maps, respectively. Our acini detection method was furthermore validated by applying it to measure acini count per mm2 of tissue area on an independent set of WSIs. This measure was found to be significantly negatively correlated with age.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Terminal duct lobular units (TDLUs) are structures in the breast which involute with the completion of childbearing and physiological ageing. Women with less TDLU involution are more likely to develop breast cancer than those with more involution. Thus, TDLU involution may be utilized as a biomarker to predict invasive cancer risk. Manual assessment of TDLU involution is a cumbersome and subjective process. This makes it amenable for automated assessment by image analysis. In this study, we developed and evaluated an acini detection method as a first step towards automated assessment of TDLU involution using a dataset of histopathological whole-slide images (WSIs) from the Nurses’ Health Study (NHS) and NHSII. The NHS/NHSII is among the world's largest investigations of epidemiological risk factors for major chronic diseases in women. We compared three different approaches to detect acini in WSIs using the U-Net convolutional neural network architecture. The approaches differ in the target that is predicted by the network: circular mask labels, soft labels and distance maps. 
Our results showed that soft label targets lead to a better detection performance than the other methods. F1 scores of 0.65, 0.73 and 0.66 were obtained with circular mask labels, soft labels and distance maps, respectively. Our acini detection method was furthermore validated by applying it to measure acini count per mm2 of tissue area on an independent set of WSIs. This measure was found to be significantly negatively correlated with age. |
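To make the best-performing target type concrete, the sketch below builds a soft-label detection target by placing a Gaussian blob at every annotated acinus centre; the Gaussian width is a placeholder, not the value used in the study.

```python
import numpy as np
from scipy.ndimage import gaussian_filter

def soft_label_target(shape, centers, sigma=8.0):
    """Soft detection target: a Gaussian blob around each annotated acinus centre,
    rescaled to [0, 1]. Sigma is an assumed value."""
    target = np.zeros(shape, dtype=np.float32)
    for r, c in centers:
        target[int(r), int(c)] = 1.0
    target = gaussian_filter(target, sigma=sigma)
    return target / (target.max() + 1e-8)
```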
Friso G. Heslinga, Josien P. W. Pluim, Behdad Dashtbozorg, Tos T. J. M. Berendschot, A. J. H. M. Houben, Ronald M. A. Henry, Mitko Veta Approximation of a pipeline of unsupervised retina image analysis methods with a CNN Inproceedings SPIE Medical Imaging, 2019. @inproceedings{Heslinga2019, title = {Approximation of a pipeline of unsupervised retina image analysis methods with a CNN}, author = {Friso G. Heslinga, Josien P. W. Pluim, Behdad Dashtbozorg, Tos T. J. M. Berendschot, A. J. H. M. Houben, Ronald M. A. Henry, Mitko Veta}, url = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/10949/109491N/Approximation-of-a-pipeline-of-unsupervised-retina-image-analysis-methods/10.1117/12.2512393.full}, year = {2019}, date = {2019-03-15}, booktitle = {SPIE Medical Imaging}, abstract = {A pipeline of unsupervised image analysis methods for extraction of geometrical features from retinal fundus images has previously been developed. Features related to vessel caliber, tortuosity and bifurcations, have been identified as potential biomarkers for a variety of diseases, including diabetes and Alzheimer’s. The current computationally expensive pipeline takes 24 minutes to process a single image, which impedes implementation in a screening setting. In this work, we approximate the pipeline with a convolutional neural network (CNN) that enables processing of a single image in a few seconds. As an additional benefit, the trained CNN is sensitive to key structures in the retina and can be used as a pretrained network for related disease classification tasks. Our model is based on the ResNet-50 architecture and outputs four biomarkers that describe global properties of the vascular tree in retinal fundus images. Intraclass correlation coefficients between the predictions of the CNN and the results of the pipeline showed strong agreement (0.86 - 0.91) for three of four biomarkers and moderate agreement (0.42) for one biomarker. Class activation maps were created to illustrate the attention of the network. The maps show qualitatively that the activations of the network overlap with the biomarkers of interest, and that the network is able to distinguish venules from arterioles. Moreover, local high and low tortuous regions are clearly identified, confirming that a CNN is sensitive to key structures in the retina.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } A pipeline of unsupervised image analysis methods for extraction of geometrical features from retinal fundus images has previously been developed. Features related to vessel caliber, tortuosity and bifurcations, have been identified as potential biomarkers for a variety of diseases, including diabetes and Alzheimer’s. The current computationally expensive pipeline takes 24 minutes to process a single image, which impedes implementation in a screening setting. In this work, we approximate the pipeline with a convolutional neural network (CNN) that enables processing of a single image in a few seconds. As an additional benefit, the trained CNN is sensitive to key structures in the retina and can be used as a pretrained network for related disease classification tasks. Our model is based on the ResNet-50 architecture and outputs four biomarkers that describe global properties of the vascular tree in retinal fundus images. 
Intraclass correlation coefficients between the predictions of the CNN and the results of the pipeline showed strong agreement (0.86 - 0.91) for three of four biomarkers and moderate agreement (0.42) for one biomarker. Class activation maps were created to illustrate the attention of the network. The maps show qualitatively that the activations of the network overlap with the biomarkers of interest, and that the network is able to distinguish venules from arterioles. Moreover, local high and low tortuous regions are clearly identified, confirming that a CNN is sensitive to key structures in the retina. |
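A minimal sketch of the kind of model described above, a ResNet-50 backbone regressing four image-level biomarkers, is shown below; the loss, optimizer, and hyperparameters are assumptions rather than the authors' training setup.

```python
import torch
import torchvision

# ResNet-50 backbone with a 4-output regression head, trained with an MSE loss.
model = torchvision.models.resnet50(weights=None)       # pretraining choice is an assumption
model.fc = torch.nn.Linear(model.fc.in_features, 4)     # four retinal biomarkers

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # hyperparameters are placeholders

def training_step(images, biomarkers):
    """One regression step: predict the four biomarkers for a batch of fundus images."""
    optimizer.zero_grad()
    loss = criterion(model(images), biomarkers)
    loss.backward()
    optimizer.step()
    return loss.item()
```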
J.M. Wolterink, T. Leiner, I. Išgum Graph convolutional networks for coronary artery segmentation in cardiac CT angiography Inproceedings 1st International Workshop on Graph Learning in Medical Image (GLMI 2019), in press, 2019. @inproceedings{Wolterink2019, title = {Graph convolutional networks for coronary artery segmentation in cardiac CT angiography}, author = {J.M. Wolterink, T. Leiner, I. Išgum}, year = {2019}, date = {2019-08-14}, booktitle = {1st International Workshop on Graph Learning in Medical Image (GLMI 2019), in press}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Gerda Bortsova, Florian Dubost, Laurens Hogeweg, Ioannis Katramados, Marleen de Bruijne Semi-supervised medical image segmentation via learning consistency under transformations Inproceedings International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 810-818, Springer, Cham, 2019. @inproceedings{Bortsova2019, title = {Semi-supervised medical image segmentation via learning consistency under transformations}, author = {Gerda Bortsova, Florian Dubost, Laurens Hogeweg, Ioannis Katramados, Marleen de Bruijne}, url = {https://arxiv.org/abs/1911.01218}, doi = {https://doi.org/10.1007/978-3-030-32226-7_90}, year = {2019}, date = {2019-10-13}, urldate = {2020-08-20}, booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention}, pages = {810-818}, publisher = {Springer, Cham}, abstract = {The scarcity of labeled data often limits the application of supervised deep learning techniques for medical image segmentation. This has motivated the development of semi-supervised techniques that learn from a mixture of labeled and unlabeled images. In this paper, we propose a novel semi-supervised method that, in addition to supervised learning on labeled training images, learns to predict segmentations consistent under a given class of transformations on both labeled and unlabeled images. More specifically, in this work we explore learning equivariance to elastic deformations. We implement this through: (1) a Siamese architecture with two identical branches, each of which receives a differently transformed image, and (2) a composite loss function with a supervised segmentation loss term and an unsupervised term that encourages segmentation consistency between the predictions of the two branches. We evaluate the method on a public dataset of chest radiographs with segmentations of anatomical structures using 5-fold cross-validation. The proposed method reaches significantly higher segmentation accuracy compared to supervised learning. This is due to learning transformation consistency on both labeled and unlabeled images, with the latter contributing the most. We achieve the performance comparable to state-of-the-art chest X-ray segmentation methods while using substantially fewer labeled images.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The scarcity of labeled data often limits the application of supervised deep learning techniques for medical image segmentation. This has motivated the development of semi-supervised techniques that learn from a mixture of labeled and unlabeled images. In this paper, we propose a novel semi-supervised method that, in addition to supervised learning on labeled training images, learns to predict segmentations consistent under a given class of transformations on both labeled and unlabeled images. More specifically, in this work we explore learning equivariance to elastic deformations. We implement this through: (1) a Siamese architecture with two identical branches, each of which receives a differently transformed image, and (2) a composite loss function with a supervised segmentation loss term and an unsupervised term that encourages segmentation consistency between the predictions of the two branches. We evaluate the method on a public dataset of chest radiographs with segmentations of anatomical structures using 5-fold cross-validation. The proposed method reaches significantly higher segmentation accuracy compared to supervised learning. 
This is due to learning transformation consistency on both labeled and unlabeled images, with the latter contributing the most. We achieve the performance comparable to state-of-the-art chest X-ray segmentation methods while using substantially fewer labeled images. |
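The sketch below illustrates the consistency idea described above in PyTorch: a supervised term on labelled images plus an unsupervised term asking the segmentation of a transformed image to match the transformed segmentation. The generic `transform` callable and the loss weight stand in for the paper's elastic deformations and weighting.

```python
import torch
import torch.nn.functional as F

def semi_supervised_loss(model, x_lab, y_lab, x_unlab, transform, lam=1.0):
    """Supervised segmentation loss on labelled images plus a consistency term
    encouraging seg(T(x)) to match T(seg(x)) on labelled and unlabelled images."""
    sup = F.cross_entropy(model(x_lab), y_lab)
    cons = 0.0
    for x in (x_lab, x_unlab):
        p = torch.softmax(model(x), dim=1)
        p_t = torch.softmax(model(transform(x)), dim=1)  # branch 1: segment the transformed image
        cons = cons + F.mse_loss(p_t, transform(p))      # branch 2: transform the segmentation
    return sup + lam * cons
```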
Kimberlin MH van Wijnen, Florian Dubost, Pinar Yilmaz, M Arfan Ikram, Wiro J Niessen, Hieab Adams, Meike W Vernooij, Marleen de Bruijne Automated lesion detection by regressing intensity-based distance with a neural network Inproceedings International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 234-242, Springer, Cham, 2019. @inproceedings{Wijnen2019, title = {Automated lesion detection by regressing intensity-based distance with a neural network}, author = {Kimberlin MH van Wijnen, Florian Dubost, Pinar Yilmaz, M Arfan Ikram, Wiro J Niessen, Hieab Adams, Meike W Vernooij, Marleen de Bruijne}, url = {https://arxiv.org/pdf/1907.12452.pdf}, doi = {https://doi.org/10.1007/978-3-030-32251-9_26}, year = {2019}, date = {2019-10-13}, booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention}, pages = {234-242}, publisher = {Springer, Cham}, abstract = {Localization of focal vascular lesions on brain MRI is an important component of research on the etiology of neurological disorders. However, manual annotation of lesions can be challenging, time-consuming and subject to observer bias. Automated detection methods often need voxel-wise annotations for training. We propose a novel approach for automated lesion detection that can be trained on scans only annotated with a dot per lesion instead of a full segmentation. From the dot annotations and their corresponding intensity images we compute various distance maps (DMs), indicating the distance to a lesion based on spatial distance, intensity distance, or both. We train a fully convolutional neural network (FCN) to predict these DMs for unseen intensity images. The local optima in the predicted DMs are expected to correspond to lesion locations. We show the potential of this approach to detect enlarged perivascular spaces in white matter on a large brain MRI dataset with an independent test set of 1000 scans. Our method matches the intra-rater performance of the expert rater that was computed on an independent set. We compare the different types of distance maps, showing that incorporating intensity information in the distance maps used to train an FCN greatly improves performance. }, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Localization of focal vascular lesions on brain MRI is an important component of research on the etiology of neurological disorders. However, manual annotation of lesions can be challenging, time-consuming and subject to observer bias. Automated detection methods often need voxel-wise annotations for training. We propose a novel approach for automated lesion detection that can be trained on scans only annotated with a dot per lesion instead of a full segmentation. From the dot annotations and their corresponding intensity images we compute various distance maps (DMs), indicating the distance to a lesion based on spatial distance, intensity distance, or both. We train a fully convolutional neural network (FCN) to predict these DMs for unseen intensity images. The local optima in the predicted DMs are expected to correspond to lesion locations. We show the potential of this approach to detect enlarged perivascular spaces in white matter on a large brain MRI dataset with an independent test set of 1000 scans. Our method matches the intra-rater performance of the expert rater that was computed on an independent set. 
We compare the different types of distance maps, showing that incorporating intensity information in the distance maps used to train an FCN greatly improves performance. |
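The sketch below shows one way the distance-map targets described above could be derived from dot annotations: a spatial Euclidean distance to the nearest lesion dot, an intensity difference with the nearest dot, and a simple fusion of both. The exact combination used in the paper may differ.

```python
import numpy as np
from scipy.ndimage import distance_transform_edt

def distance_maps(image, dot_mask):
    """Compute spatial and intensity-based distance maps from a binary dot mask
    (1 at annotated lesion centres, 0 elsewhere)."""
    spatial, nearest = distance_transform_edt(dot_mask == 0, return_indices=True)
    nearest_intensity = image[tuple(nearest)]             # intensity at the closest dot
    intensity_dist = np.abs(image - nearest_intensity)
    combined = spatial * (1.0 + intensity_dist)           # placeholder way to fuse both cues
    return spatial, intensity_dist, combined
```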
Conferences |
Allison M. Onken, Suzanne Wetstein, Michael Pyle, Josien Pluim, Stuart J. Schnitt, Gabrielle M Baker, Laura C. Collins, Rulla Tamimi, Mitko Veta, Yujing Jan Heng Deep Learning Networks to Segment and Detect Breast Terminal Duct Lobular Units, Acini, and Adipose Tissue: A Step Toward the Automated Analysis of Lobular Involution as a Marker for Breast Cancer Risk Conference United States and Canadian Academy of Pathology (USCAP), 2019. @conference{OnkenWetstein2019, title = {Deep Learning Networks to Segment and Detect Breast Terminal Duct Lobular Units, Acini, and Adipose Tissue: A Step Toward the Automated Analysis of Lobular Involution as a Marker for Breast Cancer Risk}, author = {Allison M. Onken, Suzanne Wetstein, Michael Pyle, Josien Pluim, Stuart J. Schnitt, Gabrielle M Baker, Laura C. Collins, Rulla Tamimi, Mitko Veta, Yujing Jan Heng}, year = {2019}, date = {2019-03-16}, booktitle = {United States and Canadian Academy of Pathology (USCAP)}, abstract = {Background: Terminal duct lobular unit (TDLU) involution is the physiological process whereby Type 2 and 3 lobules revert to Type 1 after child-bearing years. TDLU involution (quantitatively assessed by TDLU count per mm2, TDLU span, and acini count per TDLU) is inversely associated with breast cancer risk. The manual assessment of involution is time-consuming and subjective, making it impractical to perform on large epidemiological studies. Deep learning algorithms such as convolutional neural networks (CNNs) could be utilized for rapid and automated assessment of TDLU involution. We designed two CNNs to segment TDLUs and detect acini as the first step toward large-scale assessment of TDLU involution, and a third CNN to segment adipose tissue. Design: Whole slide images (WSIs; n=50) were obtained from the Nurses’ Health Study Incident Benign Breast Disease Study. For each WSI, TDLUs, acini, and adipose tissue were annotated within a region of interest comprising approximately 10% of the total tissue area. In order to assess involution in histologically normal breast parenchyma only, TDLUs with proliferative or metaplastic changes were excluded from manual evaluation. CNNs were engineered to recognize TDLUs, acini, and adipose tissue using 60% of the WSIs for training, 20% as a test set, and 20% for validation. F1 and Dice scores were calculated as accuracy measures to compare CNN segmentation to manual assessment. Results: Our CNNs detected acini, segmented TDLUs, and segmented adipose tissue with accuracy measures of 0.73, 0.84, and 0.86, respectively. Two primary causes of discordance with manual assessment were identified: 1) complex clustering of TDLUs where our CNN had difficulty predicting TDLU boundaries and 2) acini with proliferative or metaplastic changes which our CNN frequently detected as acini but which were intentionally excluded from manual annotation. Conclusion: We have developed a series of deep learning networks to segment and detect TDLUs, acini, and adipose tissue on WSIs. With accuracy measures of >0.7, our CNNs are sufficiently robust to be integrated into a computational pipeline for automated assessment of the quantitative features of TDLU involution, and will be further refined to address sources of discordance with manual assessment. 
This is the first step toward the large-scale quantification of TDLU involution which, when applied to patient samples, could be used to better determine the breast cancer risk associated with lobule type and degree of involution.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Background: Terminal duct lobular unit (TDLU) involution is the physiological process whereby Type 2 and 3 lobules revert to Type 1 after child-bearing years. TDLU involution (quantitatively assessed by TDLU count per mm2, TDLU span, and acini count per TDLU) is inversely associated with breast cancer risk. The manual assessment of involution is time-consuming and subjective, making it impractical to perform on large epidemiological studies. Deep learning algorithms such as convolutional neural networks (CNNs) could be utilized for rapid and automated assessment of TDLU involution. We designed two CNNs to segment TDLUs and detect acini as the first step toward large-scale assessment of TDLU involution, and a third CNN to segment adipose tissue. Design: Whole slide images (WSIs; n=50) were obtained from the Nurses’ Health Study Incident Benign Breast Disease Study. For each WSI, TDLUs, acini, and adipose tissue were annotated within a region of interest comprising approximately 10% of the total tissue area. In order to assess involution in histologically normal breast parenchyma only, TDLUs with proliferative or metaplastic changes were excluded from manual evaluation. CNNs were engineered to recognize TDLUs, acini, and adipose tissue using 60% of the WSIs for training, 20% as a test set, and 20% for validation. F1 and Dice scores were calculated as accuracy measures to compare CNN segmentation to manual assessment. Results: Our CNNs detected acini, segmented TDLUs, and segmented adipose tissue with accuracy measures of 0.73, 0.84, and 0.86, respectively. Two primary causes of discordance with manual assessment were identified: 1) complex clustering of TDLUs where our CNN had difficulty predicting TDLU boundaries and 2) acini with proliferative or metaplastic changes which our CNN frequently detected as acini but which were intentionally excluded from manual annotation. Conclusion: We have developed a series of deep learning networks to segment and detect TDLUs, acini, and adipose tissue on WSIs. With accuracy measures of >0.7, our CNNs are sufficiently robust to be integrated into a computational pipeline for automated assessment of the quantitative features of TDLU involution, and will be further refined to address sources of discordance with manual assessment. This is the first step toward the large-scale quantification of TDLU involution which, when applied to patient samples, could be used to better determine the breast cancer risk associated with lobule type and degree of involution. |
C. González-Gonzalo, B. Liefers, A. Vaidyanathan, H. J. van Zeeland, C. C. W. Klaver, C. I. Sánchez Opening the “black box” of deep learning in automated screening of eye diseases Conference Association for Research in Vision and Ophthalmology Annual Meeting. ARVO Vancouver, 2019. @conference{Gonz2019a, title = {Opening the “black box” of deep learning in automated screening of eye diseases}, author = {C. González-Gonzalo, B. Liefers, A. Vaidyanathan, H. J. van Zeeland, C. C. W. Klaver, C. I. Sánchez}, url = {https://iovs.arvojournals.org/article.aspx?articleid=2746850&resultClick=1}, year = {2019}, date = {2019-04-30}, booktitle = {Association for Research in Vision and Ophthalmology Annual Meeting. ARVO Vancouver}, abstract = {Purpose: Systems based on deep learning (DL) have demonstrated to provide a scalable and high-performance solution for screening of eye diseases. However, DL is usually considered a “black box” due to lack of interpretability. We propose a deep visualization framework to explain the decisions made by a DL system, iteratively unveiling abnormalities responsible for referable predictions without needing lesion-level annotations. We apply the framework to automated screening of diabetic retinopathy (DR) in color fundus images (CFIs). Methods: The proposed framework consists of a baseline deep convolutional neural network to classify CFIs by DR stage. For each CFI classified as referable DR, the framework extracts initial visual evidence of the predicted stage by computing a saliency map, which indicates regions in the image that would contribute the most to changes in the prediction if modified. This provides localization of abnormalities that are then removed through selective inpainting. The image is again classified, expecting reduced referability. We iteratively apply this procedure to increase attention to less discriminative areas and generate refined visual evidence. The Kaggle DR database, with CFIs graded regarding DR severity (stages 0 and 1: non-referable DR, stages 2 to 4: referable DR), is used for training and validation of the image-level classification task. For validation of the obtained visual evidence, we used the DiaretDB1 dataset, which contains CFIs with manually-delineated areas for 4 types of lesions: hemorrhages, microaneurysms, hard and soft exudates. Results: The baseline classifier obtained an area under the Receiver Operating Characteristic (ROC) curve of 0.93 and a quadratic weighted kappa of 0.77 on the Kaggle test set (53576 CFIs). Free-response ROC (FROC) curves (Figure 2) analyze the correspondence between highlighted areas and each type of lesion for those images classified as referable DR in the DiaretDB1 dataset (62 CFIs), comparing between initial and refined visual evidence. Conclusions: The proposed framework provides visual evidence for the decisions made by a DL system, iteratively unveiling abnormalities in CFIs based on the prediction of a classifier trained only with image-level labels. This provides a “key” to open the “black box” of artificial intelligence in screening of eye diseases, aiming to increase experts’ trust and facilitate its integration in screening settings.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Purpose: Systems based on deep learning (DL) have demonstrated to provide a scalable and high-performance solution for screening of eye diseases. However, DL is usually considered a “black box” due to lack of interpretability. 
We propose a deep visualization framework to explain the decisions made by a DL system, iteratively unveiling abnormalities responsible for referable predictions without needing lesion-level annotations. We apply the framework to automated screening of diabetic retinopathy (DR) in color fundus images (CFIs). Methods: The proposed framework consists of a baseline deep convolutional neural network to classify CFIs by DR stage. For each CFI classified as referable DR, the framework extracts initial visual evidence of the predicted stage by computing a saliency map, which indicates regions in the image that would contribute the most to changes in the prediction if modified. This provides localization of abnormalities that are then removed through selective inpainting. The image is again classified, expecting reduced referability. We iteratively apply this procedure to increase attention to less discriminative areas and generate refined visual evidence. The Kaggle DR database, with CFIs graded regarding DR severity (stages 0 and 1: non-referable DR, stages 2 to 4: referable DR), is used for training and validation of the image-level classification task. For validation of the obtained visual evidence, we used the DiaretDB1 dataset, which contains CFIs with manually-delineated areas for 4 types of lesions: hemorrhages, microaneurysms, hard and soft exudates. Results: The baseline classifier obtained an area under the Receiver Operating Characteristic (ROC) curve of 0.93 and a quadratic weighted kappa of 0.77 on the Kaggle test set (53576 CFIs). Free-response ROC (FROC) curves (Figure 2) analyze the correspondence between highlighted areas and each type of lesion for those images classified as referable DR in the DiaretDB1 dataset (62 CFIs), comparing between initial and refined visual evidence. Conclusions: The proposed framework provides visual evidence for the decisions made by a DL system, iteratively unveiling abnormalities in CFIs based on the prediction of a classifier trained only with image-level labels. This provides a “key” to open the “black box” of artificial intelligence in screening of eye diseases, aiming to increase experts’ trust and facilitate its integration in screening settings. |
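The loop described above can be sketched as follows: compute an attribution map for the referable class, inpaint the most salient pixels, reclassify, and repeat until the image is predicted non-referable. Input-gradient saliency and OpenCV inpainting below are stand-ins for the attribution and selective-inpainting steps; the class index, thresholds, and fraction of inpainted pixels are placeholders.

```python
import cv2
import numpy as np
import torch

def iterative_visual_evidence(model, image_u8, n_iter=5, referable_idx=1,
                              prob_thresh=0.5, top_frac=0.02):
    """Iteratively accumulate visual evidence for the referable prediction of an
    8-bit RGB fundus image (H, W, 3)."""
    evidence = np.zeros(image_u8.shape[:2], dtype=np.float32)
    img = image_u8.copy()
    for _ in range(n_iter):
        x = torch.from_numpy(img).permute(2, 0, 1).float().unsqueeze(0) / 255.0
        x.requires_grad_(True)
        prob = torch.softmax(model(x), dim=1)[0, referable_idx]
        if prob.item() < prob_thresh:          # system now considers the image non-referable
            break
        prob.backward()
        sal = x.grad.abs().max(dim=1)[0].squeeze(0).numpy()
        mask = (sal >= np.quantile(sal, 1.0 - top_frac)).astype(np.uint8)
        evidence = np.maximum(evidence, sal * mask)
        img = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)  # remove the detected abnormalities
    return evidence
```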
J. Engelberts, C. González-Gonzalo, C. I. Sanchez, M. van Grinsven Association for Research in Vision and Ophthalmology Annual Meeting. ARVO Vancouver, 2019. @conference{Engelberts2019a, title = {Automatic Segmentation of Drusen and Exudates on Color Fundus Images using Generative Adversarial Networks}, author = {J. Engelberts, C. González-Gonzalo, C. I. Sanchez, M. van Grinsven}, url = {https://iovs.arvojournals.org/article.aspx?articleid=2745936&resultClick=1}, year = {2019}, date = {2019-04-30}, booktitle = {Association for Research in Vision and Ophthalmology Annual Meeting. ARVO Vancouver}, abstract = {Purpose: The presence of drusen and exudates, visible as bright lesions on color fundus images, is one of the early signs of visual threatening diseases such as Age-related Macular Degeneration and Diabetic Retinopathy. Accurate detection and quantification of these lesions during screening can help identify patients that would benefit from treatment. We developed a method based on generative adversarial networks (GANs) to segment bright lesions on color fundus images. Methods: We used 4179 color fundus images that were acquired during clinical routine. The images were contrast enhanced to increase the contrast between bright lesions and the background. All bright lesions were manually annotated by marking the center point of the lesions. The GAN was trained to estimate the image without bright lesions. The final segmentation was obtained by taking the difference between the input image and the estimated output. Results: This method was applied to an independent test set of 52 color fundus images with non-advanced stages of AMD from the European Genetic Database, which were fully segmented for bright lesions by two trained human observers. The method achieved Dice scores of 0.4862 and 0.4849 when compared to the observers, whereas the inter-observer Dice score was 0.5043. The total segmented bright lesion area per image was evaluated using the intraclass correlation (ICC). The method scored 0.8537 and 0.8352 when compared to the observers, whereas the inter-observer ICC was 0.8893. Conclusions: The results show the performance is close to the agreement between trained observers. This automatic segmentation of bright lesions can help early diagnosis of visual threatening diseases and opens the way for large scale clinical trials.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Purpose: The presence of drusen and exudates, visible as bright lesions on color fundus images, is one of the early signs of visual threatening diseases such as Age-related Macular Degeneration and Diabetic Retinopathy. Accurate detection and quantification of these lesions during screening can help identify patients that would benefit from treatment. We developed a method based on generative adversarial networks (GANs) to segment bright lesions on color fundus images. Methods: We used 4179 color fundus images that were acquired during clinical routine. The images were contrast enhanced to increase the contrast between bright lesions and the background. All bright lesions were manually annotated by marking the center point of the lesions. The GAN was trained to estimate the image without bright lesions. The final segmentation was obtained by taking the difference between the input image and the estimated output. 
Results: This method was applied to an independent test set of 52 color fundus images with non-advanced stages of AMD from the European Genetic Database, which were fully segmented for bright lesions by two trained human observers. The method achieved Dice scores of 0.4862 and 0.4849 when compared to the observers, whereas the inter-observer Dice score was 0.5043. The total segmented bright lesion area per image was evaluated using the intraclass correlation (ICC). The method scored 0.8537 and 0.8352 when compared to the observers, whereas the inter-observer ICC was 0.8893. Conclusions: The results show the performance is close to the agreement between trained observers. This automatic segmentation of bright lesions can help early diagnosis of visual threatening diseases and opens the way for large scale clinical trials. |
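A minimal sketch of the segmentation-by-difference idea in this abstract: a generative model estimates the image without bright lesions, lesions are taken as the positive difference between input and estimate, and Dice overlap can then be computed against an observer. The lesion_free_generator callable and the threshold are assumptions for illustration; the GAN itself is not shown.

import numpy as np

def segment_bright_lesions(image, lesion_free_generator, threshold=0.1):
    reconstruction = lesion_free_generator(image)            # estimated lesion-free image
    difference = np.clip(image - reconstruction, 0, None)    # keep only brighter-than-background residue
    return difference > threshold                            # binary bright-lesion mask

def dice_score(mask_a, mask_b, eps=1e-8):
    # Dice overlap, as used to compare the method against the trained observers
    intersection = np.logical_and(mask_a, mask_b).sum()
    return 2.0 * intersection / (mask_a.sum() + mask_b.sum() + eps)

# toy usage with an identity "generator" (nothing removed, so the mask stays empty)
image = np.random.rand(256, 256)
mask = segment_bright_lesions(image, lambda x: x)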
S. Bruns, J.M. Wolterink, R.W. van Hamersvelt, T. Leiner, I. Išgum CNN-based segmentation of the cardiac chambers and great vessels in non-contrast-enhanced cardiac CT Conference Medical Imaging with Deep Learning. MIDL London, 2019. @conference{Bruns2019b, title = {CNN-based segmentation of the cardiac chambers and great vessels in non-contrast-enhanced cardiac CT}, author = {S. Bruns, J.M. Wolterink, R.W. van Hamersvelt, T. Leiner, I. Išgum}, url = {https://openreview.net/forum?id=SJeqoqAaFV}, year = {2019}, date = {2019-07-08}, booktitle = {Medical Imaging with Deep Learning. MIDL London}, abstract = {Quantification of cardiac structures in non-contrast CT (NCCT) could improve cardiovascular risk stratification. However, setting a manual reference to train a fully convolutional network (FCN) for automatic segmentation of NCCT images is hardly feasible, and an FCN trained on coronary CT angiography (CCTA) images would not generalize to NCCT. Therefore, we propose to train an FCN with virtual non-contrast (VNC) images from a dual-layer detector CT scanner and a reference standard obtained on perfectly aligned CCTA images.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Quantification of cardiac structures in non-contrast CT (NCCT) could improve cardiovascular risk stratification. However, setting a manual reference to train a fully convolutional network (FCN) for automatic segmentation of NCCT images is hardly feasible, and an FCN trained on coronary CT angiography (CCTA) images would not generalize to NCCT. Therefore, we propose to train an FCN with virtual non-contrast (VNC) images from a dual-layer detector CT scanner and a reference standard obtained on perfectly aligned CCTA images. |
Nikolas Lessmann, Jelmer M. Wolterink, Majd Zreik, Max A. Viergever, Bram van Ginneken, Ivana Išgum Vertebra partitioning with thin-plate spline surfaces steered by a convolutional neural network Conference Medical Imaging with Deep Learning. MIDL London, 2019. @conference{Less19c, title = {Vertebra partitioning with thin-plate spline surfaces steered by a convolutional neural network}, author = {Nikolas Lessmann, Jelmer M. Wolterink, Majd Zreik, Max A. Viergever, Bram van Ginneken, Ivana Išgum}, url = {https://openreview.net/forum?id=B1eQv5INqV}, year = {2019}, date = {2019-07-08}, booktitle = {Medical Imaging with Deep Learning. MIDL London}, abstract = {Thin-plate splines can be used for interpolation of image values, but can also be used to represent a smooth surface, such as the boundary between two structures. We present a method for partitioning vertebra segmentation masks into two substructures, the vertebral body and the posterior elements, using a convolutional neural network that predicts the boundary between the two structures. This boundary is modeled as a thin-plate spline surface defined by a set of control points predicted by the network. The neural network is trained using the reconstruction error of a convolutional autoencoder to enable the use of unpaired data.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Thin-plate splines can be used for interpolation of image values, but can also be used to represent a smooth surface, such as the boundary between two structures. We present a method for partitioning vertebra segmentation masks into two substructures, the vertebral body and the posterior elements, using a convolutional neural network that predicts the boundary between the two structures. This boundary is modeled as a thin-plate spline surface defined by a set of control points predicted by the network. The neural network is trained using the reconstruction error of a convolutional autoencoder to enable the use of unpaired data. |
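To make the thin-plate spline surface concrete, the sketch below fits a TPS z(x, y) to a handful of control points (random stand-ins for the CNN-predicted points) and evaluates it on a grid; in the paper this surface would separate the vertebral body from the posterior elements. The kernel r^2 log r and the small regularization follow the standard TPS formulation; shapes and point counts are assumptions.

import numpy as np

def tps_kernel(r):
    out = np.zeros_like(r)
    nz = r > 0
    out[nz] = r[nz] ** 2 * np.log(r[nz])
    return out

def fit_tps(xy, z, reg=1e-6):
    # solve for radial weights w and affine part a so the surface interpolates z at xy (n x 2)
    n = xy.shape[0]
    dist = np.linalg.norm(xy[:, None, :] - xy[None, :, :], axis=-1)
    K = tps_kernel(dist) + reg * np.eye(n)
    P = np.hstack([np.ones((n, 1)), xy])
    A = np.block([[K, P], [P.T, np.zeros((3, 3))]])
    b = np.concatenate([z, np.zeros(3)])
    sol = np.linalg.solve(A, b)
    return sol[:n], sol[n:]                                   # radial weights, affine coefficients

def eval_tps(xy_query, xy_ctrl, w, a):
    dist = np.linalg.norm(xy_query[:, None, :] - xy_ctrl[None, :, :], axis=-1)
    return tps_kernel(dist) @ w + a[0] + xy_query @ a[1:]

ctrl = np.random.rand(8, 2) * 64                              # stand-in for predicted control points
height = np.random.rand(8) * 10                               # boundary height at each control point
w, a = fit_tps(ctrl, height)
grid = np.stack(np.meshgrid(np.arange(64.0), np.arange(64.0)), -1).reshape(-1, 2)
surface = eval_tps(grid, ctrl, w, a).reshape(64, 64)          # smooth partitioning surface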
Julia M.H. Noothout, Bob D. de Vos, Jelmer M. Wolterink, Richard A.P. Takx, Tim Leiner, Ivana Išgum Deep Learning for Automatic Landmark Localization in CTA for Transcatheter Aortic Valve Implantation Conference Radiological Society of North America, 105th Annual Meeting, 2019. @conference{Noothout2019, title = {Deep Learning for Automatic Landmark Localization in CTA for Transcatheter Aortic Valve Implantation}, author = {Julia M.H. Noothout, Bob D. de Vos, Jelmer M. Wolterink, Richard A.P. Takx, Tim Leiner, Ivana Išgum}, url = {http://dlmedia.eu/landmarks_rsna2019_final-3/}, year = {2019}, date = {2019-12-03}, booktitle = {Radiological Society of North America, 105th Annual Meeting}, abstract = {PURPOSE Fast and accurate automatic landmark localization in CT angiography (CTA) scans can aid treatment planning for patients undergoing transcatheter aortic valve implantation (TAVI). Manual localization of landmarks can be time-consuming and cumbersome. Automatic landmark localization can potentially reduce post-processing time and interobserver variability. Hence, this study evaluates the performance of deep learning for automatic aortic root landmark localization in CTA. METHOD AND MATERIALS This study included 672 retrospectively gated CTA scans acquired as part of clinical routine (Philips Brilliance iCT-256 scanner, 0.9mm slice thickness, 0.45mm increment, 80-140kVp, 210-300mAs, contrast). Reference standard was defined by manual localization of the left (LH), non-coronary (NCH) and right (RH) aortic valve hinge points, and the right (RO) and left (LO) coronary ostia. To develop and evaluate the automatic method, 412 training, 60 validation, and 200 test CTAs were randomly selected. 100/200 test CTAs were annotated twice by the same observer and once by a second observer to estimate intra- and interobserver agreement. Five CNNs with identical architectures were trained, one for the localization of each landmark. For treatment planning of TAVI, distances between landmark points are used, hence performance was evaluated on subvoxel level with the Euclidean distance between reference and automatically predicted landmark locations. RESULTS Median (IQR) distance errors for the LH, NCH and RH were 2.44 (1.79), 3.01 (1.82) and 2.98 (2.09)mm, respectively. Repeated annotation of the first observer led to distance errors of 2.06 (1.43), 2.57 (2.22) and 2.58 (2.30)mm, and for the second observer to 1.80 (1.32), 1.99 (1.28) and 1.81 (1.68)mm, respectively. Median (IQR) distance errors for the RO and LO were 1.65 (1.33) and 1.91 (1.58)mm, respectively. Repeated annotation of the first observer led to distance errors of 1.43 (1.05) and 1.92 (1.44)mm, and for the second observer to 1.78 (1.55) and 2.35 (1.56)mm, respectively. On average, analysis took 0.3s/CTA. CONCLUSION Automatic landmark localization in CTA approaches second observer performance and thus enables automatic, accurate and reproducible landmark localization without additional reading time. CLINICAL RELEVANCE/APPLICATION Automatic landmark localization in CTA can aid in reducing post-processing time and interobserver variability in treatment planning for patients undergoing TAVI.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } PURPOSE Fast and accurate automatic landmark localization in CT angiography (CTA) scans can aid treatment planning for patients undergoing transcatheter aortic valve implantation (TAVI). Manual localization of landmarks can be time-consuming and cumbersome. 
Automatic landmark localization can potentially reduce post-processing time and interobserver variability. Hence, this study evaluates the performance of deep learning for automatic aortic root landmark localization in CTA. METHOD AND MATERIALS This study included 672 retrospectively gated CTA scans acquired as part of clinical routine (Philips Brilliance iCT-256 scanner, 0.9mm slice thickness, 0.45mm increment, 80-140kVp, 210-300mAs, contrast). Reference standard was defined by manual localization of the left (LH), non-coronary (NCH) and right (RH) aortic valve hinge points, and the right (RO) and left (LO) coronary ostia. To develop and evaluate the automatic method, 412 training, 60 validation, and 200 test CTAs were randomly selected. 100/200 test CTAs were annotated twice by the same observer and once by a second observer to estimate intra- and interobserver agreement. Five CNNs with identical architectures were trained, one for the localization of each landmark. For treatment planning of TAVI, distances between landmark points are used, hence performance was evaluated on subvoxel level with the Euclidean distance between reference and automatically predicted landmark locations. RESULTS Median (IQR) distance errors for the LH, NCH and RH were 2.44 (1.79), 3.01 (1.82) and 2.98 (2.09)mm, respectively. Repeated annotation of the first observer led to distance errors of 2.06 (1.43), 2.57 (2.22) and 2.58 (2.30)mm, and for the second observer to 1.80 (1.32), 1.99 (1.28) and 1.81 (1.68)mm, respectively. Median (IQR) distance errors for the RO and LO were 1.65 (1.33) and 1.91 (1.58)mm, respectively. Repeated annotation of the first observer led to distance errors of 1.43 (1.05) and 1.92 (1.44)mm, and for the second observer to 1.78 (1.55) and 2.35 (1.56)mm, respectively. On average, analysis took 0.3s/CTA. CONCLUSION Automatic landmark localization in CTA approaches second observer performance and thus enables automatic, accurate and reproducible landmark localization without additional reading time. CLINICAL RELEVANCE/APPLICATION Automatic landmark localization in CTA can aid in reducing post-processing time and interobserver variability in treatment planning for patients undergoing TAVI. |
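The evaluation reported above summarizes per-scan Euclidean distances between reference and predicted landmark positions as median (IQR); a small sketch of that computation, with array names and shapes assumed for illustration:

import numpy as np

def median_iqr_error(ref_mm, pred_mm):
    # ref_mm, pred_mm: (n_scans, 3) landmark coordinates in millimetres
    dist = np.linalg.norm(ref_mm - pred_mm, axis=1)    # Euclidean error per scan
    q1, median, q3 = np.percentile(dist, [25, 50, 75])
    return median, q3 - q1                             # median distance error and IQR

ref = np.random.rand(200, 3) * 100                     # toy reference landmark positions
pred = ref + np.random.randn(200, 3)                   # toy predictions with ~1 mm noise
print("median (IQR): %.2f (%.2f) mm" % median_iqr_error(ref, pred))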
Christina I. Luffman, Suzanne C. Wetstein, Allison M. Onken, Michael E. Pyle, Kevin H. Kensler, Ying Liu, Josien P. Pluim, Mitko Veta, Stuart J. Schnitt, Rulla M. Tamimi, Gabrielle M. Baker, Laura C. Collins, Yu Jing Heng Assessing Breast Terminal Duct Lobular Unit Involution: A Computational Pathology Approach Conference Abstracts and Case Studies From the College of American Pathologists 2019 Annual Meeting (CAP19), 143 (9), Archives of Pathology & Laboratory Medicine, 2019. @conference{https://doi.org/10.5858/arpa.2019-0901-AB, title = {Assessing Breast Terminal Duct Lobular Unit Involution: A Computational Pathology Approach}, author = {Christina I. Luffman, Suzanne C. Wetstein, Allison M. Onken, Michael E. Pyle, Kevin H. Kensler, Ying Liu, Josien P. Pluim, Mitko Veta, Stuart J. Schnitt, Rulla M. Tamimi, Gabrielle M. Baker, Laura C. Collins, Yu Jing Heng}, doi = {10.5858/arpa.2019-0901-AB}, year = {2019}, date = {2019-09-01}, booktitle = {Abstracts and Case Studies From the College of American Pathologists 2019 Annual Meeting (CAP19)}, volume = {143}, number = {9}, pages = {e2-e226}, publisher = {Archives of Pathology & Laboratory Medicine}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
PhD Theses |
R.W. van Hamersvelt New dimensions in cardiovascular CT PhD Thesis Utrecht University, The Netherlands, 2019, ISBN: 978-90-393-7092-6. @phdthesis{vanHamersvelt2019b, title = {New dimensions in cardiovascular CT}, author = {R.W. van Hamersvelt}, isbn = {978-90-393-7092-6}, year = {2019}, date = {2019-03-14}, school = {Utrecht University, The Netherlands}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
2018
Inproceedings |
J.M.H. Noothout, B.D de Vos, J.M. Wolterink, T. Leiner, I. Isgum CNN-based Landmark Detection in Cardiac CTA Scans Inproceedings Medical Imaging with Deep Learning. MIDL Amsterdam, 2018. @inproceedings{Noothout2018b, title = {CNN-based Landmark Detection in Cardiac CTA Scans}, author = {J.M.H. Noothout, B.D de Vos, J.M. Wolterink, T. Leiner, I. Isgum}, url = {https://openreview.net/forum?id=r1malb3jz}, year = {2018}, date = {2018-05-20}, booktitle = {Medical Imaging with Deep Learning. MIDL Amsterdam}, abstract = {Fast and accurate anatomical landmark detection can benefit many medical image analysis methods. Here, we propose a method to automatically detect anatomical landmarks in medical images. Automatic landmark detection is performed with a patch-based fully convolutional neural network (FCNN) that combines regression and classification. For any given image patch, regression is used to predict the 3D displacement vector from the image patch to the landmark. Simultaneously, classification is used to identify patches that contain the landmark. Under the assumption that patches close to a landmark can determine the landmark location more precisely than patches further from it, only those patches that contain the landmark according to classification are used to determine the landmark location. The landmark location is obtained by calculating the average landmark location using the computed 3D displacement vectors. The method is evaluated using detection of six clinically relevant landmarks in coronary CT angiography (CCTA) scans : the right and left ostium, the bifurcation of the left main coronary artery (LM) into the left anterior descending and the left circumflex artery, and the origin of the right, non-coronary, and left aortic valve commissure. The proposed method achieved an average Euclidean distance error of 2.19 mm and 2.88 mm for the right and left ostium respectively, 3.78 mm for the bifurcation of the LM, and 1.82 mm, 2.10 mm and 1.89 mm for the origin of the right, non-coronary, and left aortic valve commissure respectively, demonstrating accurate performance. The proposed combination of regression and classification can be used to accurately detect landmarks in CCTA scans.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Fast and accurate anatomical landmark detection can benefit many medical image analysis methods. Here, we propose a method to automatically detect anatomical landmarks in medical images. Automatic landmark detection is performed with a patch-based fully convolutional neural network (FCNN) that combines regression and classification. For any given image patch, regression is used to predict the 3D displacement vector from the image patch to the landmark. Simultaneously, classification is used to identify patches that contain the landmark. Under the assumption that patches close to a landmark can determine the landmark location more precisely than patches further from it, only those patches that contain the landmark according to classification are used to determine the landmark location. The landmark location is obtained by calculating the average landmark location using the computed 3D displacement vectors. 
The method is evaluated using detection of six clinically relevant landmarks in coronary CT angiography (CCTA) scans: the right and left ostium, the bifurcation of the left main coronary artery (LM) into the left anterior descending and the left circumflex artery, and the origin of the right, non-coronary, and left aortic valve commissure. The proposed method achieved an average Euclidean distance error of 2.19 mm and 2.88 mm for the right and left ostium respectively, 3.78 mm for the bifurcation of the LM, and 1.82 mm, 2.10 mm and 1.89 mm for the origin of the right, non-coronary, and left aortic valve commissure respectively, demonstrating accurate performance. The proposed combination of regression and classification can be used to accurately detect landmarks in CCTA scans. |
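The aggregation step described in this abstract, where only patches classified as containing the landmark contribute and the location is the average of patch centre plus predicted 3D displacement, could look roughly as follows. The inputs are assumed to come from the patch-based FCNN, which is not shown, and the probability threshold is an illustrative choice.

import numpy as np

def aggregate_landmark(patch_centres, displacements, contains_prob, thr=0.5):
    # patch_centres, displacements: (n_patches, 3); contains_prob: (n_patches,)
    keep = contains_prob > thr                         # patches predicted to contain the landmark
    if not np.any(keep):                               # fall back to the most confident patch
        keep = contains_prob == contains_prob.max()
    candidates = patch_centres[keep] + displacements[keep]
    return candidates.mean(axis=0)                     # averaged landmark location

centres = np.random.rand(1000, 3) * 200                # toy patch centres in mm
disp = np.random.randn(1000, 3) * 5                    # toy predicted displacement vectors
prob = np.random.rand(1000)                            # toy classification outputs
landmark = aggregate_landmark(centres, disp, prob)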
J.M. Wolterink, T. Leiner, I. Isgum Blood vessel geometry synthesis using generative adversarial networks Inproceedings Medical Imaging with Deep Learning. MIDL Amsterdam, 2018. @inproceedings{Wolterink2018b, title = {Blood vessel geometry synthesis using generative adversarial networks}, author = {J.M. Wolterink, T. Leiner, I. Isgum}, url = {https://openreview.net/forum?id=SJ4N7isiG}, year = {2018}, date = {2018-05-20}, booktitle = {Medical Imaging with Deep Learning. MIDL Amsterdam}, abstract = {Computationally synthesized blood vessels can be used for training and evaluation of medical image analysis applications. We propose a deep generative model to synthesize blood vessel geometries, with an application to coronary arteries in cardiac CT angiography (CCTA). In the proposed method, a Wasserstein generative adversarial network (GAN) consisting of a generator and a discriminator network is trained. While the generator tries to synthesize realistic blood vessel geometries, the discriminator tries to distinguish synthesized geometries from those of real blood vessels. Both real and synthesized blood vessel geometries are parametrized as 1D signals based on the central vessel axis. The generator can optionally be provided with an attribute vector to synthesize vessels with particular characteristics. The GAN was optimized using a reference database with parametrizations of 4,412 real coronary artery geometries extracted from CCTA scans. After training, plausible coronary artery geometries could be synthesized based on random vectors sampled from a latent space. A qualitative analysis showed strong similarities between real and synthesized coronary arteries. A detailed analysis of the latent space showed that the diversity present in coronary artery anatomy was accurately captured by the generator. Results show that Wasserstein generative adversarial networks can be used to synthesize blood vessel geometries.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Computationally synthesized blood vessels can be used for training and evaluation of medical image analysis applications. We propose a deep generative model to synthesize blood vessel geometries, with an application to coronary arteries in cardiac CT angiography (CCTA). In the proposed method, a Wasserstein generative adversarial network (GAN) consisting of a generator and a discriminator network is trained. While the generator tries to synthesize realistic blood vessel geometries, the discriminator tries to distinguish synthesized geometries from those of real blood vessels. Both real and synthesized blood vessel geometries are parametrized as 1D signals based on the central vessel axis. The generator can optionally be provided with an attribute vector to synthesize vessels with particular characteristics. The GAN was optimized using a reference database with parametrizations of 4,412 real coronary artery geometries extracted from CCTA scans. After training, plausible coronary artery geometries could be synthesized based on random vectors sampled from a latent space. A qualitative analysis showed strong similarities between real and synthesized coronary arteries. A detailed analysis of the latent space showed that the diversity present in coronary artery anatomy was accurately captured by the generator. Results show that Wasserstein generative adversarial networks can be used to synthesize blood vessel geometries. |
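A compact sketch, under assumed shapes, of a Wasserstein GAN over 1D vessel parametrizations such as the abstract describes (e.g. a radius profile along the central axis). The layer sizes, signal length, latent dimension and RMSprop/weight-clipping training follow the generic WGAN recipe rather than the paper's architecture, and the optional attribute vector is omitted.

import torch
import torch.nn as nn

latent_dim, signal_len = 32, 64

generator = nn.Sequential(                              # maps a latent vector to a 1D geometry signal
    nn.Linear(latent_dim, 128), nn.ReLU(),
    nn.Linear(128, signal_len))

critic = nn.Sequential(                                 # Wasserstein critic (no sigmoid)
    nn.Linear(signal_len, 128), nn.LeakyReLU(0.2),
    nn.Linear(128, 1))

g_opt = torch.optim.RMSprop(generator.parameters(), lr=5e-5)
c_opt = torch.optim.RMSprop(critic.parameters(), lr=5e-5)

def train_step(real_signals, clip=0.01):
    # critic update: increase the score gap between real and synthesized geometries
    z = torch.randn(real_signals.size(0), latent_dim)
    fake = generator(z).detach()
    c_loss = critic(fake).mean() - critic(real_signals).mean()
    c_opt.zero_grad(); c_loss.backward(); c_opt.step()
    for p in critic.parameters():                       # weight clipping as in the original WGAN
        p.data.clamp_(-clip, clip)
    # generator update: make synthesized geometries score like real ones
    z = torch.randn(real_signals.size(0), latent_dim)
    g_loss = -critic(generator(z)).mean()
    g_opt.zero_grad(); g_loss.backward(); g_opt.step()

train_step(torch.randn(16, signal_len))                 # toy batch standing in for real vessel signals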
J. M. Tomczak, M. Ilse, M. Welling, M. Jansen, H.G. Coleman, M. Lucas, K. de Laat, M. de Bruin, H. Marquering, M. J. van der Wel, O. J. de Boer, C. D. Savci-Heijink, S. L. Meijer Histopathological classification of precursor lesions of esophageal adenocarcinoma: A Deep Multiple Instance Learning Approach Inproceedings Medical Imaging with Deep Learning. MIDL Amsterdam, 2018. @inproceedings{Tomczak2018, title = {Histopathological classification of precursor lesions of esophageal adenocarcinoma: A Deep Multiple Instance Learning Approach}, author = {J. M. Tomczak, M. Ilse, M. Welling, M. Jansen, H.G. Coleman, M. Lucas, K. de Laat, M. de Bruin, H. Marquering, M. J. van der Wel, O. J. de Boer, C. D. Savci-Heijink, S. L. Meijer}, url = {https://openreview.net/pdf?id=HyNf-UcsM}, year = {2018}, date = {2018-04-10}, booktitle = {Medical Imaging with Deep Learning. MIDL Amsterdam}, journal = {Medical Imaging with Deep Learning. MIDL Amsterdam}, abstract = {In this paper, we hypothesize that morphological properties of nuclei are crucial for classifying dysplastic changes. Therefore, we propose to represent a whole histopathology slide as a collection of smaller images containing patches of nuclei and adjacent tissue. For this purpose, we use a deep multiple instance learning approach. Within this framework we first embed patches in a low-dimensional space using convolutional and fully-connected layers. Next, we combine the low-dimensional embeddings using a multiple instance learning pooling operator and eventually we use fully-connected layers to provide a classification. We evaluate our approach on an esophagus cancer histopathology dataset.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } In this paper, we hypothesize that morphological properties of nuclei are crucial for classifying dysplastic changes. Therefore, we propose to represent a whole histopathology slide as a collection of smaller images containing patches of nuclei and adjacent tissue. For this purpose, we use a deep multiple instance learning approach. Within this framework we first embed patches in a low-dimensional space using convolutional and fully-connected layers. Next, we combine the low-dimensional embeddings using a multiple instance learning pooling operator and eventually we use fully-connected layers to provide a classification. We evaluate our approach on an esophagus cancer histopathology dataset. |
M. Ilse, J. M. Tomczak, M. Welling Attention-based deep multiple instance learning Inproceedings International Conference on Machine Learning. ICML, 2018. @inproceedings{Ilse2018, title = {Attention-based deep multiple instance learning}, author = {M. Ilse, J. M. Tomczak, M. Welling}, url = {https://arxiv.org/abs/1802.04712}, year = {2018}, date = {2018-02-13}, booktitle = {International Conference on Machine Learning. ICML}, abstract = {Multiple instance learning (MIL) is a variation of supervised learning where a single class label is assigned to a bag of instances. In this paper, we state the MIL problem as learning the Bernoulli distribution of the bag label where the bag label probability is fully parameterized by neural networks. Furthermore, we propose a neural network-based permutation-invariant aggregation operator that corresponds to the attention mechanism. Notably, an application of the proposed attention-based operator provides insight into the contribution of each instance to the bag label. We show empirically that our approach achieves comparable performance to the best MIL methods on benchmark MIL datasets and it outperforms other methods on a MNIST-based MIL dataset and two real-life histopathology datasets without sacrificing interpretability.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Multiple instance learning (MIL) is a variation of supervised learning where a single class label is assigned to a bag of instances. In this paper, we state the MIL problem as learning the Bernoulli distribution of the bag label where the bag label probability is fully parameterized by neural networks. Furthermore, we propose a neural network-based permutation-invariant aggregation operator that corresponds to the attention mechanism. Notably, an application of the proposed attention-based operator provides insight into the contribution of each instance to the bag label. We show empirically that our approach achieves comparable performance to the best MIL methods on benchmark MIL datasets and it outperforms other methods on a MNIST-based MIL dataset and two real-life histopathology datasets without sacrificing interpretability. |
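The attention-based pooling operator at the core of this paper can be sketched in a few lines: per-instance embeddings receive a learned attention weight and are summed into a bag embedding, and the weights indicate each instance's contribution. Dimensions are illustrative and the gated-attention variant is omitted.

import torch
import torch.nn as nn

class AttentionMILPooling(nn.Module):
    def __init__(self, embed_dim=64, attn_dim=32):
        super().__init__()
        self.attention = nn.Sequential(                  # scores one scalar per instance
            nn.Linear(embed_dim, attn_dim), nn.Tanh(),
            nn.Linear(attn_dim, 1))

    def forward(self, instance_embeddings):              # (n_instances, embed_dim)
        scores = self.attention(instance_embeddings)     # (n_instances, 1)
        weights = torch.softmax(scores, dim=0)           # normalized over the bag
        bag_embedding = (weights * instance_embeddings).sum(dim=0)
        return bag_embedding, weights.squeeze(-1)        # weights reveal instance contributions

pool = AttentionMILPooling()
bag, attn = pool(torch.randn(12, 64))                    # a bag of 12 instance embeddings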
G. Bortsova, F. Dubost, S. Ørting, I. Katramados, L. Hogeweg, L. Thomsen, M. Wille, M. de Bruijne Deep learning from label proportions for emphysema quantification Inproceedings International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 768–776, Springer, Cham, 2018. @inproceedings{Bortsova2018, title = {Deep learning from label proportions for emphysema quantification}, author = {G. Bortsova, F. Dubost, S. Ørting, I. Katramados, L. Hogeweg, L. Thomsen, M. Wille, M. de Bruijne}, url = {https://arxiv.org/pdf/1807.08601.pdf}, doi = {https://doi.org/10.1007/978-3-030-00934-2_85}, year = {2018}, date = {2018-09-26}, booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention}, pages = {768--776}, publisher = {Springer, Cham}, abstract = {We propose an end-to-end deep learning method that learns to estimate emphysema extent from proportions of the diseased tissue. These proportions were visually estimated by experts using a standard grading system, in which grades correspond to intervals (label example: 1-5% of diseased tissue). The proposed architecture encodes the knowledge that the labels represent a volumetric proportion. A custom loss is designed to learn with intervals. Thus, during training, our network learns to segment the diseased tissue such that its proportions fit the ground truth intervals. Our architecture and loss combined improve the performance substantially (8% ICC) compared to a more conventional regression network. We outperform traditional lung densitometry and two recently published methods for emphysema quantification by a large margin (at least 7% AUC and 15% ICC), and achieve near-human-level performance. Moreover, our method generates emphysema segmentations that predict the spatial distribution of emphysema at human level. }, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } We propose an end-to-end deep learning method that learns to estimate emphysema extent from proportions of the diseased tissue. These proportions were visually estimated by experts using a standard grading system, in which grades correspond to intervals (label example: 1-5% of diseased tissue). The proposed architecture encodes the knowledge that the labels represent a volumetric proportion. A custom loss is designed to learn with intervals. Thus, during training, our network learns to segment the diseased tissue such that its proportions fit the ground truth intervals. Our architecture and loss combined improve the performance substantially (8% ICC) compared to a more conventional regression network. We outperform traditional lung densitometry and two recently published methods for emphysema quantification by a large margin (at least 7% AUC and 15% ICC), and achieve near-human-level performance. Moreover, our method generates emphysema segmentations that predict the spatial distribution of emphysema at human level. |
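One way to read the custom interval loss described here: the network's per-voxel emphysema probabilities are averaged into a predicted proportion, and no penalty is incurred as long as that proportion lies inside the expert's interval (e.g. 1-5%). The sketch below is an illustrative loss under that reading, not the authors' exact formulation.

import torch

def interval_proportion_loss(voxel_probs, low, high):
    # voxel_probs: (n_voxels,) predicted emphysema probabilities; low/high: interval bounds in [0, 1]
    proportion = voxel_probs.mean()                      # predicted diseased-tissue fraction
    below = torch.clamp(low - proportion, min=0.0)       # penalty only if under the interval
    above = torch.clamp(proportion - high, min=0.0)      # penalty only if over the interval
    return below ** 2 + above ** 2

probs = torch.sigmoid(torch.randn(10000))                # stand-in segmentation output for one lung
loss = interval_proportion_loss(probs, low=0.01, high=0.05)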
Conferences |
J. van Vugt, E. Marchiori, R. Mann, A. Gubern-Mérida, N. Moriakov, J. Teuwen Vendor-independent soft tissue lesion detection using weakly supervised and unsupervised adversarial domain adaptation Conference 2018. @conference{vugt18, title = {Vendor-independent soft tissue lesion detection using weakly supervised and unsupervised adversarial domain adaptation}, author = {J. van Vugt, E. Marchiori, R. Mann, A. Gubern-Mérida, N. Moriakov and J. Teuwen.}, year = {2018}, date = {2018-08-24}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
E. Sogancioglu, S. Hu, D. Belli, B. van Ginneken Chest X-ray Inpainting with Deep Generative Models Conference arXiv:1809.01471, 2018. @conference{soga18, title = {Chest X-ray Inpainting with Deep Generative Models}, author = {E. Sogancioglu, S. Hu, D. Belli and B. van Ginneken. }, year = {2018}, date = {2018-08-29}, publisher = {arXiv:1809.01471}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
C. González-Gonzalo, B. Liefers, B. van Ginneken, C. I. Sánchez Improving weakly-supervised lesion localization with iterative saliency map refinement Conference Medical Imaging with Deep Learning. MIDL Amsterdam, 2018. @conference{Gonz2018, title = {Improving weakly-supervised lesion localization with iterative saliency map refinement}, author = {C. González-Gonzalo, B. Liefers, B. van Ginneken, C. I. Sánchez}, url = {https://openreview.net/forum?id=r15c8gnoG}, year = {2018}, date = {2018-05-20}, booktitle = {Medical Imaging with Deep Learning. MIDL Amsterdam}, abstract = {Interpretability of deep neural networks in medical imaging is becoming an important technique to understand network classification decisions and increase doctors' trust. Available methods for visual interpretation, though, tend to highlight only the most discriminant areas, which is suboptimal for clinical output. We propose a novel deep visualization framework for improving weakly-supervised lesion localization. The framework applies an iterative approach where, in each step, the interpretation maps focus on different, less discriminative areas of the images, but still important for the final classification, reaching a more refined localization of abnormalities. We evaluate the performance of the method for the localization of diabetic retinopathy lesions in color fundus images. The results show the obtained visualization maps are able to detect more lesions after the iterative procedure in the case of more severely affected retinas.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Interpretability of deep neural networks in medical imaging is becoming an important technique to understand network classification decisions and increase doctors' trust. Available methods for visual interpretation, though, tend to highlight only the most discriminant areas, which is suboptimal for clinical output. We propose a novel deep visualization framework for improving weakly-supervised lesion localization. The framework applies an iterative approach where, in each step, the interpretation maps focus on different, less discriminative areas of the images, but still important for the final classification, reaching a more refined localization of abnormalities. We evaluate the performance of the method for the localization of diabetic retinopathy lesions in color fundus images. The results show the obtained visualization maps are able to detect more lesions after the iterative procedure in the case of more severely affected retinas. |
2017
Inproceedings |
J. M. Tomczak, M. Ilse, M. Welling Deep Learning with Permutation-invariant Operator for Multiple-instance Histopathology Classification Inproceedings Medical Imaging meets NIPS Workshop, 2017. @inproceedings{Tomczak2017, title = {Deep Learning with Permutation-invariant Operator for Multiple-instance Histopathology Classification}, author = {J. M. Tomczak, M. Ilse, M. Welling}, url = {https://arxiv.org/abs/1712.00310}, year = {2017}, date = {2017-12-01}, booktitle = {Medical Imaging meets NIPS Workshop}, abstract = {The computer-aided analysis of medical scans is a longstanding goal in the medical imaging field. Currently, deep learning has become a dominant methodology for supporting pathologists and radiologists. Deep learning algorithms have been successfully applied to digital pathology and radiology; nevertheless, there are still practical issues that prevent these tools from being widely used in practice. The main obstacles are the low number of available cases and the large size of images (a.k.a. the small n, large p problem in machine learning), and very limited access to annotations at the pixel level, which can lead to severe overfitting and large computational requirements. We propose to handle these issues by introducing a framework that processes a medical image as a collection of small patches using a single, shared neural network. The final diagnosis is provided by combining the scores of individual patches using a permutation-invariant operator (combination). In the machine learning community, such an approach is called multiple-instance learning (MIL).}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The computer-aided analysis of medical scans is a longstanding goal in the medical imaging field. Currently, deep learning has become a dominant methodology for supporting pathologists and radiologists. Deep learning algorithms have been successfully applied to digital pathology and radiology; nevertheless, there are still practical issues that prevent these tools from being widely used in practice. The main obstacles are the low number of available cases and the large size of images (a.k.a. the small n, large p problem in machine learning), and very limited access to annotations at the pixel level, which can lead to severe overfitting and large computational requirements. We propose to handle these issues by introducing a framework that processes a medical image as a collection of small patches using a single, shared neural network. The final diagnosis is provided by combining the scores of individual patches using a permutation-invariant operator (combination). In the machine learning community, such an approach is called multiple-instance learning (MIL). |