{"id":796,"date":"2021-12-19T21:12:27","date_gmt":"2021-12-19T13:12:27","guid":{"rendered":"https:\/\/511cvlab.sinkers.cn\/?p=796"},"modified":"2025-10-17T17:04:25","modified_gmt":"2025-10-17T09:04:25","slug":"sar","status":"publish","type":"post","link":"https:\/\/cv.nirc.top\/zh\/2021\/sar\/","title":{"rendered":"SAR: Spatial-Aware Regression for 3D Hand Pose and Mesh Reconstruction from a Monocular RGB Image"},"content":{"rendered":"<div class=\"wp-block-group has-global-padding is-layout-constrained wp-container-core-group-is-layout-f00c8009 wp-block-group-is-layout-constrained\" style=\"padding-right:5%;padding-left:5%\">\n<div class=\"wp-block-group has-global-padding is-layout-constrained wp-block-group-is-layout-constrained\">\n    <div\n        class=\"wp-block-buttons is-content-justification-center is-layout-flex wp-container-core-buttons-is-layout-1 wp-block-buttons-is-layout-flex\">\n        <div class=\"wp-block-button\" style=\"line-height: 1.5;\">\n            <a class=\"wp-block-button__link wp-element-button\"\n                href=\"https:\/\/ieeexplore.ieee.org\/document\/9583792\" target=\"_blank\"\n                style=\"padding-right: var(--wp--preset--spacing--40); padding-left: var(--wp--preset--spacing--40); display: flex; align-items: center; gap: 8px;\">\n                <div>\n                    <svg class=\"svg-inline--fa fa-file-pdf fa-w-12\" aria-hidden=\"true\" focusable=\"false\"\n                        data-prefix=\"fas\" data-icon=\"file-pdf\" role=\"img\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\"\n                        viewbox=\"0 0 384 512\" style=\"height: 1em; width: 1em;\">\n                        <path fill=\"#FFFFFF\"\n                            d=\"M181.9 256.1c-5-16-4.9-46.9-2-46.9 8.4 0 7.6 36.9 2 46.9zm-1.7 47.2c-7.7 20.2-17.3 43.3-28.4 62.7 18.3-7 39-17.2 62.9-21.9-12.7-9.6-24.9-23.4-34.5-40.8zM86.1 428.1c0 .8 13.2-5.4 34.9-40.2-6.7 6.3-29.1 24.5-34.9 40.2zM248 160h136v328c0 13.3-10.7 24-24 24H24c-13.3 0-24-10.7-24-24V24C0 10.7 10.7 0 24 0h200v136c0 13.2 10.8 24 24 24zm-8 171.8c-20-12.2-33.3-29-42.7-53.8 4.5-18.5 11.6-46.6 6.2-64.2-4.7-29.4-42.4-26.5-47.8-6.8-5 18.3-.4 44.1 8.1 77-11.6 27.6-28.7 64.6-40.8 85.8-.1 0-.1.1-.2.1-27.1 13.9-73.6 44.5-54.5 68 5.6 6.9 16 10 21.5 10 17.9 0 35.7-18 61.1-61.8 25.8-8.5 54.1-19.1 79-23.2 21.7 11.8 47.1 19.5 64 19.5 29.2 0 31.2-32 19.7-43.4-13.9-13.6-54.3-9.7-73.6-7.2zM377 105L279 7c-4.5-4.5-10.6-7-17-7h-6v128h128v-6.1c0-6.3-2.5-12.4-7-16.9zm-74.1 255.3c4.1-2.7-2.5-11.9-42.8-9 37.1 15.8 42.8 9 42.8 9z\">\n                        <\/path>\n                    <\/svg>\n                <\/div>\n                <div>Paper<\/div>\n            <\/a>\n        <\/div>\n\n        <div class=\"wp-block-button\" style=\"line-height: 1.5;\">\n            <a class=\"wp-block-button__link wp-element-button\" href=\"https:\/\/github.com\/zxz267\/SAR\" target=\"_blank\"\n                style=\"padding-right: var(--wp--preset--spacing--40); padding-left: var(--wp--preset--spacing--40); display: flex; align-items: center; gap: 8px;\">\n                <div>\n                    <svg class=\"svg-inline--fa fa-github fa-w-16\" aria-hidden=\"true\" focusable=\"false\" data-prefix=\"fab\"\n                        data-icon=\"github\" role=\"img\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" viewbox=\"0 0 496 512\" data-fa-i2svg=\"\"\n                        style=\"height: 1em; width: 1em;\">\n                        <path fill=\"#FFFFFF\"\n                            d=\"M165.9 397.4c0 2-2.3 3.6-5.2 
Figure: Illustration of 3D hand reconstruction from a monocular RGB image. From the camera input (left), we reconstruct the 3D hand mesh (upper right) and the 3D hand pose (lower right). Thanks to its balance of accuracy and efficiency, our method is well suited to real-world VR/AR applications.

Abstract

3D hand reconstruction has become a popular research topic in recent years and holds great potential for VR/AR applications. However, because VR/AR devices have limited computational resources, a reconstruction algorithm must balance accuracy and efficiency to give users a good experience, and current methods do not strike this balance well. This paper therefore proposes a novel framework for fast and accurate 3D hand reconstruction. Our framework relies on three essential modules: spatial-aware initial graph building (SAIGB), graph convolutional network (GCN) based belief maps regression (GBBMR), and pose-guided refinement (PGR). First, given image feature maps extracted by convolutional neural networks, SAIGB builds a spatial-aware, compact initial feature graph. Each node in this graph represents one mesh vertex and carries vertex-specific spatial information that aids accurate and efficient regression. Next, GBBMR applies adaptive-GCN to introduce interactions between vertices, capturing both short-range and long-range dependencies efficiently and flexibly; it then maps vertex features to belief maps that model the uncertainty of predictions, enabling more accurate estimates. Finally, PGR compresses the redundant vertex belief maps into compact joint belief maps under pose guidance and uses them to refine the earlier predictions, yielding more accurate and robust reconstruction results. Our method achieves state-of-the-art performance on four public benchmarks: FreiHAND, HO-3D, RHD, and STB. Moreover, it runs at two to three times the speed of previous state-of-the-art methods.
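A key design choice above is regressing coordinates through belief maps rather than predicting them directly, which lets the network express the uncertainty of each prediction. A common way to decode such maps into continuous coordinates is a spatial soft-argmax (an expectation under a softmax distribution). The 2D sketch below is illustrative only, not necessarily the paper's exact decoding; the paper regresses 3D coordinates, so its maps also carry depth information.

```python
import torch

def soft_argmax_2d(belief_maps: torch.Tensor) -> torch.Tensor:
    """Decode (B, V, H, W) belief maps into (B, V, 2) continuous (x, y)
    coordinates as the spatial expectation under a softmax distribution.
    Illustrative decoding; the paper's exact formulation may differ."""
    b, v, h, w = belief_maps.shape
    probs = belief_maps.flatten(2).softmax(dim=-1).view(b, v, h, w)
    ys = torch.arange(h, dtype=probs.dtype).add(0.5).div(h)  # pixel centers in [0, 1]
    xs = torch.arange(w, dtype=probs.dtype).add(0.5).div(w)
    x = (probs.sum(dim=2) * xs).sum(dim=-1)  # marginalize rows, expectation over columns
    y = (probs.sum(dim=3) * ys).sum(dim=-1)  # marginalize columns, expectation over rows
    return torch.stack([x, y], dim=-1)

# Example: 778 MANO mesh vertices decoded from 32x32 belief maps.
coords = soft_argmax_2d(torch.randn(2, 778, 32, 32))
print(coords.shape)  # torch.Size([2, 778, 2])
```

Because the read-out is an expectation rather than a hard argmax, a flat belief map (high uncertainty) and a peaked one (high confidence) yield different gradients, which is what allows the network to model prediction uncertainty.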
Overview

Figure: Illustration of our framework.

1️⃣ We first use convolutional neural networks to extract feature maps from the input image.

2️⃣ Then, the SAIGB module builds an initial mesh feature graph in which every vertex holds several feature maps.

3️⃣ After that, the GBBMR module maps the initial vertex features to vertex belief maps with Adaptive-GCN; coarse coordinates can already be decoded from these coarse belief maps.

4️⃣ Subsequently, PGR refines the coarse belief maps into fine ones. More specifically, PGR first derives the pose joints' belief maps from the mesh vertices' belief maps, then concatenates these pose belief maps with the last-stage features for refinement.

5️⃣ Finally, we take the refined coordinates as our output. (A schematic sketch of the full pipeline follows this list.)
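The five steps map naturally onto a compact network. Below is a minimal, hedged PyTorch sketch of the whole pipeline. The layer sizes, the 1×1-convolution grouping used for SAIGB, the learnable dense adjacency in Adaptive-GCN, and the learned vertex-to-joint compression in PGR are all our assumptions for illustration, not the authors' implementation (see the released code at https://github.com/zxz267/SAR for the real one).

```python
import torch
import torch.nn as nn

# Illustrative sizes, not the paper's configuration.
V, J = 778, 21           # MANO mesh vertices / hand joints
C, H, W = 256, 8, 8      # backbone feature-map shape
D = H * W                # per-vertex feature size after SAIGB
HB = WB = 16             # belief-map resolution

class AdaptiveGCN(nn.Module):
    """Graph convolutions over a learnable dense adjacency, so vertex
    interactions are not restricted to mesh neighbors (form assumed)."""
    def __init__(self, nodes, dim, layers=2):
        super().__init__()
        self.adj = nn.Parameter(torch.eye(nodes))
        self.fcs = nn.ModuleList(nn.Linear(dim, dim) for _ in range(layers))

    def forward(self, x):                        # x: (B, nodes, dim)
        a = torch.softmax(self.adj, dim=-1)      # row-normalized adjacency
        for fc in self.fcs:
            x = torch.relu(fc(a @ x))
        return x

class SARSketch(nn.Module):
    def __init__(self):
        super().__init__()
        # SAIGB: a 1x1 conv assigns every mesh vertex its own feature map,
        # so each graph node keeps the backbone's spatial layout (assumed).
        self.saigb = nn.Conv2d(C, V, kernel_size=1)
        # GBBMR: vertex interactions, then per-vertex belief maps.
        self.gcn = AdaptiveGCN(V, D)
        self.to_belief = nn.Linear(D, HB * WB)
        # PGR: compress vertex belief maps to joint belief maps (learned
        # here), build a pose context, and refine the vertex predictions.
        self.vert_to_joint = nn.Linear(V, J)
        self.pose_ctx = nn.Linear(J * HB * WB, D)
        self.refine = AdaptiveGCN(V, 2 * D)
        self.to_belief_fine = nn.Linear(2 * D, HB * WB)

    def forward(self, fmap):                     # fmap: (B, C, H, W)
        x = self.saigb(fmap).flatten(2)          # (B, V, D) vertex features
        x = self.gcn(x)
        coarse = self.to_belief(x)               # (B, V, HB*WB) coarse maps
        joints = self.vert_to_joint(coarse.transpose(1, 2)).transpose(1, 2)
        ctx = self.pose_ctx(joints.flatten(1))   # (B, D) pose guidance
        x = torch.cat([x, ctx.unsqueeze(1).expand(-1, V, -1)], dim=-1)
        fine = self.to_belief_fine(self.refine(x))
        return coarse.view(-1, V, HB, WB), fine.view(-1, V, HB, WB)

coarse, fine = SARSketch()(torch.randn(2, C, H, W))
print(coarse.shape, fine.shape)  # both torch.Size([2, 778, 16, 16])
```

In the full model, belief maps from both stages would be decoded with an expectation-style read-out such as the soft-argmax shown earlier and supervised against ground-truth vertex and joint locations.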
Qualitative Results

Figure: Visual results on FreiHAND, HO-3D, RHD, STB, and real-world images. For each sample we show, in order, the input image, the predicted 3D pose projected onto the input image, the predicted 3D mesh projected onto the input image, and the 3D mesh from two different viewpoints.

1️⃣ On FreiHAND, our method handles unusual viewpoints and complex poses.
2️⃣ On HO-3D, our method predicts reasonable results under the occlusions caused by hand-object interaction.

3️⃣ Results on the RHD and STB datasets show that our method generalizes well to other datasets.

4️⃣ Results on real-world images demonstrate cross-domain generalization ability.

Together, these results show that our method generalizes across domains and has great potential for VR/AR applications.

Comparisons with SOTA

Figure: Comparisons with state-of-the-art methods on FreiHAND.

FreiHAND: Our method outperforms the other methods on all reported metrics. Moreover, we compare inference speed against other methods on the same computer.
Thanks to its simplicity, our method achieves a much faster inference speed.

Figure: Comparisons with state-of-the-art methods on HO-3D. + denotes training with weights pre-trained on FreiHAND.

HO-3D: We find that models trained on this dataset tend to overfit, so we initialize with weights pre-trained on FreiHAND for regularization. With this initialization, our method outperforms four recent works by a large margin.
Even when trained directly on HO-3D, our method still achieves state-of-the-art performance.

Figure: Comparisons of 3D PCK on the RHD dataset (left) and the STB dataset (right).

RHD and STB: Our method is a general framework that also adapts to the pose estimation task. It outperforms all other methods on the RHD dataset and achieves performance comparable to other methods on the STB dataset. Because STB is relatively easier than the other datasets, recent methods all perform similarly on it.
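For context, 3D PCK (percentage of correct keypoints) is the fraction of predicted 3D joints falling within a distance threshold of the ground truth; the curves compared above sweep this threshold (typically 20–50 mm on RHD and STB) and are often summarized by the area under the curve. A minimal sketch, with tensor shapes assumed:

```python
import torch

def pck_3d(pred: torch.Tensor, gt: torch.Tensor, threshold: float) -> float:
    """Fraction of keypoints whose Euclidean error is below `threshold`
    (same unit as the coordinates, e.g. mm). pred, gt: (N, J, 3)."""
    err = torch.linalg.norm(pred - gt, dim=-1)   # (N, J) per-joint errors
    return (err < threshold).float().mean().item()

# A PCK curve sweeps the threshold; its mean approximates the AUC.
pred, gt = torch.randn(8, 21, 3) * 10, torch.randn(8, 21, 3) * 10
curve = [pck_3d(pred, gt, t) for t in torch.linspace(20, 50, 7)]
```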
Conclusion

In this paper, we propose a simple and effective framework for 3D hand reconstruction from a monocular RGB image. Our method brings three proven ideas from detection-based methods into a regression-based framework: preserving spatial information, modeling uncertainty, and adding a refinement strategy. We realize these ideas with the proposed SAIGB, GBBMR, and PGR modules, respectively. Extensive experiments on four public benchmarks demonstrate that our method achieves state-of-the-art performance as well as the highest inference speed. Because of this balance of accuracy and efficiency, our method holds strong potential for real-world VR/AR applications.

Bibtex

```
@inproceedings{zheng2021sar,
  title={SAR: Spatial-aware regression for 3D hand pose and mesh reconstruction from a monocular RGB image},
  author={Zheng, Xiaozheng and Ren, Pengfei and Sun, Haifeng and Wang, Jingyu and Qi, Qi and Liao, Jianxin},
  booktitle={2021 IEEE International Symposium on Mixed and Augmented Reality (ISMAR)},
  pages={99--108},
  year={2021},
  organization={IEEE}
}
```
Sun","url":"https:\/\/scholar.google.com\/citations?user=dwhbTsEAAAAJ&hl=en","target":"_blank"},"value":{"title":"Haifeng Sun","url":"https:\/\/scholar.google.com\/citations?user=dwhbTsEAAAAJ&hl=en","target":"_blank"},"field":{"ID":2368,"key":"field_687f0c15f3394","label":"\u4f5c\u8005\u4e0e\u4f5c\u8005\u4e3b\u9875","name":"writer_link","aria-label":"","prefix":"acf","type":"link","value":null,"menu_order":0,"instructions":"","required":1,"id":"","class":"","conditional_logic":0,"parent":2366,"wrapper":{"width":"","class":"","id":""},"return_format":"array","allow_in_bindings":1,"_name":"writer_link","_valid":1,"parent_repeater":"field_687f08dfb7e07"}}},{"writer_link":{"simple_value_formatted":"<a href=\"https:\/\/scholar.google.com\/citations?user=H441DjwAAAAJ\" target=\"_blank\" rel=\"noreferrer noopener\">Jingyu Wang<\/a>","value_formatted":{"title":"Jingyu Wang","url":"https:\/\/scholar.google.com\/citations?user=H441DjwAAAAJ","target":"_blank"},"value":{"title":"Jingyu Wang","url":"https:\/\/scholar.google.com\/citations?user=H441DjwAAAAJ","target":"_blank"},"field":{"ID":2368,"key":"field_687f0c15f3394","label":"\u4f5c\u8005\u4e0e\u4f5c\u8005\u4e3b\u9875","name":"writer_link","aria-label":"","prefix":"acf","type":"link","value":null,"menu_order":0,"instructions":"","required":1,"id":"","class":"","conditional_logic":0,"parent":2366,"wrapper":{"width":"","class":"","id":""},"return_format":"array","allow_in_bindings":1,"_name":"writer_link","_valid":1,"parent_repeater":"field_687f08dfb7e07"}}},{"writer_link":{"simple_value_formatted":"<a href=\"https:\/\/scholar.google.com\/citations?user=2W2h0SwAAAAJ\" target=\"_blank\" rel=\"noreferrer noopener\">Qi Qi<\/a>","value_formatted":{"title":"Qi Qi","url":"https:\/\/scholar.google.com\/citations?user=2W2h0SwAAAAJ","target":"_blank"},"value":{"title":"Qi Qi","url":"https:\/\/scholar.google.com\/citations?user=2W2h0SwAAAAJ","target":"_blank"},"field":{"ID":2368,"key":"field_687f0c15f3394","label":"\u4f5c\u8005\u4e0e\u4f5c\u8005\u4e3b\u9875","name":"writer_link","aria-label":"","prefix":"acf","type":"link","value":null,"menu_order":0,"instructions":"","required":1,"id":"","class":"","conditional_logic":0,"parent":2366,"wrapper":{"width":"","class":"","id":""},"return_format":"array","allow_in_bindings":1,"_name":"writer_link","_valid":1,"parent_repeater":"field_687f08dfb7e07"}}},{"writer_link":{"simple_value_formatted":"<a href=\"https:\/\/dblp.org\/pid\/60\/4951.html\" target=\"_blank\" rel=\"noreferrer noopener\">Jianxin Liao<\/a>","value_formatted":{"title":"Jianxin Liao","url":"https:\/\/dblp.org\/pid\/60\/4951.html","target":"_blank"},"value":{"title":"Jianxin Liao","url":"https:\/\/dblp.org\/pid\/60\/4951.html","target":"_blank"},"field":{"ID":2368,"key":"field_687f0c15f3394","label":"\u4f5c\u8005\u4e0e\u4f5c\u8005\u4e3b\u9875","name":"writer_link","aria-label":"","prefix":"acf","type":"link","value":null,"menu_order":0,"instructions":"","required":1,"id":"","class":"","conditional_logic":0,"parent":2366,"wrapper":{"width":"","class":"","id":""},"return_format":"array","allow_in_bindings":1,"_name":"writer_link","_valid":1,"parent_repeater":"field_687f08dfb7e07"}}}],"value":[{"field_687f0c15f3394":{"title":"Xiaozheng Zheng","url":"https:\/\/scholar.google.com\/citations?user=3hSD41oAAAAJ","target":"_blank"}},{"field_687f0c15f3394":{"title":"Pengfei Ren","url":"https:\/\/pengfeiren96.github.io\/","target":"_blank"}},{"field_687f0c15f3394":{"title":"Haifeng 
Sun","url":"https:\/\/scholar.google.com\/citations?user=dwhbTsEAAAAJ&hl=en","target":"_blank"}},{"field_687f0c15f3394":{"title":"Jingyu Wang","url":"https:\/\/scholar.google.com\/citations?user=H441DjwAAAAJ","target":"_blank"}},{"field_687f0c15f3394":{"title":"Qi Qi","url":"https:\/\/scholar.google.com\/citations?user=2W2h0SwAAAAJ","target":"_blank"}},{"field_687f0c15f3394":{"title":"Jianxin Liao","url":"https:\/\/dblp.org\/pid\/60\/4951.html","target":"_blank"}}],"field":{"ID":2366,"key":"field_687f08dfb7e07","label":"\u4f5c\u8005","name":"writer","aria-label":"","prefix":"acf","type":"repeater","value":null,"menu_order":0,"instructions":"","required":1,"id":"","class":"","conditional_logic":0,"parent":52,"wrapper":{"width":"","class":"","id":""},"acfe_repeater_stylised_button":0,"layout":"row","pagination":0,"min":0,"max":0,"collapsed":"","button_label":"Add Row","rows_per_page":20,"_name":"writer","_valid":1,"sub_fields":[{"ID":2368,"key":"field_687f0c15f3394","label":"\u4f5c\u8005\u4e0e\u4f5c\u8005\u4e3b\u9875","name":"writer_link","aria-label":"","prefix":"acf","type":"link","value":null,"menu_order":0,"instructions":"","required":1,"id":"","class":"","conditional_logic":0,"parent":2366,"wrapper":{"width":"","class":"","id":""},"return_format":"array","allow_in_bindings":1,"_name":"writer_link","_valid":1,"parent_repeater":"field_687f08dfb7e07"}]}},"\u4f1a\u8bae\u540d\u79f0":{"simple_value_formatted":"ISMAR","value_formatted":"ISMAR","value":"ISMAR","field":{"ID":53,"key":"field_6759c5b33fdb3","label":"\u4f1a\u8bae\u540d\u79f0","name":"\u4f1a\u8bae\u540d\u79f0","aria-label":"","prefix":"acf","type":"text","value":null,"menu_order":1,"instructions":"","required":1,"id":"","class":"","conditional_logic":0,"parent":52,"wrapper":{"width":"","class":"","id":""},"default_value":"\u586b\u5199\u4f1a\u8bae","maxlength":"","allow_in_bindings":1,"placeholder":"","prepend":"","append":"","_name":"\u4f1a\u8bae\u540d\u79f0","_valid":1}},"\u5e74":{"simple_value_formatted":"2021","value_formatted":"2021","value":"2021","field":{"ID":254,"key":"field_675b036a7706e","label":"\u5e74","name":"\u5e74","aria-label":"","prefix":"acf","type":"text","value":null,"menu_order":2,"instructions":"","required":1,"id":"","class":"","conditional_logic":0,"parent":52,"wrapper":{"width":"","class":"","id":""},"default_value":2024,"maxlength":"","allow_in_bindings":1,"placeholder":"","prepend":"","append":"","_name":"\u5e74","_valid":1}},"code":{"simple_value_formatted":"<a href=\"https:\/\/github.com\/zxz267\/SAR\" target=\"_blank\" rel=\"noreferrer 
noopener\">Code<\/a>","value_formatted":{"title":"Code","url":"https:\/\/github.com\/zxz267\/SAR","target":"_blank"},"value":{"title":"Code","url":"https:\/\/github.com\/zxz267\/SAR","target":"_blank"},"field":{"ID":54,"key":"field_6759c5dc3fdb4","label":"code","name":"code","aria-label":"","prefix":"acf","type":"link","value":null,"menu_order":3,"instructions":"","required":0,"id":"","class":"","conditional_logic":0,"parent":52,"wrapper":{"width":"","class":"","id":""},"return_format":"array","allow_in_bindings":1,"_name":"code","_valid":1}},"arxiv":{"simple_value_formatted":"","value_formatted":"","value":"","field":{"ID":55,"key":"field_6759c5f83fdb5","label":"arXiv","name":"arxiv","aria-label":"","prefix":"acf","type":"link","value":null,"menu_order":4,"instructions":"","required":0,"id":"","class":"","conditional_logic":0,"parent":52,"wrapper":{"width":"","class":"","id":""},"return_format":"array","allow_in_bindings":0,"_name":"arxiv","_valid":1}},"pdf":{"simple_value_formatted":"<a href=\"https:\/\/ieeexplore.ieee.org\/document\/9583792\" target=\"_blank\" rel=\"noreferrer noopener\">PDF<\/a>","value_formatted":{"title":"PDF","url":"https:\/\/ieeexplore.ieee.org\/document\/9583792","target":"_blank"},"value":{"title":"PDF","url":"https:\/\/ieeexplore.ieee.org\/document\/9583792","target":"_blank"},"field":{"ID":56,"key":"field_6759c6b83fdb6","label":"pdf","name":"pdf","aria-label":"","prefix":"acf","type":"link","value":null,"menu_order":5,"instructions":"","required":0,"id":"","class":"","conditional_logic":0,"parent":52,"wrapper":{"width":"","class":"","id":""},"return_format":"array","allow_in_bindings":0,"_name":"pdf","_valid":1}},"rank":{"simple_value_formatted":"CCF-B","value_formatted":"CCF-B","value":"CCF-B","field":{"ID":2316,"key":"field_686b28a2069eb","label":"\u4f1a\u8bae\/\u671f\u520a\u7ea7\u522b","name":"rank","aria-label":"","prefix":"acf","type":"text","value":null,"menu_order":6,"instructions":"","required":0,"id":"","class":"","conditional_logic":0,"parent":52,"wrapper":{"width":"","class":"","id":""},"default_value":"CCF-A","maxlength":"","allow_in_bindings":0,"placeholder":"","prepend":"","append":"","_name":"rank","_valid":1}}},"_links":{"self":[{"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/posts\/796","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/comments?post=796"}],"version-history":[{"count":30,"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/posts\/796\/revisions"}],"predecessor-version":[{"id":2543,"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/posts\/796\/revisions\/2543"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/media\/797"}],"wp:attachment":[{"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/media?parent=796"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/categories?post=796"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/cv.nirc.top\/zh\/wp-json\/wp\/v2\/tags?post=796"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}