diff --git a/New_2_Quantize_TinyLlama (2).ipynb b/New_2_Quantize_TinyLlama (2).ipynb new file mode 100644 index 0000000..127f180 --- /dev/null +++ b/New_2_Quantize_TinyLlama (2).ipynb @@ -0,0 +1,5846 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "ef495d16dd1a4f23be512d86f4b2800b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a71f86b74b7f42d88266be2bc79a2c30", + "IPY_MODEL_cb557d42a7d345c0934604036a93bc10", + "IPY_MODEL_c95e9deefff24265bb7aaefcee32a044" + ], + "layout": "IPY_MODEL_7c4e99924b9449bc8a930002d8ba83fc" + } + }, + "a71f86b74b7f42d88266be2bc79a2c30": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_750d241ac10e4d06903aad102164d983", + "placeholder": "​", + "style": "IPY_MODEL_aa9cce0280794b9aae6074653ec61636", + "value": "Fetching 10 files: 100%" + } + }, + "cb557d42a7d345c0934604036a93bc10": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1946cfddd06743b588ae787a0cea42aa", + "max": 10, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c0625a79528f48dc8ee64459d2909b79", + "value": 10 + } + }, + "c95e9deefff24265bb7aaefcee32a044": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_adb22899471442589285fd41312a503f", + "placeholder": "​", + "style": "IPY_MODEL_a6e55e6f062143e7823fc585a530fc56", + "value": " 10/10 [00:15<00:00,  2.67s/it]" + } + }, + "7c4e99924b9449bc8a930002d8ba83fc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "750d241ac10e4d06903aad102164d983": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa9cce0280794b9aae6074653ec61636": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1946cfddd06743b588ae787a0cea42aa": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c0625a79528f48dc8ee64459d2909b79": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "adb22899471442589285fd41312a503f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a6e55e6f062143e7823fc585a530fc56": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c4f91f5988cf4e2abababec9e5904a73": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_686fe40fc8924f65843cd6029d719bc5", + "IPY_MODEL_34de9719c9ee465493ae92912eff1989", + "IPY_MODEL_a97203da20354f708de631b59fc42e0e" + ], + "layout": "IPY_MODEL_e4188c6d219141f7bd1e15dd05d1bb1a" + } + }, + "686fe40fc8924f65843cd6029d719bc5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1133635a5e6947de89b7ba00339d8fc5", + "placeholder": "​", + "style": "IPY_MODEL_b68f3994adc24215bbe0b07329a7a811", + "value": ".gitattributes: " + } + }, + "34de9719c9ee465493ae92912eff1989": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b4fa7ff09fc149309150fb56acf5cb92", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6a0623311261475f989c6d8e3e17cb87", + "value": 1 + } + }, + "a97203da20354f708de631b59fc42e0e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9fc84d5a4ffd47858ee1ff674d5b2b68", + "placeholder": "​", + "style": "IPY_MODEL_351b929d9fd442328f0b9cf8af9c8f0f", + "value": " 1.52k/? [00:00<00:00, 28.6kB/s]" + } + }, + "e4188c6d219141f7bd1e15dd05d1bb1a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1133635a5e6947de89b7ba00339d8fc5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b68f3994adc24215bbe0b07329a7a811": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b4fa7ff09fc149309150fb56acf5cb92": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "6a0623311261475f989c6d8e3e17cb87": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9fc84d5a4ffd47858ee1ff674d5b2b68": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "351b929d9fd442328f0b9cf8af9c8f0f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e0a2fdf5f7c8420abea56470a099cf96": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_80b3597ba1634c1bbc511f8beabef0bb", + "IPY_MODEL_86eb445c83fc4fdd8471ae3c1190d244", + "IPY_MODEL_5f55e430c0e8427b95e0e428074359b8" + ], + "layout": "IPY_MODEL_d61162836a35450dbb43622b4d7603b1" + } + }, + "80b3597ba1634c1bbc511f8beabef0bb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8b109b26541042fdafef1ce9a0f36472", + "placeholder": "​", + "style": "IPY_MODEL_d3a13151ff334b778ad9dc3020b205cb", + "value": "README.md: " + } + }, + "86eb445c83fc4fdd8471ae3c1190d244": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3661e1e07bc1448c998c7fefd5270dab", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b3b5b109dfec414aba0eecd379a47466", + "value": 1 + } + }, + "5f55e430c0e8427b95e0e428074359b8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_38be34645710407e94c18b1f6817ed43", + "placeholder": "​", + "style": "IPY_MODEL_8b054e76588144a6ab6c2633a65c5ee0", + "value": " 3.20k/? [00:00<00:00, 74.7kB/s]" + } + }, + "d61162836a35450dbb43622b4d7603b1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8b109b26541042fdafef1ce9a0f36472": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d3a13151ff334b778ad9dc3020b205cb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3661e1e07bc1448c998c7fefd5270dab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "b3b5b109dfec414aba0eecd379a47466": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "38be34645710407e94c18b1f6817ed43": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8b054e76588144a6ab6c2633a65c5ee0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b2719a37c1da4e4baba835fb5906e4e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0d818aaeee424148a2316917e5c069da", + "IPY_MODEL_f54cb3dbfeac4313ad0464ff3fb96dac", + "IPY_MODEL_809986e2166947b38c5eed95a18efa8a" + ], + "layout": "IPY_MODEL_55dd322d5c1f443fbdd5a45617a91116" + } + }, + "0d818aaeee424148a2316917e5c069da": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bf434f07b35844ca87f9f5c28d16e4a9", + "placeholder": "​", + "style": "IPY_MODEL_e730394b81db4c7ca5feeac4a525f344", + "value": "eval_results.json: 100%" + } + }, + "f54cb3dbfeac4313ad0464ff3fb96dac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8519cf30feb440eda826f6beaf1646b2", + "max": 566, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1605a710077b412c9a5176cd285d0ea6", + "value": 566 + } + }, + "809986e2166947b38c5eed95a18efa8a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ad17f55244f34f4e9fc82aafab34b538", + "placeholder": "​", + "style": "IPY_MODEL_20dd6544bee74c019ec23272cac6e0ba", + "value": " 566/566 [00:00<00:00, 7.31kB/s]" + } + }, + "55dd322d5c1f443fbdd5a45617a91116": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bf434f07b35844ca87f9f5c28d16e4a9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e730394b81db4c7ca5feeac4a525f344": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8519cf30feb440eda826f6beaf1646b2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1605a710077b412c9a5176cd285d0ea6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ad17f55244f34f4e9fc82aafab34b538": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "20dd6544bee74c019ec23272cac6e0ba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6b770f28de264dc7bf61588443f4e865": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6100ed8b785f4550934154598059a33c", + "IPY_MODEL_862f8584e1734baa860be6f77174bbc6", + "IPY_MODEL_f189ddb799c144bcaa85484a4fe270a9" + ], + "layout": "IPY_MODEL_3a56d7bcae3646e99f0d5e44f7c62f6d" + } + }, + "6100ed8b785f4550934154598059a33c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7a6f706aaf2f4d63a43b26e592e31787", + "placeholder": "​", + "style": "IPY_MODEL_a103635d634c4aebb5fad37e83b16c6a", + "value": "config.json: 100%" + } + }, + "862f8584e1734baa860be6f77174bbc6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9588e29345ba46329262a01250007989", + "max": 608, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2afd5aabc2734fee9c4291a96f6b94b4", + "value": 608 + } + }, + "f189ddb799c144bcaa85484a4fe270a9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cf5a5122e6464c0089d6d1f0bcd654e4", + "placeholder": "​", + "style": "IPY_MODEL_2ce6e39cf73b449db00e73587e8b960d", + "value": " 608/608 [00:00<00:00, 6.30kB/s]" + } + }, + "3a56d7bcae3646e99f0d5e44f7c62f6d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7a6f706aaf2f4d63a43b26e592e31787": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a103635d634c4aebb5fad37e83b16c6a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9588e29345ba46329262a01250007989": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2afd5aabc2734fee9c4291a96f6b94b4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cf5a5122e6464c0089d6d1f0bcd654e4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2ce6e39cf73b449db00e73587e8b960d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "658981420ec74c32b9a5aa2fd12d65fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1680e40f7f634a4dbe26fc80d26d019f", + "IPY_MODEL_7352763c56e44cf6b01e6f35cbc45f6c", + "IPY_MODEL_1a74f2892d284c438dc9f8e979236c46" + ], + "layout": "IPY_MODEL_f2620c076e7245e9bb64c047189cef8f" + } + }, + "1680e40f7f634a4dbe26fc80d26d019f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e854978dea614a6184cfdbe908dbbb7d", + "placeholder": "​", + "style": "IPY_MODEL_62feb19f455347c9ba026157b1aa8c6c", + "value": "generation_config.json: 100%" + } + }, + "7352763c56e44cf6b01e6f35cbc45f6c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c045dbabf97248f2bc92fd511d0914ff", + "max": 124, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a8a4df10db514857b24002267415cbb4", + "value": 124 + } + }, + "1a74f2892d284c438dc9f8e979236c46": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48a041c2e9524059b5f36edc99ae77fc", + "placeholder": "​", + "style": "IPY_MODEL_53d706caff2f4b968d8ddaed3449475c", + "value": " 124/124 [00:00<00:00, 1.56kB/s]" + } + }, + "f2620c076e7245e9bb64c047189cef8f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e854978dea614a6184cfdbe908dbbb7d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "62feb19f455347c9ba026157b1aa8c6c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c045dbabf97248f2bc92fd511d0914ff": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a8a4df10db514857b24002267415cbb4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "48a041c2e9524059b5f36edc99ae77fc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "53d706caff2f4b968d8ddaed3449475c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d22fc1e159ac4ab1a9fa1562af765cc7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ab31e4e465b445bc88ccda012e39c9e8", + "IPY_MODEL_69d7dd7f4f9b4bf29f4185ba49abe03a", + "IPY_MODEL_8982da26b3ab4a17801f13d90175a62d" + ], + "layout": "IPY_MODEL_2919034923814f11a86b9b6ff6c2f3fa" + } + }, + "ab31e4e465b445bc88ccda012e39c9e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0fe23be8273c448bb454b37342be6085", + "placeholder": "​", + "style": "IPY_MODEL_99ebf39523d34ee7976cf685d15f2ba7", + "value": "model.safetensors: 100%" + } + }, + "69d7dd7f4f9b4bf29f4185ba49abe03a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33f687ead6784ae7a2e15e7e0a0b1036", + "max": 2200119864, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_088a7da6b99c4c63b9a3fd15aa25ee0b", + "value": 2200119864 + } + }, + "8982da26b3ab4a17801f13d90175a62d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8b9026ff40284939b49d8d287641df28", + "placeholder": "​", + "style": "IPY_MODEL_4b1c7a48d23b46179e9cef5130df6399", + "value": " 2.20G/2.20G [00:15<00:00, 183MB/s]" + } + }, + "2919034923814f11a86b9b6ff6c2f3fa": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0fe23be8273c448bb454b37342be6085": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "99ebf39523d34ee7976cf685d15f2ba7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "33f687ead6784ae7a2e15e7e0a0b1036": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "088a7da6b99c4c63b9a3fd15aa25ee0b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8b9026ff40284939b49d8d287641df28": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4b1c7a48d23b46179e9cef5130df6399": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2643002917114763920f0421fc83bc67": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_affa587d803f4476b39dafba6b465c11", + "IPY_MODEL_0fe75d35713f4e578bb22d57124e6b91", + "IPY_MODEL_cecf91c366e643058d61e6acca9512c1" + ], + "layout": "IPY_MODEL_41125f5d4d0d48b080208754c0462524" + } + }, + "affa587d803f4476b39dafba6b465c11": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b1937d82a61f4d4fa0431a3c558748ab", + "placeholder": "​", + "style": "IPY_MODEL_53ee84296103418dbbc5f3ea0a3d2ad4", + "value": "special_tokens_map.json: 100%" + } + }, + "0fe75d35713f4e578bb22d57124e6b91": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fd61b8725e3f41e1afe7e7e29d99c417", + "max": 551, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0826103756d94005849f45cdbcb1d9fd", + "value": 551 + } + }, + "cecf91c366e643058d61e6acca9512c1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_efd79e28736c4bd48c856e7646b303ac", + "placeholder": "​", + "style": "IPY_MODEL_0265104c4e7747fd9087792de45803c8", + "value": " 551/551 [00:00<00:00, 17.6kB/s]" + } + }, + "41125f5d4d0d48b080208754c0462524": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b1937d82a61f4d4fa0431a3c558748ab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "53ee84296103418dbbc5f3ea0a3d2ad4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fd61b8725e3f41e1afe7e7e29d99c417": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0826103756d94005849f45cdbcb1d9fd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "efd79e28736c4bd48c856e7646b303ac": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0265104c4e7747fd9087792de45803c8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8c2867ee87534277931486bd4032f7e0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_95076527f50d4d8fbcd02933393f476a", + "IPY_MODEL_0690ada1b0b44e1b8112de6813666fcf", + "IPY_MODEL_9e386007d0f3409fb650329f81f84d81" + ], + "layout": "IPY_MODEL_575806acf4a94ff4b88546d90f486f32" + } + }, + "95076527f50d4d8fbcd02933393f476a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bb91269428644df1babd1ae39341ec33", + "placeholder": "​", + "style": "IPY_MODEL_a19b5850abca49348553d87cc9695d2b", + "value": "tokenizer.json: " + } + }, + "0690ada1b0b44e1b8112de6813666fcf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e9ecc447d5aa4a5f9341c94aecdeb403", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_26b2822ca8904593aa2a95d23068fcbb", + "value": 1 + } + }, + "9e386007d0f3409fb650329f81f84d81": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5e2dd7e5d39f4645ad53cb92ee056c02", + "placeholder": "​", + "style": "IPY_MODEL_504cf356c80e40c69d80d6af2bda4d19", + "value": " 1.84M/? [00:00<00:00, 15.5MB/s]" + } + }, + "575806acf4a94ff4b88546d90f486f32": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bb91269428644df1babd1ae39341ec33": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a19b5850abca49348553d87cc9695d2b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e9ecc447d5aa4a5f9341c94aecdeb403": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "26b2822ca8904593aa2a95d23068fcbb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5e2dd7e5d39f4645ad53cb92ee056c02": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "504cf356c80e40c69d80d6af2bda4d19": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "de310b63d1c8418388992fe569036203": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_52c870a107ff45ccac57fedfe40d5135", + "IPY_MODEL_8155e6ea360e41f89e3f8432fc1a5314", + "IPY_MODEL_59c98ddd8a6e44ed88e6785bfcac8821" + ], + "layout": "IPY_MODEL_8981e41b49bf4482b5903d3852ac0f02" + } + }, + "52c870a107ff45ccac57fedfe40d5135": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e2c7a889c4704065a862f00c5b0f59d5", + "placeholder": "​", + "style": "IPY_MODEL_c2191171dd014e29816ff9da1c29003a", + "value": "tokenizer_config.json: " + } + }, + "8155e6ea360e41f89e3f8432fc1a5314": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b38f03393e084cd8a60fccc37423e806", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8493e8b0e2d2455cb98161191da9727f", + "value": 1 + } + }, + "59c98ddd8a6e44ed88e6785bfcac8821": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6e42adf16d3343cba5d35cf25777598e", + "placeholder": "​", + "style": "IPY_MODEL_81a52af510494f10a6acb9f449f41e20", + "value": " 1.29k/? [00:00<00:00, 29.3kB/s]" + } + }, + "8981e41b49bf4482b5903d3852ac0f02": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e2c7a889c4704065a862f00c5b0f59d5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c2191171dd014e29816ff9da1c29003a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b38f03393e084cd8a60fccc37423e806": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "8493e8b0e2d2455cb98161191da9727f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6e42adf16d3343cba5d35cf25777598e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "81a52af510494f10a6acb9f449f41e20": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "06866706b5e24918b71ead17a4d85beb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_492562169b04462f9d36375a5e4bb3fb", + "IPY_MODEL_2c4edd5214de459fb2d8862944adb7da", + "IPY_MODEL_0eebb5586f1e4b649bb375e79f5bec63" + ], + "layout": "IPY_MODEL_87160fec94c049fc808034e6cbc652c1" + } + }, + "492562169b04462f9d36375a5e4bb3fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_246acc02a5e5491c910a48b868d2a295", + "placeholder": "​", + "style": "IPY_MODEL_0fc5be4037a94e92893120f1c5d30b17", + "value": "tokenizer.model: 100%" + } + }, + "2c4edd5214de459fb2d8862944adb7da": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_35491086cd6248da9ce15da93519273a", + "max": 499723, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_af8c9143672c484da309f3e8c486a118", + "value": 499723 + } + }, + "0eebb5586f1e4b649bb375e79f5bec63": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_36b7aa8e2c7249278838c6190fd8af98", + "placeholder": "​", + "style": "IPY_MODEL_973354c51a6244a9812dcd3a62b4a55a", + "value": " 500k/500k [00:00<00:00, 3.44MB/s]" + } + }, + "87160fec94c049fc808034e6cbc652c1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "246acc02a5e5491c910a48b868d2a295": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0fc5be4037a94e92893120f1c5d30b17": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "35491086cd6248da9ce15da93519273a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "af8c9143672c484da309f3e8c486a118": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "36b7aa8e2c7249278838c6190fd8af98": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "973354c51a6244a9812dcd3a62b4a55a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w4eS0-nNc944", + "outputId": "fcd66806-3c37-4101-e77b-5d5c43061aab" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'SpQR'...\n", + "remote: Enumerating objects: 1511, done.\u001b[K\n", + "remote: Counting objects: 100% (298/298), done.\u001b[K\n", + "remote: Compressing objects: 100% (131/131), done.\u001b[K\n", + "remote: Total 1511 (delta 216), reused 167 (delta 167), pack-reused 1213 (from 2)\u001b[K\n", + "Receiving objects: 100% (1511/1511), 22.35 MiB | 11.05 MiB/s, done.\n", + "Resolving deltas: 100% (544/544), done.\n", + "/content/SpQR\n" + ] + } + ], + "source": [ + "!git clone https://github.com/Vahe1994/SpQR.git\n", + "%cd SpQR" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -r requirements.txt" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gka6qcIfdZDi", + "outputId": "7f677c82-1674-4f55-da01-843ff1e99b8d" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: safetensors in /usr/local/lib/python3.11/dist-packages (from -r requirements.txt (line 1)) (0.5.3)\n", + "Collecting datasets>=2.16.1 (from -r requirements.txt (line 2))\n", + " Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)\n", + "Collecting sentencepiece==0.1.97 (from -r requirements.txt (line 3))\n", + " Downloading sentencepiece-0.1.97.tar.gz (524 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.7/524.7 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: transformers>=4.36 in /usr/local/lib/python3.11/dist-packages (from -r requirements.txt (line 4)) (4.53.2)\n", + "Requirement already satisfied: accelerate>=0.26.0 in /usr/local/lib/python3.11/dist-packages (from -r requirements.txt (line 5)) (1.9.0)\n", + "Requirement already satisfied: tokenizers>=0.14.1 in /usr/local/lib/python3.11/dist-packages (from -r requirements.txt (line 6)) (0.21.2)\n", + "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from -r requirements.txt (line 7)) (2.6.0+cu124)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (3.18.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (2.0.2)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (18.1.0)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (0.3.7)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (2.2.2)\n", + "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (4.67.1)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (3.5.0)\n", + "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (0.70.15)\n", + "Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2))\n", + " Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: huggingface-hub>=0.24.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (0.33.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.1->-r requirements.txt (line 2)) (6.0.2)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.36->-r requirements.txt (line 4)) (2024.11.6)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.26.0->-r requirements.txt (line 5)) (5.9.5)\n", + "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->-r requirements.txt (line 7)) (4.14.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->-r requirements.txt (line 7)) (3.5)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->-r requirements.txt (line 7)) (3.1.6)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->-r requirements.txt (line 7)) (0.6.2)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->-r requirements.txt (line 7)) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->-r requirements.txt (line 7)) (12.4.127)\n", + "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch>=2.4.0->-r requirements.txt (line 7))\n", + " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->-r requirements.txt (line 7)) (3.2.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->-r requirements.txt (line 7)) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->-r requirements.txt (line 7)) (1.3.0)\n", + "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2)) (3.11.15)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.24.0->datasets>=2.16.1->-r requirements.txt (line 2)) (1.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.1->-r requirements.txt (line 2)) (3.4.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.1->-r requirements.txt (line 2)) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.1->-r requirements.txt (line 2)) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.1->-r requirements.txt (line 2)) (2025.7.14)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=2.4.0->-r requirements.txt (line 7)) (3.0.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.1->-r requirements.txt (line 2)) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.1->-r requirements.txt (line 2)) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.1->-r requirements.txt (line 2)) (2025.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2)) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2)) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2)) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2)) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2)) (6.6.3)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2)) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.16.1->-r requirements.txt (line 2)) (1.20.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.16.1->-r requirements.txt (line 2)) (1.17.0)\n", + "Downloading datasets-4.0.0-py3-none-any.whl (494 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m494.8/494.8 kB\u001b[0m \u001b[31m36.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m36.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m35.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m43.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m193.6/193.6 kB\u001b[0m \u001b[31m18.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: sentencepiece\n", + " Building wheel for sentencepiece (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for sentencepiece: filename=sentencepiece-0.1.97-cp311-cp311-linux_x86_64.whl size=1244400 sha256=1781e7c94c7f20ae3b1b23f40ad38c8cd67de9accc47d7413d76dc824f44c204\n", + " Stored in directory: /root/.cache/pip/wheels/04/dd/ab/6e3d4b6b17fe7ca0f54548ae20941c40bb3f15839d66f5598c\n", + "Successfully built sentencepiece\n", + "Installing collected packages: sentencepiece, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, fsspec, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, datasets\n", + " Attempting uninstall: sentencepiece\n", + " Found existing installation: sentencepiece 0.2.0\n", + " Uninstalling sentencepiece-0.2.0:\n", + " Successfully uninstalled sentencepiece-0.2.0\n", + " Attempting uninstall: nvidia-nvjitlink-cu12\n", + " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", + " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", + " Attempting uninstall: nvidia-curand-cu12\n", + " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", + " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", + " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", + " Attempting uninstall: nvidia-cufft-cu12\n", + " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", + " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", + " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", + " Attempting uninstall: nvidia-cuda-runtime-cu12\n", + " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", + " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-cupti-cu12\n", + " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cublas-cu12\n", + " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", + " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", + " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2025.7.0\n", + " Uninstalling fsspec-2025.7.0:\n", + " Successfully uninstalled fsspec-2025.7.0\n", + " Attempting uninstall: nvidia-cusparse-cu12\n", + " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", + " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", + " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", + " Attempting uninstall: nvidia-cudnn-cu12\n", + " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", + " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", + " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", + " Attempting uninstall: nvidia-cusolver-cu12\n", + " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", + " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", + " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", + " Attempting uninstall: datasets\n", + " Found existing installation: datasets 2.14.4\n", + " Uninstalling datasets-2.14.4:\n", + " Successfully uninstalled datasets-2.14.4\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "gcsfs 2025.7.0 requires fsspec==2025.7.0, but you have fsspec 2025.3.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed datasets-4.0.0 fsspec-2025.3.0 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 sentencepiece-0.1.97\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install fsspec==2025.7.0" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vuK9njwBdZBw", + "outputId": "e3041577-f9de-4832-fe2d-432ecb9bb032" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting fsspec==2025.7.0\n", + " Downloading fsspec-2025.7.0-py3-none-any.whl.metadata (12 kB)\n", + "Downloading fsspec-2025.7.0-py3-none-any.whl (199 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/199.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━\u001b[0m \u001b[32m194.6/199.6 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.6/199.6 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: fsspec\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2025.3.0\n", + " Uninstalling fsspec-2025.3.0:\n", + " Successfully uninstalled fsspec-2025.3.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "datasets 4.0.0 requires fsspec[http]<=2025.3.0,>=2023.1.0, but you have fsspec 2025.7.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed fsspec-2025.7.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install datasets==2.16.1" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z24jgE_udY9X", + "outputId": "b587e0c6-27e0-437d-fd72-7c1fdd9e7fb8" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting datasets==2.16.1\n", + " Downloading datasets-2.16.1-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (3.18.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (2.0.2)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (18.1.0)\n", + "Collecting pyarrow-hotfix (from datasets==2.16.1)\n", + " Downloading pyarrow_hotfix-0.7-py3-none-any.whl.metadata (3.6 kB)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (0.3.7)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (2.2.2)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (4.67.1)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (3.5.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (0.70.15)\n", + "Collecting fsspec<=2023.10.0,>=2023.1.0 (from fsspec[http]<=2023.10.0,>=2023.1.0->datasets==2.16.1)\n", + " Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (3.11.15)\n", + "Requirement already satisfied: huggingface-hub>=0.19.4 in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (0.33.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets==2.16.1) (6.0.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.16.1) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.16.1) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.16.1) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.16.1) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.16.1) (6.6.3)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.16.1) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.16.1) (1.20.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.19.4->datasets==2.16.1) (4.14.1)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.19.4->datasets==2.16.1) (1.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets==2.16.1) (3.4.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets==2.16.1) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets==2.16.1) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets==2.16.1) (2025.7.14)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets==2.16.1) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets==2.16.1) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets==2.16.1) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets==2.16.1) (1.17.0)\n", + "Downloading datasets-2.16.1-py3-none-any.whl (507 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.4/166.4 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyarrow_hotfix-0.7-py3-none-any.whl (7.9 kB)\n", + "Installing collected packages: pyarrow-hotfix, fsspec, datasets\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2025.7.0\n", + " Uninstalling fsspec-2025.7.0:\n", + " Successfully uninstalled fsspec-2025.7.0\n", + " Attempting uninstall: datasets\n", + " Found existing installation: datasets 4.0.0\n", + " Uninstalling datasets-4.0.0:\n", + " Successfully uninstalled datasets-4.0.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "gcsfs 2025.7.0 requires fsspec==2025.7.0, but you have fsspec 2023.10.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed datasets-2.16.1 fsspec-2023.10.0 pyarrow-hotfix-0.7\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install datasets==2.19.1" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZlkUej9ddY7Q", + "outputId": "0a9ea72c-406d-4ed2-841d-51b6668b83c3" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting datasets==2.19.1\n", + " Downloading datasets-2.19.1-py3-none-any.whl.metadata (19 kB)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (3.18.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (2.0.2)\n", + "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (18.1.0)\n", + "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (0.7)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (0.3.7)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (2.2.2)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (4.67.1)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (3.5.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (0.70.15)\n", + "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets==2.19.1) (2023.10.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (3.11.15)\n", + "Requirement already satisfied: huggingface-hub>=0.21.2 in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (0.33.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets==2.19.1) (6.0.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.19.1) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.19.1) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.19.1) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.19.1) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.19.1) (6.6.3)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.19.1) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets==2.19.1) (1.20.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.21.2->datasets==2.19.1) (4.14.1)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.21.2->datasets==2.19.1) (1.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets==2.19.1) (3.4.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets==2.19.1) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets==2.19.1) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets==2.19.1) (2025.7.14)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets==2.19.1) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets==2.19.1) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets==2.19.1) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets==2.19.1) (1.17.0)\n", + "Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: datasets\n", + " Attempting uninstall: datasets\n", + " Found existing installation: datasets 2.16.1\n", + " Uninstalling datasets-2.16.1:\n", + " Successfully uninstalled datasets-2.16.1\n", + "Successfully installed datasets-2.19.1\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import importlib\n", + "\n", + "def print_package_version(package_name: str):\n", + " try:\n", + " package = importlib.import_module(package_name)\n", + "\n", + " if hasattr(package, \"__version__\"):\n", + " print(f\"{package_name} version: {package.__version__}\")\n", + " else:\n", + " print(f\"Package '{package_name}' does not have a __version__ attribute.\")\n", + " except ModuleNotFoundError:\n", + " print(f\"Package '{package_name}' is not installed.\")\n", + " except Exception as e:\n", + " print(f\"Error while checking version of '{package_name}': {e}\")" + ], + "metadata": { + "id": "HiqtOIeXdY41" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print_package_version(\"datasets\")\n", + "print_package_version(\"sentencepiece\")\n", + "print_package_version(\"transformers\")\n", + "print_package_version(\"accelerate\")\n", + "print_package_version(\"tokenizers\")\n", + "print_package_version(\"safetensors\")\n", + "print_package_version(\"torch\")\n", + "print_package_version(\"gcsfs\")\n", + "print_package_version(\"fsspec\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "G8hUGiG-dY2g", + "outputId": "b977f4ec-df6c-4d91-8e4c-3d966b3b2683" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "datasets version: 2.19.1\n", + "sentencepiece version: 0.1.97\n", + "transformers version: 4.53.2\n", + "accelerate version: 1.9.0\n", + "tokenizers version: 0.21.2\n", + "safetensors version: 0.5.3\n", + "torch version: 2.6.0+cu124\n", + "gcsfs version: 2025.7.0\n", + "fsspec version: 2023.10.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!git lfs install" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tVX9UHiddY0b", + "outputId": "19e318e3-3311-454e-f040-394831d34874" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Updated git hooks.\n", + "Git LFS initialized.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!git clone https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iAXZ1RMkdYyU", + "outputId": "86338ef9-2a2b-44a8-e551-9427deb5daa9" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'TinyLlama-1.1B-Chat-v1.0'...\n", + "remote: Enumerating objects: 60, done.\u001b[K\n", + "remote: Counting objects: 100% (57/57), done.\u001b[K\n", + "remote: Compressing objects: 100% (57/57), done.\u001b[K\n", + "remote: Total 60 (delta 21), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n", + "Unpacking objects: 100% (60/60), 519.98 KiB | 2.71 MiB/s, done.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "print(os.getcwd())" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yQJ_Ay3WdYwB", + "outputId": "080ff8a9-112e-4738-990f-f97d0fea6081" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/SpQR\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(os.getenv(\"HF_HOME\"))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uhSZozOVld89", + "outputId": "a4ce5c8a-5318-4010-8eb4-47297df11251" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "None\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from huggingface_hub import snapshot_download\n", + "\n", + "model_repo = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\"\n", + "local_dir = snapshot_download(repo_id=model_repo)\n", + "print(f\"Model downloaded to: {local_dir}\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 514, + "referenced_widgets": [ + "ef495d16dd1a4f23be512d86f4b2800b", + "a71f86b74b7f42d88266be2bc79a2c30", + "cb557d42a7d345c0934604036a93bc10", + "c95e9deefff24265bb7aaefcee32a044", + "7c4e99924b9449bc8a930002d8ba83fc", + "750d241ac10e4d06903aad102164d983", + "aa9cce0280794b9aae6074653ec61636", + "1946cfddd06743b588ae787a0cea42aa", + "c0625a79528f48dc8ee64459d2909b79", + "adb22899471442589285fd41312a503f", + "a6e55e6f062143e7823fc585a530fc56", + "c4f91f5988cf4e2abababec9e5904a73", + "686fe40fc8924f65843cd6029d719bc5", + "34de9719c9ee465493ae92912eff1989", + "a97203da20354f708de631b59fc42e0e", + "e4188c6d219141f7bd1e15dd05d1bb1a", + "1133635a5e6947de89b7ba00339d8fc5", + "b68f3994adc24215bbe0b07329a7a811", + "b4fa7ff09fc149309150fb56acf5cb92", + "6a0623311261475f989c6d8e3e17cb87", + "9fc84d5a4ffd47858ee1ff674d5b2b68", + "351b929d9fd442328f0b9cf8af9c8f0f", + "e0a2fdf5f7c8420abea56470a099cf96", + "80b3597ba1634c1bbc511f8beabef0bb", + "86eb445c83fc4fdd8471ae3c1190d244", + "5f55e430c0e8427b95e0e428074359b8", + "d61162836a35450dbb43622b4d7603b1", + "8b109b26541042fdafef1ce9a0f36472", + "d3a13151ff334b778ad9dc3020b205cb", + "3661e1e07bc1448c998c7fefd5270dab", + "b3b5b109dfec414aba0eecd379a47466", + "38be34645710407e94c18b1f6817ed43", + "8b054e76588144a6ab6c2633a65c5ee0", + "b2719a37c1da4e4baba835fb5906e4e8", + "0d818aaeee424148a2316917e5c069da", + "f54cb3dbfeac4313ad0464ff3fb96dac", + "809986e2166947b38c5eed95a18efa8a", + "55dd322d5c1f443fbdd5a45617a91116", + "bf434f07b35844ca87f9f5c28d16e4a9", + "e730394b81db4c7ca5feeac4a525f344", + "8519cf30feb440eda826f6beaf1646b2", + "1605a710077b412c9a5176cd285d0ea6", + "ad17f55244f34f4e9fc82aafab34b538", + "20dd6544bee74c019ec23272cac6e0ba", + "6b770f28de264dc7bf61588443f4e865", + "6100ed8b785f4550934154598059a33c", + "862f8584e1734baa860be6f77174bbc6", + "f189ddb799c144bcaa85484a4fe270a9", + "3a56d7bcae3646e99f0d5e44f7c62f6d", + "7a6f706aaf2f4d63a43b26e592e31787", + "a103635d634c4aebb5fad37e83b16c6a", + "9588e29345ba46329262a01250007989", + "2afd5aabc2734fee9c4291a96f6b94b4", + "cf5a5122e6464c0089d6d1f0bcd654e4", + "2ce6e39cf73b449db00e73587e8b960d", + "658981420ec74c32b9a5aa2fd12d65fb", + "1680e40f7f634a4dbe26fc80d26d019f", + "7352763c56e44cf6b01e6f35cbc45f6c", + "1a74f2892d284c438dc9f8e979236c46", + "f2620c076e7245e9bb64c047189cef8f", + "e854978dea614a6184cfdbe908dbbb7d", + "62feb19f455347c9ba026157b1aa8c6c", + "c045dbabf97248f2bc92fd511d0914ff", + "a8a4df10db514857b24002267415cbb4", + "48a041c2e9524059b5f36edc99ae77fc", + "53d706caff2f4b968d8ddaed3449475c", + "d22fc1e159ac4ab1a9fa1562af765cc7", + "ab31e4e465b445bc88ccda012e39c9e8", + "69d7dd7f4f9b4bf29f4185ba49abe03a", + "8982da26b3ab4a17801f13d90175a62d", + "2919034923814f11a86b9b6ff6c2f3fa", + "0fe23be8273c448bb454b37342be6085", + "99ebf39523d34ee7976cf685d15f2ba7", + "33f687ead6784ae7a2e15e7e0a0b1036", + "088a7da6b99c4c63b9a3fd15aa25ee0b", + "8b9026ff40284939b49d8d287641df28", + "4b1c7a48d23b46179e9cef5130df6399", + "2643002917114763920f0421fc83bc67", + "affa587d803f4476b39dafba6b465c11", + "0fe75d35713f4e578bb22d57124e6b91", + "cecf91c366e643058d61e6acca9512c1", + "41125f5d4d0d48b080208754c0462524", + "b1937d82a61f4d4fa0431a3c558748ab", + "53ee84296103418dbbc5f3ea0a3d2ad4", + "fd61b8725e3f41e1afe7e7e29d99c417", + "0826103756d94005849f45cdbcb1d9fd", + "efd79e28736c4bd48c856e7646b303ac", + "0265104c4e7747fd9087792de45803c8", + "8c2867ee87534277931486bd4032f7e0", + "95076527f50d4d8fbcd02933393f476a", + "0690ada1b0b44e1b8112de6813666fcf", + "9e386007d0f3409fb650329f81f84d81", + "575806acf4a94ff4b88546d90f486f32", + "bb91269428644df1babd1ae39341ec33", + "a19b5850abca49348553d87cc9695d2b", + "e9ecc447d5aa4a5f9341c94aecdeb403", + "26b2822ca8904593aa2a95d23068fcbb", + "5e2dd7e5d39f4645ad53cb92ee056c02", + "504cf356c80e40c69d80d6af2bda4d19", + "de310b63d1c8418388992fe569036203", + "52c870a107ff45ccac57fedfe40d5135", + "8155e6ea360e41f89e3f8432fc1a5314", + "59c98ddd8a6e44ed88e6785bfcac8821", + "8981e41b49bf4482b5903d3852ac0f02", + "e2c7a889c4704065a862f00c5b0f59d5", + "c2191171dd014e29816ff9da1c29003a", + "b38f03393e084cd8a60fccc37423e806", + "8493e8b0e2d2455cb98161191da9727f", + "6e42adf16d3343cba5d35cf25777598e", + "81a52af510494f10a6acb9f449f41e20", + "06866706b5e24918b71ead17a4d85beb", + "492562169b04462f9d36375a5e4bb3fb", + "2c4edd5214de459fb2d8862944adb7da", + "0eebb5586f1e4b649bb375e79f5bec63", + "87160fec94c049fc808034e6cbc652c1", + "246acc02a5e5491c910a48b868d2a295", + "0fc5be4037a94e92893120f1c5d30b17", + "35491086cd6248da9ce15da93519273a", + "af8c9143672c484da309f3e8c486a118", + "36b7aa8e2c7249278838c6190fd8af98", + "973354c51a6244a9812dcd3a62b4a55a" + ] + }, + "id": "Hd5hXTCrqvmA", + "outputId": "8c324d7d-ec1d-46d3-ade8-e9b3f11e632c" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Fetching 10 files: 0%| | 0/10 [00:00\n", + " quantize_model(model, args, device)\n", + " File \"/content/SpQR/main.py\", line 89, in quantize_model\n", + " results = quantize_spqr(model, dataloader, args, device)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py\", line 116, in decorate_context\n", + " return func(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/content/SpQR/main.py\", line 230, in quantize_spqr\n", + " outs[j] = layer(inps[j].to(layer_dev).unsqueeze(0), **forward_args)[0]\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/modeling_layers.py\", line 83, in __call__\n", + " return super().__call__(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\", line 1739, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\", line 1750, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/models/llama/modeling_llama.py\", line 290, in forward\n", + " hidden_states, self_attn_weights = self.self_attn(\n", + " ^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\", line 1739, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\", line 1750, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/models/llama/modeling_llama.py\", line 235, in forward\n", + " cos, sin = position_embeddings\n", + " ^^^^^^^^\n", + "TypeError: cannot unpack non-iterable NoneType object\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(model.dtype)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rdZmVaVUldt_", + "outputId": "e19fb060-4d46-4177-caf4-d6c3f0c12129" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.bfloat16\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(model.parameters)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0zbn6B4GldqQ", + "outputId": "00f8a712-17c5-4b51-ec11-9fec18a89ddf" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(model.forward)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SOTz41u8ldlp", + "outputId": "75a24675-93c8-42b2-a885-8932e45f4f0a" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "To resolve the error, AI suggested to build a Wrapper class that intercepts the call. We tried it out but in the end, the same mistake arised." + ], + "metadata": { + "id": "gGJt1tAEF3xu" + } + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "\n", + "class RotaryWrapper(torch.nn.Module):\n", + " def __init__(self, model):\n", + " super().__init__()\n", + " self.model = model\n", + "\n", + " def forward(self, *args, **kwargs):\n", + " # Inject rotary embeddings before forward\n", + " if \"position_embeddings\" not in kwargs or kwargs[\"position_embeddings\"] is None:\n", + " seq_len = kwargs[\"input_ids\"].shape[1]\n", + " kwargs[\"position_embeddings\"] = self.model.model.rotary_emb(seq_len)\n", + " return self.model(*args, **kwargs)\n" + ], + "metadata": { + "id": "r9kpGVRYldgx" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model = RotaryWrapper(model)" + ], + "metadata": { + "id": "j_dlynFildcg" + }, + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!python main.py \"/root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6\" \\\n", + " \"pajama\" --wbits 4 --groupsize 16 --perchannel \\\n", + " --outlier_threshold=0.2 --nsamples 128" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ddNqS2WXldaQ", + "outputId": "cdae9172-0774-45cb-8ebf-6dd553b9353f" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "============ Loading model... ============\n", + "Loading pretrained model ...\n", + "2025-07-24 16:43:21.156604: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1753375401.189309 4136 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1753375401.195550 4136 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "Model loaded sucessfully ...\n", + "\n", + "============ Quantizing model... ============\n", + "Loading data ...\n", + "Loaded data from pajama; len(data)=128\n", + "\n", + "Starting SPQR quantization ...\n", + "catching inputs from data\n", + "\n", + "---------------- Layer 0 of 22 ----------------\n", + "layer_dev_original=device(type='cpu')\n", + "Traceback (most recent call last):\n", + " File \"/content/SpQR/main.py\", line 582, in \n", + " quantize_model(model, args, device)\n", + " File \"/content/SpQR/main.py\", line 89, in quantize_model\n", + " results = quantize_spqr(model, dataloader, args, device)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py\", line 116, in decorate_context\n", + " return func(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/content/SpQR/main.py\", line 230, in quantize_spqr\n", + " outs[j] = layer(inps[j].to(layer_dev).unsqueeze(0), **forward_args)[0]\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/modeling_layers.py\", line 83, in __call__\n", + " return super().__call__(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\", line 1739, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\", line 1750, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/models/llama/modeling_llama.py\", line 290, in forward\n", + " hidden_states, self_attn_weights = self.self_attn(\n", + " ^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\", line 1739, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\", line 1750, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/models/llama/modeling_llama.py\", line 235, in forward\n", + " cos, sin = position_embeddings\n", + " ^^^^^^^^\n", + "TypeError: cannot unpack non-iterable NoneType object\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The problem are the rotary embeddings." + ], + "metadata": { + "id": "ZA_NQyaWIq-w" + } + }, + { + "cell_type": "code", + "source": [ + "model.eval()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b4VDusobIqzS", + "outputId": "8940425b-a42b-4d52-be77-4fb2fab9dd99" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RotaryWrapper(\n", + " (model): LlamaForCausalLM(\n", + " (model): LlamaModel(\n", + " (embed_tokens): Embedding(32000, 2048)\n", + " (layers): ModuleList(\n", + " (0-21): 22 x LlamaDecoderLayer(\n", + " (self_attn): LlamaAttention(\n", + " (q_proj): Linear(in_features=2048, out_features=2048, bias=False)\n", + " (k_proj): Linear(in_features=2048, out_features=256, bias=False)\n", + " (v_proj): Linear(in_features=2048, out_features=256, bias=False)\n", + " (o_proj): Linear(in_features=2048, out_features=2048, bias=False)\n", + " )\n", + " (mlp): LlamaMLP(\n", + " (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)\n", + " (up_proj): Linear(in_features=2048, out_features=5632, bias=False)\n", + " (down_proj): Linear(in_features=5632, out_features=2048, bias=False)\n", + " (act_fn): SiLU()\n", + " )\n", + " (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n", + " (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n", + " )\n", + " )\n", + " (norm): LlamaRMSNorm((2048,), eps=1e-05)\n", + " (rotary_emb): LlamaRotaryEmbedding()\n", + " )\n", + " (lm_head): Linear(in_features=2048, out_features=32000, bias=False)\n", + " )\n", + ")" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Since the internet does not really help (nothing related to this specific error), we used AI to help resolve this issue.\n", + "\n", + "The first suggestion is to inspect config.json and look for embedding keyword or something similar. In particular, MS Copilot suggested to look for:\n", + "- keyword rope_scaling which is set to linear and a factor, for example 1.0\n", + "- or use_rotary_embeddings: true\n", + "\n", + "Navigating and displaying the content of config.json we see the following output:\n", + "\n", + "\n", + "\n", + "```\n", + "# Als Code formatiert\n", + "{\n", + " \"architectures\": [\n", + " \"LlamaForCausalLM\"\n", + " ],\n", + " \"attention_bias\": false,\n", + " \"bos_token_id\": 1,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 2048,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 5632,\n", + " \"max_position_embeddings\": 2048,\n", + " \"model_type\": \"llama\",\n", + " \"num_attention_heads\": 32,\n", + " \"num_hidden_layers\": 22,\n", + " \"num_key_value_heads\": 4,\n", + " \"pretraining_tp\": 1,\n", + " \"rms_norm_eps\": 1e-05,\n", + " \"rope_scaling\": null,\n", + " \"rope_theta\": 10000.0,\n", + " \"tie_word_embeddings\": false,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.35.0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32000\n", + "}\n", + "```\n", + "\n", + "We see that we have the key rope_scaling, however, it is set to null. Seems to be disabled in TinyLlama by default.\n", + "\n", + "I have found a config.json file from another ([ HF repo](https://huggingface.co/elvircrn/Llama-2-7b-SPQR-3Bit-16x16-red_pajama-hf/blob/main/config.json) that specifies more json keys. The idea is to add those keys in my TinyLlama config.json with the hope that this somehow works. In partiluar, the referenced config.json adds \"quantization_config\" key. We will try this.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "-HoaOqk2L2wr" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "json_file = \"/root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6/config.json\"\n", + "\n", + "with open(json_file) as json_file:\n", + " file_contents = json_file.read()\n", + "\n", + "print(file_contents)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kQ2dvNWUL2nA", + "outputId": "c96a4106-7e51-43f6-fa5d-91b899b1bf7f" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{\n", + " \"architectures\": [\n", + " \"LlamaForCausalLM\"\n", + " ],\n", + " \"attention_bias\": false,\n", + " \"bos_token_id\": 1,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 2048,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 5632,\n", + " \"max_position_embeddings\": 2048,\n", + " \"model_type\": \"llama\",\n", + " \"num_attention_heads\": 32,\n", + " \"num_hidden_layers\": 22,\n", + " \"num_key_value_heads\": 4,\n", + " \"pretraining_tp\": 1,\n", + " \"rms_norm_eps\": 1e-05,\n", + " \"rope_scaling\": null,\n", + " \"rope_theta\": 10000.0,\n", + " \"tie_word_embeddings\": false,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.35.0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32000\n", + "}\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import json\n", + "\n", + "json_file_path = \"/root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6/config.json\"\n", + "\n", + "with open(json_file_path, \"r\") as json_file:\n", + " data = json.load(json_file)\n", + "\n", + "data[\"quantization_config\"] = {\n", + " \"quant_method\": \"spqr\",\n", + " \"beta1\": 16,\n", + " \"beta2\": 16,\n", + " \"bits\": 3\n", + "}\n", + "\n", + "with open(json_file_path, \"w\") as json_file:\n", + " json.dump(data, json_file, indent=2)\n", + "\n", + "with open(json_file_path) as json_file:\n", + " file_contents = json_file.read()\n", + "\n", + "print(file_contents)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fMSxUrt0IqmI", + "outputId": "5e830df0-9a62-49d5-bef8-bc17f19554f1" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{\n", + " \"architectures\": [\n", + " \"LlamaForCausalLM\"\n", + " ],\n", + " \"attention_bias\": false,\n", + " \"bos_token_id\": 1,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 2048,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 5632,\n", + " \"max_position_embeddings\": 2048,\n", + " \"model_type\": \"llama\",\n", + " \"num_attention_heads\": 32,\n", + " \"num_hidden_layers\": 22,\n", + " \"num_key_value_heads\": 4,\n", + " \"pretraining_tp\": 1,\n", + " \"rms_norm_eps\": 1e-05,\n", + " \"rope_scaling\": null,\n", + " \"rope_theta\": 10000.0,\n", + " \"tie_word_embeddings\": false,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.35.0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32000,\n", + " \"quantization_config\": {\n", + " \"quant_method\": \"spqr\",\n", + " \"beta1\": 16,\n", + " \"beta2\": 16,\n", + " \"bits\": 3\n", + " }\n", + "}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!python main.py \"/root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6\" \\\n", + " \"pajama\" --wbits 4 --groupsize 16 --perchannel \\\n", + " --outlier_threshold=0.2 --nsamples 128" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QMug2uyEldX6", + "outputId": "f17048ad-7903-4ddb-9d58-21dfd55075b1" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "============ Loading model... ============\n", + "Loading pretrained model ...\n", + "2025-07-24 16:43:34.667534: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1753375414.688225 4234 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1753375414.694807 4234 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "Traceback (most recent call last):\n", + " File \"/content/SpQR/main.py\", line 577, in \n", + " model = get_model(args.model_path, args.load, args.dtype).train(False)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/content/SpQR/modelutils.py\", line 45, in get_model\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py\", line 600, in from_pretrained\n", + " return model_class.from_pretrained(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py\", line 311, in _wrapper\n", + " return func(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py\", line 4648, in from_pretrained\n", + " hf_quantizer.validate_environment(\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/quantizers/quantizer_spqr.py\", line 52, in validate_environment\n", + " raise ImportError(\"Using `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`\")\n", + "ImportError: Using `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install spqr_quant[gpu]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XTEI-bEzldTM", + "outputId": "46f18a7d-711e-42b5-b93d-ae2e0f445ec2" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting spqr_quant[gpu]\n", + " Downloading spqr_quant-0.2.0-py3-none-any.whl.metadata (6.4 kB)\n", + "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from spqr_quant[gpu]) (2.6.0+cu124)\n", + "Requirement already satisfied: transformers>=4.44.0 in /usr/local/lib/python3.11/dist-packages (from spqr_quant[gpu]) (4.53.2)\n", + "Requirement already satisfied: safetensors>=0.4.5 in /usr/local/lib/python3.11/dist-packages (from spqr_quant[gpu]) (0.5.3)\n", + "Collecting ninja (from spqr_quant[gpu])\n", + " Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (3.18.0)\n", + "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (4.14.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (3.5)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (3.1.6)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (2023.10.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.5.8)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (11.2.1.3)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (10.3.5.147)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (11.6.1.9)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.3.1.170)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (0.6.2)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (3.2.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->spqr_quant[gpu]) (1.3.0)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (0.33.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (2.0.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (6.0.2)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (2024.11.6)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (2.32.3)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (0.21.2)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (4.67.1)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers>=4.44.0->spqr_quant[gpu]) (1.1.5)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=2.4.0->spqr_quant[gpu]) (3.0.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.44.0->spqr_quant[gpu]) (3.4.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.44.0->spqr_quant[gpu]) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.44.0->spqr_quant[gpu]) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.44.0->spqr_quant[gpu]) (2025.7.14)\n", + "Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m422.8/422.8 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading spqr_quant-0.2.0-py3-none-any.whl (20 kB)\n", + "Installing collected packages: ninja, spqr_quant\n", + "Successfully installed ninja-1.11.1.4 spqr_quant-0.2.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!python main.py \"/root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6\" \\\n", + " \"pajama\" --wbits 4 --groupsize 16 --perchannel \\\n", + " --outlier_threshold=0.2 --nsamples 128" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b8heHWLJUm13", + "outputId": "2c5175c7-a15e-471a-94aa-da1d3fe0dda9" + }, + "execution_count": 30, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "============ Loading model... ============\n", + "Loading pretrained model ...\n", + "2025-07-24 16:44:47.113163: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1753375487.147590 4557 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1753375487.158582 4557 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "Traceback (most recent call last):\n", + " File \"/content/SpQR/main.py\", line 577, in \n", + " model = get_model(args.model_path, args.load, args.dtype).train(False)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/content/SpQR/modelutils.py\", line 45, in get_model\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py\", line 600, in from_pretrained\n", + " return model_class.from_pretrained(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py\", line 311, in _wrapper\n", + " return func(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py\", line 4655, in from_pretrained\n", + " torch_dtype = hf_quantizer.update_torch_dtype(torch_dtype)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/quantizers/quantizer_spqr.py\", line 59, in update_torch_dtype\n", + " raise ValueError(\n", + "ValueError: You cannot use any type other than torch.float16 for SpQR. Please either leave it None or set it totorch.float16 explicitly.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "json_string = '''\n", + "{\n", + " \"architectures\": [\n", + " \"LlamaForCausalLM\"\n", + " ],\n", + " \"attention_bias\": false,\n", + " \"bos_token_id\": 1,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 2048,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 5632,\n", + " \"max_position_embeddings\": 2048,\n", + " \"model_type\": \"llama\",\n", + " \"num_attention_heads\": 32,\n", + " \"num_hidden_layers\": 22,\n", + " \"num_key_value_heads\": 4,\n", + " \"pretraining_tp\": 1,\n", + " \"rms_norm_eps\": 1e-05,\n", + " \"rope_scaling\": null,\n", + " \"rope_theta\": 10000.0,\n", + " \"tie_word_embeddings\": false,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.35.0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32000,\n", + " \"quantization_config\": {\n", + " \"quant_method\": \"spqr\",\n", + " \"beta1\": 16,\n", + " \"beta2\": 16,\n", + " \"bits\": 3\n", + " }\n", + "}\n", + "'''\n", + "\n", + "data = json.loads(json_string)\n", + "data[\"torch_dtype\"] = \"bfloat16\"\n", + "updated_str = json.dumps(data, indent=2)\n", + "print(updated_str)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OfsLodJ1Umyv", + "outputId": "379a4a77-f8a6-4002-eabb-e7b1148dc352" + }, + "execution_count": 44, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{\n", + " \"architectures\": [\n", + " \"LlamaForCausalLM\"\n", + " ],\n", + " \"attention_bias\": false,\n", + " \"bos_token_id\": 1,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 2048,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 5632,\n", + " \"max_position_embeddings\": 2048,\n", + " \"model_type\": \"llama\",\n", + " \"num_attention_heads\": 32,\n", + " \"num_hidden_layers\": 22,\n", + " \"num_key_value_heads\": 4,\n", + " \"pretraining_tp\": 1,\n", + " \"rms_norm_eps\": 1e-05,\n", + " \"rope_scaling\": null,\n", + " \"rope_theta\": 10000.0,\n", + " \"tie_word_embeddings\": false,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.35.0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32000,\n", + " \"quantization_config\": {\n", + " \"quant_method\": \"spqr\",\n", + " \"beta1\": 16,\n", + " \"beta2\": 16,\n", + " \"bits\": 3\n", + " }\n", + "}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "xAp8jlPLX04m" + }, + "execution_count": 42, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!python main.py \"/root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6\" \\\n", + " \"pajama\" --wbits 4 --groupsize 16 --perchannel \\\n", + " --outlier_threshold=0.2 --nsamples 128" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "chqw-1G6UmwH", + "outputId": "721c2c70-785d-4de4-e66d-5c35a746cad4" + }, + "execution_count": 43, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "============ Loading model... ============\n", + "Loading pretrained model ...\n", + "2025-07-24 17:00:29.814365: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1753376429.834383 8639 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1753376429.841005 8639 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "Traceback (most recent call last):\n", + " File \"/content/SpQR/main.py\", line 577, in \n", + " model = get_model(args.model_path, args.load, args.dtype).train(False)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/content/SpQR/modelutils.py\", line 45, in get_model\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py\", line 600, in from_pretrained\n", + " return model_class.from_pretrained(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py\", line 311, in _wrapper\n", + " return func(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py\", line 4655, in from_pretrained\n", + " torch_dtype = hf_quantizer.update_torch_dtype(torch_dtype)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/transformers/quantizers/quantizer_spqr.py\", line 59, in update_torch_dtype\n", + " raise ValueError(\n", + "ValueError: You cannot use any type other than torch.float16 for SpQR. Please either leave it None or set it totorch.float16 explicitly.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install spqr_quant[gpu]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ts59qeGLd_I9", + "outputId": "cdd406aa-e534-4e68-b2b4-62853ee66b79" + }, + "execution_count": 49, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: spqr_quant[gpu] in /usr/local/lib/python3.11/dist-packages (0.2.0)\n", + "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from spqr_quant[gpu]) (2.6.0+cu124)\n", + "Requirement already satisfied: transformers>=4.44.0 in /usr/local/lib/python3.11/dist-packages (from spqr_quant[gpu]) (4.53.2)\n", + "Requirement already satisfied: safetensors>=0.4.5 in /usr/local/lib/python3.11/dist-packages (from spqr_quant[gpu]) (0.5.3)\n", + "Requirement already satisfied: ninja in /usr/local/lib/python3.11/dist-packages (from spqr_quant[gpu]) (1.11.1.4)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (3.18.0)\n", + "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (4.14.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (3.5)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (3.1.6)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (2023.10.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.5.8)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (11.2.1.3)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (10.3.5.147)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (11.6.1.9)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.3.1.170)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (0.6.2)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (12.4.127)\n", + "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (3.2.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->spqr_quant[gpu]) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->spqr_quant[gpu]) (1.3.0)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (0.33.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (2.0.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (6.0.2)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (2024.11.6)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (2.32.3)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (0.21.2)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.44.0->spqr_quant[gpu]) (4.67.1)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers>=4.44.0->spqr_quant[gpu]) (1.1.5)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=2.4.0->spqr_quant[gpu]) (3.0.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.44.0->spqr_quant[gpu]) (3.4.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.44.0->spqr_quant[gpu]) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.44.0->spqr_quant[gpu]) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.44.0->spqr_quant[gpu]) (2025.7.14)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from transformers import AutoModelForCausalLM, AutoTokenizer\n", + "\n", + "model_name_path = local_dir\n", + "\n", + "model_dtype = AutoModelForCausalLM.from_pretrained(model_name_path,\n", + " torch_dtype=torch.float16,\n", + " device_map=\"auto\",\n", + " trust_remote_code=True\n", + " )\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 578 + }, + "id": "ZkoJFkweUmtv", + "outputId": "18b09a5c-3f43-4dc5-b7e5-99a3163b26cd" + }, + "execution_count": 52, + "outputs": [ + { + "output_type": "error", + "ename": "ImportError", + "evalue": "Using `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipython-input-52-2491796096.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mmodel_name_path\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlocal_dir\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m model_dtype = AutoModelForCausalLM.from_pretrained(model_name_path, \n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mtorch_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat16\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mdevice_map\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"auto\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 598\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmodel_class\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig_class\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msub_configs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"text_config\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 599\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_text_config\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 600\u001b[0;31m return model_class.from_pretrained(\n\u001b[0m\u001b[1;32m 601\u001b[0m \u001b[0mpretrained_model_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mmodel_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhub_kwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 602\u001b[0m )\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py\u001b[0m in \u001b[0;36m_wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0mold_dtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_default_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 311\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 312\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 313\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_default_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mold_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 4646\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4647\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhf_quantizer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4648\u001b[0;31m hf_quantizer.validate_environment(\n\u001b[0m\u001b[1;32m 4649\u001b[0m \u001b[0mtorch_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtorch_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4650\u001b[0m \u001b[0mfrom_tf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfrom_tf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/transformers/quantizers/quantizer_spqr.py\u001b[0m in \u001b[0;36mvalidate_environment\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_spqr_available\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Using `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mupdate_torch_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch_dtype\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"torch.dtype\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m\"torch.dtype\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mImportError\u001b[0m: Using `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`", + "", + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" + ], + "errorDetails": { + "actions": [ + { + "action": "open_url", + "actionText": "Open Examples", + "url": "/notebooks/snippets/importing_libraries.ipynb" + } + ] + } + } + ] + }, + { + "cell_type": "code", + "source": [ + "import spqr_quant" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "aob87KQfUmrA", + "outputId": "0a285444-eb56-4576-be47-b955b13a3054" + }, + "execution_count": 54, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. \n", + "If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "error", + "ename": "RuntimeError", + "evalue": "Error building extension 'spqr_cuda': [1/3] /usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output spqr_cuda_kernel.cuda.o.d -DTORCH_EXTENSION_NAME=spqr_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /usr/local/lib/python3.11/dist-packages/torch/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.11/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=compute_75 -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -O3 -arch=native -lineinfo -std=c++17 -c /usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda_kernel.cu -o spqr_cuda_kernel.cuda.o \nFAILED: spqr_cuda_kernel.cuda.o \n/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output spqr_cuda_kernel.cuda.o.d -DTORCH_EXTENSION_NAME=spqr_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /usr/local/lib/python3.11/dist-packages/torch/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.11/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=compute_75 -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -O3 -arch=native -lineinfo -std=c++17 -c /usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda_kernel.cu -o spqr_cuda_kernel.cuda.o \n/usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda_kernel.cu(162): warning #177-D: variable \"SHARED_OFFSET\" was declared but never referenced\n static constexpr u32 SHARED_OFFSET = 32;\n ^\n\nRemark: The warnings can be suppressed with \"-diag-suppress \"\n\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 1315; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 2719; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 4115; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 5516; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 6941; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 8444; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 9876; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 11300; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 12727; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 14178; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 15594; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 17012; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 18422; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 19837; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 21276; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 22793; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 24239; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 25677; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 27118; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 28583; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas fatal : Ptx assembly aborted due to errors\n[2/3] c++ -MMD -MF spqr_cuda.o.d -DTORCH_EXTENSION_NAME=spqr_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /usr/local/lib/python3.11/dist-packages/torch/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.11/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -c /usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda.cpp -o spqr_cuda.o \nIn file included from /usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda.cpp:17:\n/usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/common.cuh:89:31: warning: ‘maybe_unused’ attribute ignored [-Wattributes]\n 89 | [[maybe_unused]] int sanity{};\n | ^\nninja: build stopped: subcommand failed.\n", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py\u001b[0m in \u001b[0;36m_run_ninja_build\u001b[0;34m(build_directory, verbose, error_prefix)\u001b[0m\n\u001b[1;32m 2208\u001b[0m \u001b[0mstdout_fileno\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2209\u001b[0;31m subprocess.run(\n\u001b[0m\u001b[1;32m 2210\u001b[0m \u001b[0mcommand\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.11/subprocess.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[1;32m 570\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcheck\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mretcode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 571\u001b[0;31m raise CalledProcessError(retcode, process.args,\n\u001b[0m\u001b[1;32m 572\u001b[0m output=stdout, stderr=stderr)\n", + "\u001b[0;31mCalledProcessError\u001b[0m: Command '['ninja', '-v']' returned non-zero exit status 1.", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipython-input-54-1571779815.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mspqr_quant\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/spqr_quant/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0minference\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mQuantizedLinear\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0minference_kernels\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mget_spqr_mul_fused\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_spqr_mul_timer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_torch_mul_timer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/spqr_quant/inference.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorch\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTensor\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m from .inference_kernels.cuda_kernel import (\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mcall_dequantize_compressed\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mcall_spqr_mul\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/cuda_kernel.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mCUDA_FOLDER\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdirname\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabspath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__file__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m SPQR_CUDA = load(\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"spqr_cuda\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0msources\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCUDA_FOLDER\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"spqr_cuda.cpp\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCUDA_FOLDER\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"spqr_cuda_kernel.cu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_python_module, is_standalone, keep_intermediates)\u001b[0m\n\u001b[1;32m 1378\u001b[0m ... verbose=True)\n\u001b[1;32m 1379\u001b[0m \"\"\"\n\u001b[0;32m-> 1380\u001b[0;31m return _jit_compile(\n\u001b[0m\u001b[1;32m 1381\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1382\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0msources\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msources\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0msources\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py\u001b[0m in \u001b[0;36m_jit_compile\u001b[0;34m(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_python_module, is_standalone, keep_intermediates)\u001b[0m\n\u001b[1;32m 1796\u001b[0m \u001b[0msources\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhipified_sources\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1797\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1798\u001b[0;31m _write_ninja_file_and_build_library(\n\u001b[0m\u001b[1;32m 1799\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1800\u001b[0m \u001b[0msources\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msources\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py\u001b[0m in \u001b[0;36m_write_ninja_file_and_build_library\u001b[0;34m(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_standalone)\u001b[0m\n\u001b[1;32m 1924\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1925\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'Building extension module {name}...'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1926\u001b[0;31m _run_ninja_build(\n\u001b[0m\u001b[1;32m 1927\u001b[0m \u001b[0mbuild_directory\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1928\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py\u001b[0m in \u001b[0;36m_run_ninja_build\u001b[0;34m(build_directory, verbose, error_prefix)\u001b[0m\n\u001b[1;32m 2223\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'output'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# type: ignore[union-attr]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2224\u001b[0m \u001b[0mmessage\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34mf\": {error.output.decode(*SUBPROCESS_DECODE_ARGS)}\"\u001b[0m \u001b[0;31m# type: ignore[union-attr]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2225\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2227\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Error building extension 'spqr_cuda': [1/3] /usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output spqr_cuda_kernel.cuda.o.d -DTORCH_EXTENSION_NAME=spqr_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /usr/local/lib/python3.11/dist-packages/torch/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.11/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=compute_75 -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -O3 -arch=native -lineinfo -std=c++17 -c /usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda_kernel.cu -o spqr_cuda_kernel.cuda.o \nFAILED: spqr_cuda_kernel.cuda.o \n/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output spqr_cuda_kernel.cuda.o.d -DTORCH_EXTENSION_NAME=spqr_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /usr/local/lib/python3.11/dist-packages/torch/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.11/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=compute_75 -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -O3 -arch=native -lineinfo -std=c++17 -c /usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda_kernel.cu -o spqr_cuda_kernel.cuda.o \n/usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda_kernel.cu(162): warning #177-D: variable \"SHARED_OFFSET\" was declared but never referenced\n static constexpr u32 SHARED_OFFSET = 32;\n ^\n\nRemark: The warnings can be suppressed with \"-diag-suppress \"\n\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 1315; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 2719; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 4115; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 5516; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 6941; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 8444; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 9876; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 11300; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 12727; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 14178; error : Feature 'cp.async.wait_all' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 15594; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 17012; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 18422; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 19837; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 21276; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 22793; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 24239; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 25677; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 27118; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas /tmp/tmpxft_0000469e_00000000-7_spqr_cuda_kernel.ptx, line 28583; error : Feature 'cp.async.wait_group' requires .target sm_80 or higher\nptxas fatal : Ptx assembly aborted due to errors\n[2/3] c++ -MMD -MF spqr_cuda.o.d -DTORCH_EXTENSION_NAME=spqr_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /usr/local/lib/python3.11/dist-packages/torch/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.11/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.11/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -c /usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda.cpp -o spqr_cuda.o \nIn file included from /usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/spqr_cuda.cpp:17:\n/usr/local/lib/python3.11/dist-packages/spqr_quant/inference_kernels/common.cuh:89:31: warning: ‘maybe_unused’ attribute ignored [-Wattributes]\n 89 | [[maybe_unused]] int sanity{};\n | ^\nninja: build stopped: subcommand failed.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "3K0n9qecUmof" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "s_F_3NTkUml4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "J0a1GvMQUmjX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Wc13uMCbUmg6" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "YWKQEfGLUmbx" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "VGV4tKjQUmZR" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "sRPPPAyXUmWZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Me7cQ7aVdYqn" + }, + "execution_count": 27, + "outputs": [] + } + ] +} \ No newline at end of file