Source/colab/Untitled1.ipynb


			
				
					
					
						
						
							
							
							{
							 "nbformat": 4,
							 "nbformat_minor": 0,
							 "metadata": {
							  "colab": {
							   "provenance": [],
							   "authorship_tag": "ABX9TyOuI/HFRXnL2TxCZhFMJvEF"
							  },
							  "kernelspec": {
							   "name": "python3",
							   "display_name": "Python 3"
							  },
							  "language_info": {
							   "name": "python"
							  }
							 },
							 "cells": [
							  {
							   "cell_type": "markdown",
							   "metadata": {},
							   "source": [
							    "# Multimodal captioning chatbot (Gradio + Transformers)\n",
							    "\n",
							    "Upload an image in the chat box to get a generated caption, or send plain text.\n",
							    "The bot is built in three stages that share one captioning model and one UI helper.\n",
							    "\n",
							    "**Run order matters.** Execute the install cell first, and restart the runtime if Transformers was already imported. A previous run on a runtime whose Transformers build did not register the `image-to-text` task failed with `KeyError: Unknown task image-to-text`, so the version pin below must be in effect before the models are loaded."
							   ]
							  },
							  {
							   "cell_type": "code",
							   "execution_count": null,
							   "metadata": {
							    "id": "fP-707OLlJF-"
							   },
							   "outputs": [],
							   "source": [
							    "# Pinned so the `image-to-text` pipeline task used below is available.\n",
							    "%pip install -q \"transformers[sentencepiece]==4.41.2\" torch gradio python-dotenv"
							   ]
							  },
							  {
							   "cell_type": "markdown",
							   "metadata": {},
							   "source": [
							    "## Setup\n",
							    "\n",
							    "All imports, shared UI strings, and the captioning model live here so that every later cell works after **Restart & Run All**."
							   ]
							  },
							  {
							   "cell_type": "code",
							   "execution_count": null,
							   "metadata": {},
							   "outputs": [],
							   "source": [
							    "import gradio as gr\n",
							    "from PIL import Image\n",
							    "from transformers import pipeline\n",
							    "\n",
							    "# User-facing strings shared by every stage of the demo.\n",
							    "TITLE = \"멀티 모달 AI 챗봇\"\n",
							    "DESCRIPTION = \"이미지를 업로드하면 이미지에 대한 설명을 생성하는 챗봇입니다. 테스트로 질문도 가능합니다.\"\n",
							    "# Sent when a turn has neither an image nor text, so a handler never returns None.\n",
							    "FALLBACK_REPLY = \"이미지를 업로드하거나 텍스트를 입력해 주세요.\"\n",
							    "\n",
							    "# Load the captioning model once; every stage below reuses it.\n",
							    "captioner = pipeline(\"image-to-text\")"
							   ]
							  },
							  {
							   "cell_type": "markdown",
							   "metadata": {},
							   "source": [
							    "## Shared helpers\n",
							    "\n",
							    "The three stages differ only in how they answer a turn, so message parsing, captioning, and UI construction are defined once."
							   ]
							  },
							  {
							   "cell_type": "code",
							   "execution_count": null,
							   "metadata": {},
							   "outputs": [],
							   "source": [
							    "def split_message(message):\n",
							    "    \"\"\"Split a multimodal ChatInterface message into (text, first_file).\n",
							    "\n",
							    "    `message` looks like {'text': '...', 'files': [...]}: 'files' is empty for a\n",
							    "    text-only turn and holds the uploaded file path(s) otherwise.\n",
							    "    Missing or empty fields come back as '' and None respectively.\n",
							    "    \"\"\"\n",
							    "    text = message.get(\"text\") or \"\"\n",
							    "    files = message.get(\"files\") or []\n",
							    "    first_file = files[0] if files else None\n",
							    "    return text, first_file\n",
							    "\n",
							    "\n",
							    "def describe_image(image_path):\n",
							    "    \"\"\"Return the caption the image-to-text pipeline generates for `image_path`.\"\"\"\n",
							    "    # The pipeline returns a list of candidates; use the first one's text.\n",
							    "    return captioner(image_path)[0][\"generated_text\"]\n",
							    "\n",
							    "\n",
							    "def launch_chatbot(fn):\n",
							    "    \"\"\"Build the multimodal chat UI around handler `fn`, launch it, and return it.\"\"\"\n",
							    "    demo = gr.ChatInterface(\n",
							    "        fn=fn,\n",
							    "        multimodal=True,\n",
							    "        title=TITLE,\n",
							    "        description=DESCRIPTION,\n",
							    "    )\n",
							    "    demo.launch()\n",
							    "    return demo"
							   ]
							  },
							  {
							   "cell_type": "markdown",
							   "metadata": {},
							   "source": [
							    "## Stage 1: caption an image, otherwise echo the text"
							   ]
							  },
							  {
							   "cell_type": "code",
							   "execution_count": null,
							   "metadata": {
							    "id": "XbKK4cPklO8T"
							   },
							   "outputs": [],
							   "source": [
							    "def echo(message, history):\n",
							    "    \"\"\"Caption the first uploaded image; without an image, echo the text back.\n",
							    "\n",
							    "    `history` is required by the ChatInterface handler signature but is unused.\n",
							    "    \"\"\"\n",
							    "    text, image = split_message(message)\n",
							    "    if image:\n",
							    "        return describe_image(image)\n",
							    "    return text or FALLBACK_REPLY\n",
							    "\n",
							    "\n",
							    "demo = launch_chatbot(echo)"
							   ]
							  },
							  {
							   "cell_type": "markdown",
							   "metadata": {},
							   "source": [
							    "## Stage 2: combine the caption with the user's question\n",
							    "\n",
							    "The reply is the prompt that a text model would receive: the generated caption followed by the question."
							   ]
							  },
							  {
							   "cell_type": "code",
							   "execution_count": null,
							   "metadata": {
							    "id": "fuTWrcWQ9HyW"
							   },
							   "outputs": [],
							   "source": [
							    "def build_prompt(caption, question):\n",
							    "    \"\"\"Combine the generated caption and the user's question into one prompt.\n",
							    "\n",
							    "    Built explicitly rather than with an indented triple-quoted f-string, which\n",
							    "    would carry the source indentation into the chat reply.\n",
							    "    \"\"\"\n",
							    "    return f\"이미지 설명:\\n{caption}\\n\\n사용자 질문:\\n{question}\"\n",
							    "\n",
							    "\n",
							    "def chat(message, history):\n",
							    "    \"\"\"Reply with the caption-plus-question prompt for an image, else echo the text.\n",
							    "\n",
							    "    `history` is required by the ChatInterface handler signature but is unused.\n",
							    "    \"\"\"\n",
							    "    text, image = split_message(message)\n",
							    "    if image:\n",
							    "        return build_prompt(describe_image(image), text)\n",
							    "    return text or FALLBACK_REPLY\n",
							    "\n",
							    "\n",
							    "demo = launch_chatbot(chat)"
							   ]
							  },
							  {
							   "cell_type": "markdown",
							   "metadata": {},
							   "source": [
							    "## Stage 3: answer text-only turns with a text-generation model\n",
							    "\n",
							    "Images are still captioned; a text-only turn is continued by the generator instead of being echoed.\n",
							    "\n",
							    "**TODO:** when an image and a question arrive together, feed the combined caption-and-question prompt to the generator."
							   ]
							  },
							  {
							   "cell_type": "code",
							   "execution_count": null,
							   "metadata": {
							    "id": "t_29rw9Y9I0F"
							   },
							   "outputs": [],
							   "source": [
							    "generator = pipeline(\"text-generation\")\n",
							    "\n",
							    "\n",
							    "def generate_reply(message, history):\n",
							    "    \"\"\"Caption an uploaded image; for a text-only turn, continue the text with the generator.\n",
							    "\n",
							    "    `history` is required by the ChatInterface handler signature but is unused.\n",
							    "    \"\"\"\n",
							    "    text, image = split_message(message)\n",
							    "    if image:\n",
							    "        return describe_image(image)\n",
							    "    if text:\n",
							    "        # The pipeline returns a list of candidates; use the first one's text.\n",
							    "        return generator(text)[0][\"generated_text\"]\n",
							    "    return FALLBACK_REPLY\n",
							    "\n",
							    "\n",
							    "demo = launch_chatbot(generate_reply)"
							   ]
							  }
							 ]
							}