{
 "cells": [
  {
   "cell_type": "code",
   "id": "3139d6f7d0c700c6",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:25:36.525083953Z",
     "start_time": "2026-05-29T09:25:34.908128029Z"
    }
   },
   "source": [
    "from langchain_ollama import ChatOllama\n",
    "from langchain_ibm import ChatWatsonx\n",
    "from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder\n",
    "from langchain_core.output_parsers import StrOutputParser, JsonOutputParser, PydanticOutputParser\n",
    "from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda\n",
    "from langchain_core.messages import HumanMessage, SystemMessage, AIMessage\n",
    "from langchain_core.chat_history import InMemoryChatMessageHistory, BaseChatMessageHistory\n",
    "from langchain_core.runnables.history import RunnableWithMessageHistory\n",
    "from pydantic import BaseModel, Field\n",
    "from typing import Literal\n",
    "from dotenv import load_dotenv\n",
    "import os"
   ],
   "outputs": [],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "id": "7c9d9217136d718f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:25:36.553395885Z",
     "start_time": "2026-05-29T09:25:36.527713063Z"
    }
   },
   "source": [
    "# .env 내용 가져오기\n",
    "load_dotenv()\n",
    "\n",
    "apikey = os.getenv(\"WATSONX_API_KEY\")\n",
    "project_id = os.getenv(\"WATSONX_PROJECT_ID\")\n",
    "watsonx_ai_url = os.getenv(\"WATSONX_URL\")\n",
    "hf_token = os.getenv(\"HF_TOKEN\")"
   ],
   "outputs": [],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "id": "3052e78e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:25:38.632525447Z",
     "start_time": "2026-05-29T09:25:36.563121849Z"
    }
   },
   "source": [
    "#!pip install openai   \n",
    "\n",
    "!pip install langchain_openai"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: langchain_openai in ./.venv/lib/python3.12/site-packages (1.2.2)\r\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.4.0 in ./.venv/lib/python3.12/site-packages (from langchain_openai) (1.4.0)\r\n",
      "Requirement already satisfied: openai<3.0.0,>=2.26.0 in ./.venv/lib/python3.12/site-packages (from langchain_openai) (2.38.0)\r\n",
      "Requirement already satisfied: tiktoken<1.0.0,>=0.7.0 in ./.venv/lib/python3.12/site-packages (from langchain_openai) (0.13.0)\r\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (1.33)\r\n",
      "Requirement already satisfied: langchain-protocol>=0.0.14 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (0.0.15)\r\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (0.8.5)\r\n",
      "Requirement already satisfied: packaging>=23.2.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (26.2)\r\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (2.13.4)\r\n",
      "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (6.0.3)\r\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (9.1.4)\r\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (4.15.0)\r\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_openai) (0.16.0)\r\n",
      "Requirement already satisfied: anyio<5,>=3.5.0 in ./.venv/lib/python3.12/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (4.13.0)\r\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in ./.venv/lib/python3.12/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (1.9.0)\r\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (0.28.1)\r\n",
      "Requirement already satisfied: jiter<1,>=0.10.0 in ./.venv/lib/python3.12/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (0.15.0)\r\n",
      "Requirement already satisfied: sniffio in ./.venv/lib/python3.12/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (1.3.1)\r\n",
      "Requirement already satisfied: tqdm>4 in ./.venv/lib/python3.12/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (4.67.3)\r\n",
      "Requirement already satisfied: regex in ./.venv/lib/python3.12/site-packages (from tiktoken<1.0.0,>=0.7.0->langchain_openai) (2026.5.9)\r\n",
      "Requirement already satisfied: requests in ./.venv/lib/python3.12/site-packages (from tiktoken<1.0.0,>=0.7.0->langchain_openai) (2.34.2)\r\n",
      "Requirement already satisfied: idna>=2.8 in ./.venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->openai<3.0.0,>=2.26.0->langchain_openai) (3.16)\r\n",
      "Requirement already satisfied: certifi in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->openai<3.0.0,>=2.26.0->langchain_openai) (2026.5.20)\r\n",
      "Requirement already satisfied: httpcore==1.* in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->openai<3.0.0,>=2.26.0->langchain_openai) (1.0.9)\r\n",
      "Requirement already satisfied: h11>=0.16 in ./.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<3.0.0,>=2.26.0->langchain_openai) (0.16.0)\r\n",
      "Requirement already satisfied: jsonpointer>=1.9 in ./.venv/lib/python3.12/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.4.0->langchain_openai) (3.1.1)\r\n",
      "Requirement already satisfied: orjson>=3.9.14 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.4.0->langchain_openai) (3.11.9)\r\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.4.0->langchain_openai) (1.0.0)\r\n",
      "Requirement already satisfied: xxhash>=3.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.4.0->langchain_openai) (3.7.0)\r\n",
      "Requirement already satisfied: zstandard>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.4.0->langchain_openai) (0.25.0)\r\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.4.0->langchain_openai) (0.7.0)\r\n",
      "Requirement already satisfied: pydantic-core==2.46.4 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.4.0->langchain_openai) (2.46.4)\r\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.4.0->langchain_openai) (0.4.2)\r\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests->tiktoken<1.0.0,>=0.7.0->langchain_openai) (3.4.7)\r\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in ./.venv/lib/python3.12/site-packages (from requests->tiktoken<1.0.0,>=0.7.0->langchain_openai) (2.7.0)\r\n"
     ]
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "id": "bb2fbfe3d75358b0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:25:47.821970428Z",
     "start_time": "2026-05-29T09:25:38.704672821Z"
    }
   },
   "source": [
    "import os\n",
    "from openai import OpenAI\n",
    "\n",
    "client = OpenAI(\n",
    "    base_url=\"https://router.huggingface.co/v1\",\n",
    "    api_key=hf_token,\n",
    ")\n",
    "\n",
    "completion = client.chat.completions.create(\n",
    "    model=\"Qwen/Qwen2.5-7B-Instruct:together\",\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": \"파이썬 리스트 설명해줘\"\n",
    "        }\n",
    "    ],\n",
    ")\n",
    "\n",
    "print(completion.choices[0].message)"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ChatCompletionMessage(content='파이썬 리스트는 동적으로 크기 조절이 가능한 배열 형태의 데이터 구조로, 다양한 자료형의 요소들을 포함할 수 있습니다. 리스트는 중괄호 `{}` 대신 대괄호 `[]`로 표현하며, 요소들 사이에는 콤마 `,`로 구분됩니다.\\n\\n리스트의 주요 특징과 기능은 다음과 같습니다:\\n\\n1. **다양한 자료형**: 리스트의 요소는 숫자, 문자열, 논리값, 다른 리스트 등 다양한 자료형을 포함할 수 있습니다.\\n\\n2. **순서가 있는 집합**: 리스트는 순서가 있는 요소들의 집합으로, 각 요소는 인덱스로 접근할 수 있습니다. 인덱스는 0부터 시작합니다.\\n\\n3. **변경 가능**: 리스트는 변경 가능한 데이터 구조로, 요소를 추가하거나 삭제하거나 값을 변경할 수 있습니다.\\n\\n4. **복사 가능**: 리스트는 복사할 수 있으며, 원본과 복사본은 독립적으로 변경될 수 있습니다.\\n\\n5. **슬라이싱**: 리스트의 부분 집합을 쉽게 추출할 수 있습니다.\\n\\n6. **기본 메서드**: 리스트에는 다양한 내장 메서드가 있으며, 이들을 사용하여 리스트를 쉽게 조작할 수 있습니다.\\n\\n### 예제 코드\\n\\n```python\\n# 리스트 생성\\nmy_list = [1, 2, 3, 4, 5]\\nprint(my_list)  # 출력: [1, 2, 3, 4, 5]\\n\\n# 요소 접근\\nprint(my_list[0])  # 출력: 1\\nprint(my_list[2])  # 출력: 3\\n\\n# 요소 변경\\nmy_list[0] = 10\\nprint(my_list)  # 출력: [10, 2, 3, 4, 5]\\n\\n# 요소 추가\\nmy_list.append(6)\\nprint(my_list)  # 출력: [10, 2, 3, 4, 5, 6]\\n\\n# 요소 삭제\\nmy_list.remove(2)\\nprint(my_list)  # 출력: [10, 3, 4, 5, 6]\\n\\n# 리스트 슬라이싱\\nprint(my_list[1:4])  # 출력: [3, 4, 5]\\n```\\n\\n이렇게 파이썬 리스트는 매우 유연하고 강력한 데이터 구조로, 다양한 프로그래밍 작업에 활용될 수 있습니다.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[])\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "markdown",
   "id": "8f8caa8b",
   "metadata": {},
   "source": [
    "#### LangChain 에서 모델 생성하기"
   ]
  },
  {
   "cell_type": "code",
   "id": "20ff68cfaf353938",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:25:51.501131804Z",
     "start_time": "2026-05-29T09:25:47.855590737Z"
    }
   },
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "# 허깅페이스 모델 이용하기\n",
    "hugging_llm = ChatOpenAI(\n",
    "    model=\"Qwen/Qwen2.5-7B-Instruct:together\",\n",
    "    api_key=hf_token,\n",
    "    base_url=\"https://router.huggingface.co/v1\"\n",
    ")\n",
    "\n",
    "response = hugging_llm.invoke(\"생성형 AI 설명해줘\")\n",
    "print(response.content)"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "생성형 AI는 인공지능의 한 분야로, 주어진 입력을 기반으로 새로운 데이터를 생성하는 능력을 가지고 있습니다. 이는 텍스트, 이미지, 음성, 비디오 등 다양한 형태의 데이터를 생성할 수 있습니다. 생성형 AI는 주로 딥러닝 기법, 특히Generative Adversarial Networks (GANs)와 Variational Autoencoders (VAEs)와 같은 모델을 사용합니다.\n",
      "\n",
      "생성형 AI의 주요 특징은 다음과 같습니다:\n",
      "\n",
      "1. **데이터 생성**: 새로운 데이터를 생성할 수 있습니다. 예를 들어, 텍스트 생성, 이미지 생성, 음성 합성 등이 있습니다.\n",
      "\n",
      "2. **학습**: 대규모 데이터셋을 통해 학습하여 데이터의 패턴을 이해하고 이를 기반으로 새로운 데이터를 생성합니다.\n",
      "\n",
      "3. **응용 분야**: 예술, 디자인, 게임, 영화 제작, 의료, 뉴스 기사 생성 등 다양한 분야에서 활용됩니다.\n",
      "\n",
      "4. **비판적 학습**: GANs와 같은 생성형 모델은 두 개의 네트워크를 사용하여 서로 경쟁하며, 이는 모델의 성능을 향상시키는 데 도움이 됩니다.\n",
      "\n",
      "5. **개인화**: 사용자의 선호도나 요구사항을 고려하여 개인화된 콘텐츠를 생성할 수 있습니다.\n",
      "\n",
      "생성형 AI는 기존의 데이터를 기반으로 새로운 데이터를 생성하는 능력 때문에 매우 유용하며, 미래의 다양한 분야에서 중요한 역할을 할 것으로 예상됩니다.\n"
     ]
    }
   ],
   "execution_count": 5
  },
  {
   "cell_type": "code",
   "id": "1af25a40",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:25:53.782170142Z",
     "start_time": "2026-05-29T09:25:51.533571646Z"
    }
   },
   "source": [
    "# 유료 LLM\n",
    "watson_llm = ChatWatsonx(\n",
    "    model_id=\"ibm/granite-4-h-small\",\n",
    "    url = f\"{watsonx_ai_url}\",\n",
    "    api_key = f\"{apikey}\",\n",
    "    project_id=f\"{project_id}\",\n",
    "    max_tokens = 2000\n",
    ")\n",
    "\n",
    "# 로컬 LLM\n",
    "qwen_llm = ChatOllama(model=\"qwen3.5:4b\")\n",
    "exaone_llm = ChatOllama(model=\"exaone3.5:2.4b\")\n",
    "gemma_llm = ChatOllama(model=\"gemma4:e2b\")"
   ],
   "outputs": [],
   "execution_count": 6
  },
  {
   "cell_type": "markdown",
   "id": "72b45a9c",
   "metadata": {},
   "source": [
    "### RAG(Retrieval Augmented Generation) : 검색 증강 생성\n",
    "- 생성형 AI 가 외부 문서를 검색한 뒤 그 내용을 기반으로 답변\n",
    "- 환각증상 억제\n",
    "- 질문 -> 관련 문서 검색 -> 검색 결과를 LLM에게 전달 -> 답변 생성"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "50ca8b6f",
   "metadata": {},
   "source": [
    "### Document Loader\n",
    "- 다양한 형식의 파일 / URL 을 LangChain Document 객체로 변환\n",
    "- Document = page_content + metadata\n",
    "- Loader\n",
    "    - PyPDFLoader \n",
    "    - JSONLoader\n",
    "    - TextLoader\n",
    "    - WebBaseLoader"
   ]
  },
  {
   "cell_type": "code",
   "id": "aa1094ce3c642744",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:25:56.833847077Z",
     "start_time": "2026-05-29T09:25:53.808394231Z"
    }
   },
   "source": [
    "!pip install pypdf beautifulsoup4 youtube-transcript-api langchain-chroma faiss-cpu pdfplumber"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pypdf in ./.venv/lib/python3.12/site-packages (6.12.2)\r\n",
      "Requirement already satisfied: beautifulsoup4 in ./.venv/lib/python3.12/site-packages (4.14.3)\r\n",
      "Requirement already satisfied: youtube-transcript-api in ./.venv/lib/python3.12/site-packages (1.2.4)\r\n",
      "Requirement already satisfied: langchain-chroma in ./.venv/lib/python3.12/site-packages (1.1.0)\r\n",
      "Requirement already satisfied: faiss-cpu in ./.venv/lib/python3.12/site-packages (1.14.2)\r\n",
      "Requirement already satisfied: pdfplumber in ./.venv/lib/python3.12/site-packages (0.11.9)\r\n",
      "Requirement already satisfied: soupsieve>=1.6.1 in ./.venv/lib/python3.12/site-packages (from beautifulsoup4) (2.8.4)\r\n",
      "Requirement already satisfied: typing-extensions>=4.0.0 in ./.venv/lib/python3.12/site-packages (from beautifulsoup4) (4.15.0)\r\n",
      "Requirement already satisfied: defusedxml<0.8.0,>=0.7.1 in ./.venv/lib/python3.12/site-packages (from youtube-transcript-api) (0.7.1)\r\n",
      "Requirement already satisfied: requests in ./.venv/lib/python3.12/site-packages (from youtube-transcript-api) (2.34.2)\r\n",
      "Requirement already satisfied: chromadb<2.0.0,>=1.3.5 in ./.venv/lib/python3.12/site-packages (from langchain-chroma) (1.5.9)\r\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.1.3 in ./.venv/lib/python3.12/site-packages (from langchain-chroma) (1.4.0)\r\n",
      "Requirement already satisfied: numpy>=1.26.0 in ./.venv/lib/python3.12/site-packages (from langchain-chroma) (2.4.6)\r\n",
      "Requirement already satisfied: packaging in ./.venv/lib/python3.12/site-packages (from faiss-cpu) (26.2)\r\n",
      "Requirement already satisfied: pdfminer.six==20251230 in ./.venv/lib/python3.12/site-packages (from pdfplumber) (20251230)\r\n",
      "Requirement already satisfied: Pillow>=9.1 in ./.venv/lib/python3.12/site-packages (from pdfplumber) (12.2.0)\r\n",
      "Requirement already satisfied: pypdfium2>=4.18.0 in ./.venv/lib/python3.12/site-packages (from pdfplumber) (5.8.0)\r\n",
      "Requirement already satisfied: charset-normalizer>=2.0.0 in ./.venv/lib/python3.12/site-packages (from pdfminer.six==20251230->pdfplumber) (3.4.7)\r\n",
      "Requirement already satisfied: cryptography>=36.0.0 in ./.venv/lib/python3.12/site-packages (from pdfminer.six==20251230->pdfplumber) (48.0.0)\r\n",
      "Requirement already satisfied: build>=1.0.3 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.5.0)\r\n",
      "Requirement already satisfied: pydantic>=2.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (2.13.4)\r\n",
      "Requirement already satisfied: pydantic-settings>=2.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (2.14.1)\r\n",
      "Requirement already satisfied: pybase64>=1.4.1 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.4.3)\r\n",
      "Requirement already satisfied: uvicorn>=0.18.3 in ./.venv/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.48.0)\r\n",
      "Requirement already satisfied: onnxruntime>=1.14.1 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.26.0)\r\n",
      "Requirement already satisfied: opentelemetry-api>=1.2.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.42.1)\r\n",
      "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.42.1)\r\n",
      "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.42.1)\r\n",
      "Requirement already satisfied: tokenizers>=0.13.2 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.23.1)\r\n",
      "Requirement already satisfied: pypika>=0.48.9 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.51.1)\r\n",
      "Requirement already satisfied: tqdm>=4.65.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (4.67.3)\r\n",
      "Requirement already satisfied: overrides>=7.3.1 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (7.7.0)\r\n",
      "Requirement already satisfied: importlib-resources in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (7.1.0)\r\n",
      "Requirement already satisfied: grpcio>=1.58.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.80.0)\r\n",
      "Requirement already satisfied: bcrypt>=4.0.1 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (5.0.0)\r\n",
      "Requirement already satisfied: typer>=0.9.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.25.1)\r\n",
      "Requirement already satisfied: kubernetes>=28.1.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (36.0.1)\r\n",
      "Requirement already satisfied: tenacity>=8.2.3 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (9.1.4)\r\n",
      "Requirement already satisfied: pyyaml>=6.0.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (6.0.3)\r\n",
      "Requirement already satisfied: mmh3>=4.0.1 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (5.2.1)\r\n",
      "Requirement already satisfied: orjson>=3.9.12 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (3.11.9)\r\n",
      "Requirement already satisfied: httpx>=0.27.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.28.1)\r\n",
      "Requirement already satisfied: rich>=10.11.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (15.0.0)\r\n",
      "Requirement already satisfied: jsonschema>=4.19.0 in ./.venv/lib/python3.12/site-packages (from chromadb<2.0.0,>=1.3.5->langchain-chroma) (4.26.0)\r\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.1.3->langchain-chroma) (1.33)\r\n",
      "Requirement already satisfied: langchain-protocol>=0.0.14 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.1.3->langchain-chroma) (0.0.15)\r\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.1.3->langchain-chroma) (0.8.5)\r\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.1.3->langchain-chroma) (0.16.0)\r\n",
      "Requirement already satisfied: idna<4,>=2.5 in ./.venv/lib/python3.12/site-packages (from requests->youtube-transcript-api) (3.16)\r\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in ./.venv/lib/python3.12/site-packages (from requests->youtube-transcript-api) (2.7.0)\r\n",
      "Requirement already satisfied: certifi>=2023.5.7 in ./.venv/lib/python3.12/site-packages (from requests->youtube-transcript-api) (2026.5.20)\r\n",
      "Requirement already satisfied: pyproject_hooks in ./.venv/lib/python3.12/site-packages (from build>=1.0.3->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.2.0)\r\n",
      "Requirement already satisfied: cffi>=2.0.0 in ./.venv/lib/python3.12/site-packages (from cryptography>=36.0.0->pdfminer.six==20251230->pdfplumber) (2.0.0)\r\n",
      "Requirement already satisfied: anyio in ./.venv/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (4.13.0)\r\n",
      "Requirement already satisfied: httpcore==1.* in ./.venv/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.0.9)\r\n",
      "Requirement already satisfied: h11>=0.16 in ./.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.16.0)\r\n",
      "Requirement already satisfied: jsonpointer>=1.9 in ./.venv/lib/python3.12/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.1.3->langchain-chroma) (3.1.1)\r\n",
      "Requirement already satisfied: attrs>=22.2.0 in ./.venv/lib/python3.12/site-packages (from jsonschema>=4.19.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (26.1.0)\r\n",
      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in ./.venv/lib/python3.12/site-packages (from jsonschema>=4.19.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (2025.9.1)\r\n",
      "Requirement already satisfied: referencing>=0.28.4 in ./.venv/lib/python3.12/site-packages (from jsonschema>=4.19.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.37.0)\r\n",
      "Requirement already satisfied: rpds-py>=0.25.0 in ./.venv/lib/python3.12/site-packages (from jsonschema>=4.19.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (2026.5.1)\r\n",
      "Requirement already satisfied: six>=1.9.0 in ./.venv/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.17.0)\r\n",
      "Requirement already satisfied: python-dateutil>=2.5.3 in ./.venv/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (2.9.0.post0)\r\n",
      "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in ./.venv/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.9.0)\r\n",
      "Requirement already satisfied: requests-oauthlib in ./.venv/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (2.0.0)\r\n",
      "Requirement already satisfied: durationpy>=0.7 in ./.venv/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.10)\r\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.13.5 in ./.venv/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (3.13.5)\r\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.1.3->langchain-chroma) (1.0.0)\r\n",
      "Requirement already satisfied: xxhash>=3.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.1.3->langchain-chroma) (3.7.0)\r\n",
      "Requirement already satisfied: zstandard>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.1.3->langchain-chroma) (0.25.0)\r\n",
      "Requirement already satisfied: flatbuffers in ./.venv/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb<2.0.0,>=1.3.5->langchain-chroma) (25.12.19)\r\n",
      "Requirement already satisfied: protobuf in ./.venv/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb<2.0.0,>=1.3.5->langchain-chroma) (6.33.6)\r\n",
      "Requirement already satisfied: googleapis-common-protos~=1.57 in ./.venv/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.75.0)\r\n",
      "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.42.1 in ./.venv/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.42.1)\r\n",
      "Requirement already satisfied: opentelemetry-proto==1.42.1 in ./.venv/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.42.1)\r\n",
      "Requirement already satisfied: opentelemetry-semantic-conventions==0.63b1 in ./.venv/lib/python3.12/site-packages (from opentelemetry-sdk>=1.2.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.63b1)\r\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in ./.venv/lib/python3.12/site-packages (from pydantic>=2.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.7.0)\r\n",
      "Requirement already satisfied: pydantic-core==2.46.4 in ./.venv/lib/python3.12/site-packages (from pydantic>=2.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (2.46.4)\r\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in ./.venv/lib/python3.12/site-packages (from pydantic>=2.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.4.2)\r\n",
      "Requirement already satisfied: python-dotenv>=0.21.0 in ./.venv/lib/python3.12/site-packages (from pydantic-settings>=2.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.2.2)\r\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in ./.venv/lib/python3.12/site-packages (from rich>=10.11.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (4.2.0)\r\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in ./.venv/lib/python3.12/site-packages (from rich>=10.11.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (2.20.0)\r\n",
      "Requirement already satisfied: huggingface-hub<2.0,>=0.16.4 in ./.venv/lib/python3.12/site-packages (from tokenizers>=0.13.2->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.16.4)\r\n",
      "Requirement already satisfied: click>=8.2.1 in ./.venv/lib/python3.12/site-packages (from typer>=0.9.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (8.4.1)\r\n",
      "Requirement already satisfied: shellingham>=1.3.0 in ./.venv/lib/python3.12/site-packages (from typer>=0.9.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.5.4)\r\n",
      "Requirement already satisfied: annotated-doc>=0.0.2 in ./.venv/lib/python3.12/site-packages (from typer>=0.9.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.0.4)\r\n",
      "Requirement already satisfied: httptools>=0.6.3 in ./.venv/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.8.0)\r\n",
      "Requirement already satisfied: uvloop>=0.15.1 in ./.venv/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.22.1)\r\n",
      "Requirement already satisfied: watchfiles>=0.20 in ./.venv/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.2.0)\r\n",
      "Requirement already satisfied: websockets>=10.4 in ./.venv/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb<2.0.0,>=1.3.5->langchain-chroma) (16.0)\r\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.13.5->kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (2.6.2)\r\n",
      "Requirement already satisfied: aiosignal>=1.4.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.13.5->kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.4.0)\r\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.13.5->kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.8.0)\r\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.13.5->kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (6.7.1)\r\n",
      "Requirement already satisfied: propcache>=0.2.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.13.5->kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.5.2)\r\n",
      "Requirement already satisfied: yarl<2.0,>=1.17.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.13.5->kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.24.2)\r\n",
      "Requirement already satisfied: pycparser in ./.venv/lib/python3.12/site-packages (from cffi>=2.0.0->cryptography>=36.0.0->pdfminer.six==20251230->pdfplumber) (3.0)\r\n",
      "Requirement already satisfied: filelock>=3.10.0 in ./.venv/lib/python3.12/site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb<2.0.0,>=1.3.5->langchain-chroma) (3.29.0)\r\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in ./.venv/lib/python3.12/site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb<2.0.0,>=1.3.5->langchain-chroma) (2026.4.0)\r\n",
      "Requirement already satisfied: hf-xet<2.0.0,>=1.4.3 in ./.venv/lib/python3.12/site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb<2.0.0,>=1.3.5->langchain-chroma) (1.5.0)\r\n",
      "Requirement already satisfied: mdurl~=0.1 in ./.venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (0.1.2)\r\n",
      "Requirement already satisfied: oauthlib>=3.0.0 in ./.venv/lib/python3.12/site-packages (from requests-oauthlib->kubernetes>=28.1.0->chromadb<2.0.0,>=1.3.5->langchain-chroma) (3.3.1)\r\n"
     ]
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "code",
   "id": "5e9df6fa3cc215a0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:25:58.738597561Z",
     "start_time": "2026-05-29T09:25:56.867436104Z"
    }
   },
   "source": [
    "!pip install langchain-text-splitters"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: langchain-text-splitters in ./.venv/lib/python3.12/site-packages (1.1.2)\r\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.2.31 in ./.venv/lib/python3.12/site-packages (from langchain-text-splitters) (1.4.0)\r\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (1.33)\r\n",
      "Requirement already satisfied: langchain-protocol>=0.0.14 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.0.15)\r\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.8.5)\r\n",
      "Requirement already satisfied: packaging>=23.2.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (26.2)\r\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2.13.4)\r\n",
      "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (6.0.3)\r\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (9.1.4)\r\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (4.15.0)\r\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.16.0)\r\n",
      "Requirement already satisfied: jsonpointer>=1.9 in ./.venv/lib/python3.12/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.1.1)\r\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.28.1)\r\n",
      "Requirement already satisfied: orjson>=3.9.14 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.11.9)\r\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (1.0.0)\r\n",
      "Requirement already satisfied: requests>=2.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2.34.2)\r\n",
      "Requirement already satisfied: xxhash>=3.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.7.0)\r\n",
      "Requirement already satisfied: zstandard>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.25.0)\r\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.7.0)\r\n",
      "Requirement already satisfied: pydantic-core==2.46.4 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2.46.4)\r\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.4.2)\r\n",
      "Requirement already satisfied: anyio in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (4.13.0)\r\n",
      "Requirement already satisfied: certifi in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2026.5.20)\r\n",
      "Requirement already satisfied: httpcore==1.* in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (1.0.9)\r\n",
      "Requirement already satisfied: idna in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.16)\r\n",
      "Requirement already satisfied: h11>=0.16 in ./.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.16.0)\r\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.4.7)\r\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in ./.venv/lib/python3.12/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2.7.0)\r\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "id": "c93b0857",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:00.870357617Z",
     "start_time": "2026-05-29T09:25:58.763246880Z"
    }
   },
   "source": [
    "!pip install langchain_community"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: langchain_community in ./.venv/lib/python3.12/site-packages (0.4.2)\r\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in ./.venv/lib/python3.12/site-packages (from langchain_community) (3.13.5)\r\n",
      "Requirement already satisfied: httpx-sse<1.0.0,>=0.4.0 in ./.venv/lib/python3.12/site-packages (from langchain_community) (0.4.3)\r\n",
      "Requirement already satisfied: langchain-classic<2.0.0,>=1.0.7 in ./.venv/lib/python3.12/site-packages (from langchain_community) (1.0.7)\r\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.4.0 in ./.venv/lib/python3.12/site-packages (from langchain_community) (1.4.0)\r\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.1.125 in ./.venv/lib/python3.12/site-packages (from langchain_community) (0.8.5)\r\n",
      "Requirement already satisfied: numpy>=1.26.2 in ./.venv/lib/python3.12/site-packages (from langchain_community) (2.4.6)\r\n",
      "Requirement already satisfied: pydantic-settings<3.0.0,>=2.10.1 in ./.venv/lib/python3.12/site-packages (from langchain_community) (2.14.1)\r\n",
      "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in ./.venv/lib/python3.12/site-packages (from langchain_community) (6.0.3)\r\n",
      "Requirement already satisfied: requests<3.0.0,>=2.32.5 in ./.venv/lib/python3.12/site-packages (from langchain_community) (2.34.2)\r\n",
      "Requirement already satisfied: sqlalchemy<3.0.0,>=1.4.0 in ./.venv/lib/python3.12/site-packages (from langchain_community) (2.0.50)\r\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in ./.venv/lib/python3.12/site-packages (from langchain_community) (9.1.4)\r\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (2.6.2)\r\n",
      "Requirement already satisfied: aiosignal>=1.4.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.4.0)\r\n",
      "Requirement already satisfied: attrs>=17.3.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (26.1.0)\r\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.8.0)\r\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (6.7.1)\r\n",
      "Requirement already satisfied: propcache>=0.2.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (0.5.2)\r\n",
      "Requirement already satisfied: yarl<2.0,>=1.17.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.24.2)\r\n",
      "Requirement already satisfied: langchain-text-splitters<2.0.0,>=1.1.2 in ./.venv/lib/python3.12/site-packages (from langchain-classic<2.0.0,>=1.0.7->langchain_community) (1.1.2)\r\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in ./.venv/lib/python3.12/site-packages (from langchain-classic<2.0.0,>=1.0.7->langchain_community) (2.13.4)\r\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_community) (1.33)\r\n",
      "Requirement already satisfied: langchain-protocol>=0.0.14 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_community) (0.0.15)\r\n",
      "Requirement already satisfied: packaging>=23.2.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_community) (26.2)\r\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_community) (4.15.0)\r\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<2.0.0,>=1.4.0->langchain_community) (0.16.0)\r\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.1.125->langchain_community) (0.28.1)\r\n",
      "Requirement already satisfied: orjson>=3.9.14 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.1.125->langchain_community) (3.11.9)\r\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.1.125->langchain_community) (1.0.0)\r\n",
      "Requirement already satisfied: xxhash>=3.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.1.125->langchain_community) (3.7.0)\r\n",
      "Requirement already satisfied: zstandard>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.1.125->langchain_community) (0.25.0)\r\n",
      "Requirement already satisfied: python-dotenv>=0.21.0 in ./.venv/lib/python3.12/site-packages (from pydantic-settings<3.0.0,>=2.10.1->langchain_community) (1.2.2)\r\n",
      "Requirement already satisfied: typing-inspection>=0.4.0 in ./.venv/lib/python3.12/site-packages (from pydantic-settings<3.0.0,>=2.10.1->langchain_community) (0.4.2)\r\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.32.5->langchain_community) (3.4.7)\r\n",
      "Requirement already satisfied: idna<4,>=2.5 in ./.venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.32.5->langchain_community) (3.16)\r\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in ./.venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.32.5->langchain_community) (2.7.0)\r\n",
      "Requirement already satisfied: certifi>=2023.5.7 in ./.venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.32.5->langchain_community) (2026.5.20)\r\n",
      "Requirement already satisfied: greenlet>=1 in ./.venv/lib/python3.12/site-packages (from sqlalchemy<3.0.0,>=1.4.0->langchain_community) (3.5.1)\r\n",
      "Requirement already satisfied: anyio in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.1.125->langchain_community) (4.13.0)\r\n",
      "Requirement already satisfied: httpcore==1.* in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.1.125->langchain_community) (1.0.9)\r\n",
      "Requirement already satisfied: h11>=0.16 in ./.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.1.125->langchain_community) (0.16.0)\r\n",
      "Requirement already satisfied: jsonpointer>=1.9 in ./.venv/lib/python3.12/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.4.0->langchain_community) (3.1.1)\r\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-classic<2.0.0,>=1.0.7->langchain_community) (0.7.0)\r\n",
      "Requirement already satisfied: pydantic-core==2.46.4 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-classic<2.0.0,>=1.0.7->langchain_community) (2.46.4)\r\n"
     ]
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "code",
   "id": "b54aecd7aedf62de",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:01.502963670Z",
     "start_time": "2026-05-29T09:26:00.910905992Z"
    }
   },
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader, CSVLoader, WebBaseLoader, DirectoryLoader\n",
    "from youtube_transcript_api import YouTubeTranscriptApi\n",
    "from langchain_core.documents import Document\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "from langchain_ollama import OllamaEmbeddings\n",
    "from langchain_ibm import WatsonxEmbeddings\n",
    "from langchain_chroma import Chroma\n",
    "from langchain_community.vectorstores import FAISS"
   ],
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_116700/452196621.py:1: DeprecationWarning: `langchain-community` is being sunset and is no longer actively maintained. See https://github.com/langchain-ai/langchain-community/issues/674 for details and migration guidance toward standalone integration packages.\n",
      "  from langchain_community.document_loaders import PyPDFLoader, CSVLoader, WebBaseLoader, DirectoryLoader\n",
      "USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
     ]
    }
   ],
   "execution_count": 10
  },
  {
   "cell_type": "code",
   "id": "16f3e5bacf79359e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:02.036909011Z",
     "start_time": "2026-05-29T09:26:01.523194883Z"
    }
   },
   "source": [
    "loader = PyPDFLoader(\"./data/서울대 미대.pdf\")\n",
    "docs = loader.load()\n",
    "\n",
    "print(f\"총 페이지 수 : {len(docs)}\")\n",
    "print(f\"첫 페이지 텍스트 : {docs[0].page_content[:200]}\")\n",
    "print(f\"메타 데이터 : {docs[0].metadata}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "총 페이지 수 : 23\n",
      "첫 페이지 텍스트 : 학생생활안내 학부과정 \n",
      "Department of Design, College of Fine Arts\n",
      "Seoul National University\n",
      "서울대학교 미술대학 \n",
      "디자인전공\n",
      "2024\n",
      "메타 데이터 : {'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 19.2 (Windows)', 'creationdate': '2024-02-26T13:44:15+09:00', 'moddate': '2024-02-26T13:44:25+09:00', 'trapped': '/False', 'source': './data/서울대 미대.pdf', 'total_pages': 23, 'page': 0, 'page_label': '1'}\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "cell_type": "code",
   "id": "b8957d0379984bc",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:02.101806853Z",
     "start_time": "2026-05-29T09:26:02.056251417Z"
    }
   },
   "source": [
    "csv_loader = CSVLoader(\"./data/restaurant_reviews.csv\",encoding=\"utf-8\",csv_args={'delimiter':','})\n",
    "csv_docs = csv_loader.load()\n",
    "\n",
    "print(f\"총 페이지 수 : {len(csv_docs)}\")\n",
    "print(f\"첫 페이지 텍스트 : {csv_docs[0].page_content[:200]}\")\n",
    "print(f\"메타 데이터 : {csv_docs[0].metadata}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "총 페이지 수 : 30\n",
      "첫 페이지 텍스트 : review: Absolutely loved this place! The pasta was cooked to perfection and the sauce had such a rich, deep flavor. Service was warm and attentive throughout the entire meal.\n",
      "메타 데이터 : {'source': './data/restaurant_reviews.csv', 'row': 0}\n"
     ]
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "id": "f334bc8a11cb3f4e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:03.626226633Z",
     "start_time": "2026-05-29T09:26:02.104670219Z"
    }
   },
   "source": [
    "# WebBaseLoader() -> BeaufifulSoup4 라이브러리 필요\n",
    "\n",
    "web_loader = WebBaseLoader(web_paths=['https://python.org','https://langchain.com'])\n",
    "web_docs = web_loader.load()\n",
    "\n",
    "print(f\"총 페이지 수 : {len(web_docs)}\")\n",
    "print(f\"첫 페이지 텍스트 : {web_docs[0].page_content[:200]}\")\n",
    "print(f\"메타 데이터 : {web_docs[0].metadata}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "총 페이지 수 : 2\n",
      "첫 페이지 텍스트 : \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Welcome to Python.org\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Notice: This page displays a fallback because interactive scripts did not run. Possible causes include disabled JavaScript or fai\n",
      "메타 데이터 : {'source': 'https://python.org', 'title': 'Welcome to Python.org', 'description': 'The official home of the Python Programming Language', 'language': 'en'}\n"
     ]
    }
   ],
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "id": "efa7427b4a19029c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:47.426065120Z",
     "start_time": "2026-05-29T09:26:03.649911645Z"
    }
   },
   "source": [
    "# glob : 지정된 확장자 파일만 선택, \n",
    "dir_loader = DirectoryLoader(path=\"./data\", glob=\"**/*.pdf\",loader_cls=PyPDFLoader,show_progress=True)\n",
    "dir_docs = dir_loader.load()\n",
    "\n",
    "print(f\"총 페이지 수 : {len(dir_docs)}\")\n",
    "print(f\"첫 페이지 텍스트 : {dir_docs[0].page_content[:200]}\")\n",
    "print(f\"메타 데이터 : {dir_docs[0].metadata}\")"
   ],
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 5/5 [00:43<00:00,  8.73s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "총 페이지 수 : 201\n",
      "첫 페이지 텍스트 : 학생생활안내 학부과정 \n",
      "Department of Design, College of Fine Arts\n",
      "Seoul National University\n",
      "서울대학교 미술대학 \n",
      "디자인전공\n",
      "2024\n",
      "메타 데이터 : {'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 19.2 (Windows)', 'creationdate': '2024-02-26T13:44:15+09:00', 'moddate': '2024-02-26T13:44:25+09:00', 'trapped': '/False', 'source': 'data/서울대 미대.pdf', 'total_pages': 23, 'page': 0, 'page_label': '1'}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "execution_count": 14
  },
  {
   "cell_type": "code",
   "id": "ce4cc859d59f99d9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:48.809725907Z",
     "start_time": "2026-05-29T09:26:47.453442125Z"
    }
   },
   "source": [
    "# LangChain YoutubeLoader() X\n",
    "\n",
    "video_id = \"Pn-W41hC764\"\n",
    "ytt_api = YouTubeTranscriptApi()\n",
    "transcript = ytt_api.fetch(video_id, languages=['en'])\n",
    "\n",
    "for idx, t in enumerate(transcript, 1):\n",
    "    print(f\"{idx}: {t.text}\")\n",
    "    print(f\"{idx}: {t.start}\")\n",
    "    print(f\"{idx}: {t.duration}\")\n",
    "\n",
    "text = \" \".join([t.text for t in transcript])\n",
    "\n",
    "# print(text)\n",
    "\n",
    "docs = Document(page_content=text, metadata = {\n",
    "    \"source\":f\"https://www.youtube.com/watch?v={video_id}\",\n",
    "    \"video_id\":video_id\n",
    "})\n",
    "\n",
    "print(docs.page_content[:500])\n",
    "print(docs.metadata)\n"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1: I alluded in my opening remarks to the\n",
      "1: 0.179\n",
      "1: 6.361\n",
      "2: the jobs issue the economic effects on\n",
      "2: 3.6\n",
      "2: 7.32\n",
      "3: employment uh I think you have\n",
      "3: 6.54\n",
      "3: 7.019\n",
      "4: said uh in fact and I'm going to quote\n",
      "4: 10.92\n",
      "4: 4.799\n",
      "5: development of superhuman machine\n",
      "5: 13.559\n",
      "5: 4.441\n",
      "6: intelligence is probably the greatest\n",
      "6: 15.719\n",
      "6: 4.501\n",
      "7: threat to the continued existence of\n",
      "7: 18.0\n",
      "7: 5.52\n",
      "8: humanity end quote you may have had in\n",
      "8: 20.22\n",
      "8: 7.86\n",
      "9: mind the effect on on jobs which is\n",
      "9: 23.52\n",
      "9: 6.72\n",
      "10: really my biggest nightmare\n",
      "10: 28.08\n",
      "10: 5.96\n",
      "11: in the long term uh let me ask you uh\n",
      "11: 30.24\n",
      "11: 6.659\n",
      "12: what your biggest nightmare is and\n",
      "12: 34.04\n",
      "12: 7.8\n",
      "13: whether you share that concern\n",
      "13: 36.899\n",
      "13: 4.941\n",
      "14: like with all technological revolutions\n",
      "14: 42.0\n",
      "14: 4.98\n",
      "15: I expect there to be significant impact\n",
      "15: 43.98\n",
      "15: 5.16\n",
      "16: on jobs but exactly what that impact\n",
      "16: 46.98\n",
      "16: 3.78\n",
      "17: looks like is very difficult to predict\n",
      "17: 49.14\n",
      "17: 4.02\n",
      "18: if we went back to the the other side of\n",
      "18: 50.76\n",
      "18: 3.84\n",
      "19: a previous technological Revolution\n",
      "19: 53.16\n",
      "19: 3.66\n",
      "20: talking about the jobs that exist on the\n",
      "20: 54.6\n",
      "20: 3.54\n",
      "21: other side\n",
      "21: 56.82\n",
      "21: 3.12\n",
      "22: um you know you can go back and read\n",
      "22: 58.14\n",
      "22: 3.599\n",
      "23: books of this it's what people said at\n",
      "23: 59.94\n",
      "23: 3.72\n",
      "24: the time it's difficult\n",
      "24: 61.739\n",
      "24: 4.74\n",
      "25: I believe that there will be far greater\n",
      "25: 63.66\n",
      "25: 4.62\n",
      "26: jobs on the other side of this and the\n",
      "26: 66.479\n",
      "26: 3.781\n",
      "27: jobs of today will get better I think\n",
      "27: 68.28\n",
      "27: 3.54\n",
      "28: it's important\n",
      "28: 70.26\n",
      "28: 3.0\n",
      "29: first of all I think it's important to\n",
      "29: 71.82\n",
      "29: 3.659\n",
      "30: understand and think about gpd4 as a\n",
      "30: 73.26\n",
      "30: 4.98\n",
      "31: tool not a creature which is easy to get\n",
      "31: 75.479\n",
      "31: 4.381\n",
      "32: confused and it's a tool that people\n",
      "32: 78.24\n",
      "32: 4.5\n",
      "33: have a great deal of control over and\n",
      "33: 79.86\n",
      "33: 6.66\n",
      "34: how they use it and second gpt4 and\n",
      "34: 82.74\n",
      "34: 6.96\n",
      "35: things other systems like it are good at\n",
      "35: 86.52\n",
      "35: 6.0\n",
      "36: doing tasks not jobs and so you see\n",
      "36: 89.7\n",
      "36: 6.12\n",
      "37: already people that are using gpt4 to do\n",
      "37: 92.52\n",
      "37: 6.059\n",
      "38: their job much more efficiently by\n",
      "38: 95.82\n",
      "38: 6.9\n",
      "39: helping them with tasks now gbt4 will I\n",
      "39: 98.579\n",
      "39: 6.18\n",
      "40: think entirely automate away some jobs\n",
      "40: 102.72\n",
      "40: 4.859\n",
      "41: and it will create new ones that we\n",
      "41: 104.759\n",
      "41: 5.161\n",
      "42: believe will be much better this happens\n",
      "42: 107.579\n",
      "42: 4.441\n",
      "43: again my understanding of the history of\n",
      "43: 109.92\n",
      "43: 4.379\n",
      "44: technology is one long technological\n",
      "44: 112.02\n",
      "44: 3.72\n",
      "45: Revolution not a bunch of different ones\n",
      "45: 114.299\n",
      "45: 2.82\n",
      "46: put together but this has been\n",
      "46: 115.74\n",
      "46: 4.019\n",
      "47: continually happening we as our quality\n",
      "47: 117.119\n",
      "47: 5.521\n",
      "48: of life raises and as machines and tools\n",
      "48: 119.759\n",
      "48: 4.381\n",
      "49: that we create can help us live better\n",
      "49: 122.64\n",
      "49: 4.439\n",
      "50: lives uh the bar raises for what we do\n",
      "50: 124.14\n",
      "50: 5.339\n",
      "51: and and our human ability and what we\n",
      "51: 127.079\n",
      "51: 4.501\n",
      "52: spend our time going after uh goes after\n",
      "52: 129.479\n",
      "52: 3.84\n",
      "53: more ambitious more satisfying projects\n",
      "53: 131.58\n",
      "53: 4.62\n",
      "54: so there there will be an impact on jobs\n",
      "54: 133.319\n",
      "54: 5.28\n",
      "55: we try to be very clear about that and I\n",
      "55: 136.2\n",
      "55: 4.8\n",
      "56: think it will require\n",
      "56: 138.599\n",
      "56: 3.961\n",
      "57: partnership between the industry and\n",
      "57: 141.0\n",
      "57: 2.879\n",
      "58: government but mostly action by\n",
      "58: 142.56\n",
      "58: 2.7\n",
      "59: government to figure out how we want to\n",
      "59: 143.879\n",
      "59: 3.601\n",
      "60: mitigate that\n",
      "60: 145.26\n",
      "60: 4.44\n",
      "61: but I'm very optimistic about how great\n",
      "61: 147.48\n",
      "61: 4.02\n",
      "62: the jobs of the future will be thank you\n",
      "62: 149.7\n",
      "62: 4.679\n",
      "63: let me ask Ms Montgomery and Professor\n",
      "63: 151.5\n",
      "63: 4.92\n",
      "64: Marcus for your reactions those\n",
      "64: 154.379\n",
      "64: 4.86\n",
      "65: questions as well Ms Montgomery on the\n",
      "65: 156.42\n",
      "65: 5.94\n",
      "66: jobs Point yeah I mean well it's a\n",
      "66: 159.239\n",
      "66: 5.58\n",
      "67: hugely important question\n",
      "67: 162.36\n",
      "67: 3.959\n",
      "68: um and it's one that we've been talking\n",
      "68: 164.819\n",
      "68: 4.681\n",
      "69: about for a really long time at IBM you\n",
      "69: 166.319\n",
      "69: 5.461\n",
      "70: know we do believe that Ai and we've\n",
      "70: 169.5\n",
      "70: 4.019\n",
      "71: said it for a long time is going to\n",
      "71: 171.78\n",
      "71: 3.66\n",
      "72: change every job new jobs will be\n",
      "72: 173.519\n",
      "72: 4.681\n",
      "73: created many more jobs will be\n",
      "73: 175.44\n",
      "73: 4.62\n",
      "74: transformed and some jobs will\n",
      "74: 178.2\n",
      "74: 4.98\n",
      "75: transition away I'm a personal example\n",
      "75: 180.06\n",
      "75: 5.759\n",
      "76: of a job that didn't exist when I joined\n",
      "76: 183.18\n",
      "76: 5.1\n",
      "77: IBM and I have a team of AI governance\n",
      "77: 185.819\n",
      "77: 5.34\n",
      "78: professionals who are in new roles that\n",
      "78: 188.28\n",
      "78: 4.98\n",
      "79: we created you know as early as three\n",
      "79: 191.159\n",
      "79: 4.201\n",
      "80: years ago I mean they're new and they're\n",
      "80: 193.26\n",
      "80: 4.38\n",
      "81: growing so I think the most important\n",
      "81: 195.36\n",
      "81: 4.32\n",
      "82: thing that we could be doing and Canon\n",
      "82: 197.64\n",
      "82: 5.28\n",
      "83: should be doing now is to prepare the\n",
      "83: 199.68\n",
      "83: 5.52\n",
      "84: workforce of today and the workforce of\n",
      "84: 202.92\n",
      "84: 5.64\n",
      "85: tomorrow for partnering with F AI\n",
      "85: 205.2\n",
      "85: 5.34\n",
      "86: Technologies and using them and we've\n",
      "86: 208.56\n",
      "86: 4.74\n",
      "87: been very involved for for years now in\n",
      "87: 210.54\n",
      "87: 6.0\n",
      "88: doing that in focusing on skills-based\n",
      "88: 213.3\n",
      "88: 4.2\n",
      "89: hiring\n",
      "89: 216.54\n",
      "89: 4.14\n",
      "90: in educating for the skills of the\n",
      "90: 217.5\n",
      "90: 6.299\n",
      "91: future our skills build platform has\n",
      "91: 220.68\n",
      "91: 4.919\n",
      "92: seven million Learners and over a\n",
      "92: 223.799\n",
      "92: 4.08\n",
      "93: thousand courses worldwide focused on\n",
      "93: 225.599\n",
      "93: 5.821\n",
      "94: skills and we've pledged to train 30\n",
      "94: 227.879\n",
      "94: 6.661\n",
      "95: million individuals by 2030 in the\n",
      "95: 231.42\n",
      "95: 5.399\n",
      "96: skills that are needed for society today\n",
      "96: 234.54\n",
      "96: 4.86\n",
      "97: thank you Professor Marcus may I go back\n",
      "97: 236.819\n",
      "97: 5.041\n",
      "98: to the first question as well absolutely\n",
      "98: 239.4\n",
      "98: 4.979\n",
      "99: on on the subject of nutrition labels I\n",
      "99: 241.86\n",
      "99: 5.04\n",
      "100: think we absolutely need to do that I\n",
      "100: 244.379\n",
      "100: 3.601\n",
      "101: think that there's some technical\n",
      "101: 246.9\n",
      "101: 2.82\n",
      "102: challenges in that building proper\n",
      "102: 247.98\n",
      "102: 3.24\n",
      "103: nutrition labels goes hand in hand with\n",
      "103: 249.72\n",
      "103: 3.96\n",
      "104: transparency the biggest scientific\n",
      "104: 251.22\n",
      "104: 4.019\n",
      "105: challenge in understanding these models\n",
      "105: 253.68\n",
      "105: 3.36\n",
      "106: is how they generalize what do they\n",
      "106: 255.239\n",
      "106: 3.84\n",
      "107: memorize and what new things do they do\n",
      "107: 257.04\n",
      "107: 4.08\n",
      "108: the more that there's in the data set\n",
      "108: 259.079\n",
      "108: 3.661\n",
      "109: for example the thing that you want to\n",
      "109: 261.12\n",
      "109: 3.96\n",
      "110: test accuracy on the less you can get a\n",
      "110: 262.74\n",
      "110: 4.08\n",
      "111: proper read on that so it's important\n",
      "111: 265.08\n",
      "111: 3.36\n",
      "112: first of all that scientists be part of\n",
      "112: 266.82\n",
      "112: 3.78\n",
      "113: that process and second that we have\n",
      "113: 268.44\n",
      "113: 3.539\n",
      "114: much greater transparency about what\n",
      "114: 270.6\n",
      "114: 3.3\n",
      "115: actually goes into these systems if we\n",
      "115: 271.979\n",
      "115: 3.601\n",
      "116: don't know what's in them then we don't\n",
      "116: 273.9\n",
      "116: 3.48\n",
      "117: know exactly how well they're doing when\n",
      "117: 275.58\n",
      "117: 3.6\n",
      "118: we give something new and we don't know\n",
      "118: 277.38\n",
      "118: 3.3\n",
      "119: how good a benchmark that will be for\n",
      "119: 279.18\n",
      "119: 3.54\n",
      "120: something that's entirely novel so I\n",
      "120: 280.68\n",
      "120: 3.48\n",
      "121: could go into that more but I want to\n",
      "121: 282.72\n",
      "121: 2.539\n",
      "122: flag that\n",
      "122: 284.16\n",
      "122: 4.38\n",
      "123: second is on jobs past performance\n",
      "123: 285.259\n",
      "123: 5.321\n",
      "124: history is not a guarantee of the future\n",
      "124: 288.54\n",
      "124: 4.56\n",
      "125: it has always been the case in the past\n",
      "125: 290.58\n",
      "125: 5.04\n",
      "126: that we have had more jobs that new jobs\n",
      "126: 293.1\n",
      "126: 4.68\n",
      "127: new professions come in as new\n",
      "127: 295.62\n",
      "127: 4.2\n",
      "128: technologies come in I think this one's\n",
      "128: 297.78\n",
      "128: 3.12\n",
      "129: going to be different and the real\n",
      "129: 299.82\n",
      "129: 3.24\n",
      "130: question is over what time time scale is\n",
      "130: 300.9\n",
      "130: 3.6\n",
      "131: it going to be 10 years is it going to\n",
      "131: 303.06\n",
      "131: 2.82\n",
      "132: be 100 years and I don't think anybody\n",
      "132: 304.5\n",
      "132: 3.36\n",
      "133: knows the answer to that question I\n",
      "133: 305.88\n",
      "133: 4.14\n",
      "134: think in the long run so-called\n",
      "134: 307.86\n",
      "134: 4.2\n",
      "135: artificial general intelligence really\n",
      "135: 310.02\n",
      "135: 4.08\n",
      "136: will replace a large fraction of human\n",
      "136: 312.06\n",
      "136: 4.079\n",
      "137: jobs we're not that close to artificial\n",
      "137: 314.1\n",
      "137: 4.14\n",
      "138: general intelligence despite all of the\n",
      "138: 316.139\n",
      "138: 4.081\n",
      "139: media hype and so forth I would say that\n",
      "139: 318.24\n",
      "139: 4.019\n",
      "140: what we have right now is just small\n",
      "140: 320.22\n",
      "140: 4.199\n",
      "141: sampling of the AI that we will build in\n",
      "141: 322.259\n",
      "141: 4.5\n",
      "142: 20 years people will laugh at this as I\n",
      "142: 324.419\n",
      "142: 4.381\n",
      "143: think it was Senator Hawley made the but\n",
      "143: 326.759\n",
      "143: 3.481\n",
      "144: maybe Senator Durbin made the example\n",
      "144: 328.8\n",
      "144: 3.119\n",
      "145: about this it was Senator Durbin made\n",
      "145: 330.24\n",
      "145: 3.179\n",
      "146: the example about cell phones when we\n",
      "146: 331.919\n",
      "146: 4.741\n",
      "147: look back at the AI of today 20 years\n",
      "147: 333.419\n",
      "147: 4.981\n",
      "148: ago we'll be like wow that stuff was\n",
      "148: 336.66\n",
      "148: 4.02\n",
      "149: really unreliable it couldn't really do\n",
      "149: 338.4\n",
      "149: 3.84\n",
      "150: planning which is an important technical\n",
      "150: 340.68\n",
      "150: 4.32\n",
      "151: aspect it's reasoning wasability and\n",
      "151: 342.24\n",
      "151: 5.16\n",
      "152: reasoning abilities were limited but\n",
      "152: 345.0\n",
      "152: 4.44\n",
      "153: when we get to AGI or artificial general\n",
      "153: 347.4\n",
      "153: 3.48\n",
      "154: intelligence mainly let's say it's 50\n",
      "154: 349.44\n",
      "154: 3.3\n",
      "155: years that really is going to have I\n",
      "155: 350.88\n",
      "155: 4.8\n",
      "156: think profound effects on labor and\n",
      "156: 352.74\n",
      "156: 4.38\n",
      "157: there's just no way around that and last\n",
      "157: 355.68\n",
      "157: 2.64\n",
      "158: I don't know if I'm allowed to do this\n",
      "158: 357.12\n",
      "158: 3.6\n",
      "159: but I will note that Sam's worst fear I\n",
      "159: 358.32\n",
      "159: 4.2\n",
      "160: do not think is employment and he never\n",
      "160: 360.72\n",
      "160: 4.199\n",
      "161: told us what his worst fear actually is\n",
      "161: 362.52\n",
      "161: 5.88\n",
      "162: and I think it's germane to find out\n",
      "162: 364.919\n",
      "162: 7.56\n",
      "163: thank you I'm going to ask\n",
      "163: 368.4\n",
      "163: 7.2\n",
      "164: Mr Altman if he cares to respond yeah\n",
      "164: 372.479\n",
      "164: 5.401\n",
      "165: look we have tried to be very clear\n",
      "165: 375.6\n",
      "165: 6.42\n",
      "166: about the magnitude of the risks here I\n",
      "166: 377.88\n",
      "166: 7.319\n",
      "167: I think jobs and employment and what\n",
      "167: 382.02\n",
      "167: 4.2\n",
      "168: we're all going to do with our time\n",
      "168: 385.199\n",
      "168: 3.421\n",
      "169: really matters I agree that when we get\n",
      "169: 386.22\n",
      "169: 4.62\n",
      "170: to very powerful systems the landscape\n",
      "170: 388.62\n",
      "170: 3.359\n",
      "171: will change I think I'm just more\n",
      "171: 390.84\n",
      "171: 3.84\n",
      "172: optimistic that we are incredibly\n",
      "172: 391.979\n",
      "172: 4.381\n",
      "173: creative and we find new things to do\n",
      "173: 394.68\n",
      "173: 3.239\n",
      "174: with better tools and that will keep\n",
      "174: 396.36\n",
      "174: 2.82\n",
      "175: happening\n",
      "175: 397.919\n",
      "175: 2.34\n",
      "176: um\n",
      "176: 399.18\n",
      "176: 2.88\n",
      "177: my worst fears are that we cause\n",
      "177: 400.259\n",
      "177: 4.201\n",
      "178: significant we the field the technology\n",
      "178: 402.06\n",
      "178: 4.68\n",
      "179: the industry cause significant harm to\n",
      "179: 404.46\n",
      "179: 4.019\n",
      "180: the world\n",
      "180: 406.74\n",
      "180: 2.82\n",
      "181: I think that could happen a lot of\n",
      "181: 408.479\n",
      "181: 2.701\n",
      "182: different ways it's why we started the\n",
      "182: 409.56\n",
      "182: 4.56\n",
      "183: company it's a big part of why I'm here\n",
      "183: 411.18\n",
      "183: 4.62\n",
      "184: today and why we've been here in the\n",
      "184: 414.12\n",
      "184: 3.299\n",
      "185: past and we've been able to spend some\n",
      "185: 415.8\n",
      "185: 3.78\n",
      "186: time with you I think if this technology\n",
      "186: 417.419\n",
      "186: 5.941\n",
      "187: goes wrong it can go quite wrong and we\n",
      "187: 419.58\n",
      "187: 5.58\n",
      "188: want to be vocal about that we want to\n",
      "188: 423.36\n",
      "188: 3.779\n",
      "189: work with the government to prevent that\n",
      "189: 425.16\n",
      "189: 4.14\n",
      "190: from happening but we we try to be very\n",
      "190: 427.139\n",
      "190: 4.441\n",
      "191: clear-eyed about what the downside case\n",
      "191: 429.3\n",
      "191: 3.959\n",
      "192: is and the work that we have to do to\n",
      "192: 431.58\n",
      "192: 3.239\n",
      "193: mitigate that\n",
      "193: 433.259\n",
      "193: 4.641\n",
      "194: thank you and\n",
      "194: 434.819\n",
      "194: 3.081\n",
      "I alluded in my opening remarks to the the jobs issue the economic effects on employment uh I think you have said uh in fact and I'm going to quote development of superhuman machine intelligence is probably the greatest threat to the continued existence of humanity end quote you may have had in mind the effect on on jobs which is really my biggest nightmare in the long term uh let me ask you uh what your biggest nightmare is and whether you share that concern like with all technological revoluti\n",
      "{'source': 'https://www.youtube.com/watch?v=Pn-W41hC764', 'video_id': 'Pn-W41hC764'}\n"
     ]
    }
   ],
   "execution_count": 15
  },
  {
   "cell_type": "markdown",
   "id": "c98861ae471f551d",
   "metadata": {},
   "source": [
    "### RecursiveCharacterTextSplitter\n",
    "- 일반적인 RAG 에서 주로 사용\n",
    "- LLM의 Context Window 는 제한이 있어 문서를 한 번에 넣을 수 없음\n",
    "- TextSplitter 는 문서를 적절한 크기의 청크(chunk)로 분할해 줌\n",
    "- chunk_size:청크 최대 크기, chunk_overlap : 청크 겹치는 부분(문맥 유지)"
   ]
  },
  {
   "cell_type": "code",
   "id": "5b9a39d18d8ebec0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:49.083792992Z",
     "start_time": "2026-05-29T09:26:48.825310296Z"
    }
   },
   "source": [
    "# 1. 문서 로드\n",
    "loader = PyPDFLoader(\"./data/서울대 미대.pdf\")\n",
    "docs = loader.load()\n",
    "\n",
    "# 2. 문서 분할\n",
    "spliter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50, length_function=len, separators=[\"\\n\\n\", \"\\n\", \" \", \"\"])\n",
    "\n",
    "chunks= spliter.split_documents(docs)\n",
    "print(f\"원본 페이지 수 : {len(docs)}\")\n",
    "print(f\"분할된 청크 수  : {len(chunks)}\")\n",
    "print(f\"첫 청크 길이 : {len(chunks[0].page_content)}\")\n",
    "print(f\"첫 청크 내용 : {chunks[0].page_content}\")\n",
    "print(f\"메타 데이터 : {chunks[0].metadata}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "원본 페이지 수 : 23\n",
      "분할된 청크 수  : 65\n",
      "첫 청크 길이 : 104\n",
      "첫 청크 내용 : 학생생활안내 학부과정 \n",
      "Department of Design, College of Fine Arts\n",
      "Seoul National University\n",
      "서울대학교 미술대학 \n",
      "디자인전공\n",
      "2024\n",
      "메타 데이터 : {'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 19.2 (Windows)', 'creationdate': '2024-02-26T13:44:15+09:00', 'moddate': '2024-02-26T13:44:25+09:00', 'trapped': '/False', 'source': './data/서울대 미대.pdf', 'total_pages': 23, 'page': 0, 'page_label': '1'}\n"
     ]
    }
   ],
   "execution_count": 16
  },
  {
   "cell_type": "code",
   "id": "bf6e7a96327eaa31",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:49.111269657Z",
     "start_time": "2026-05-29T09:26:49.086788776Z"
    }
   },
   "source": [
    "# chunk 파라미터 비교\n",
    "\n",
    "text = \"파이썬은 1991년에 발표된 언어이다.\" * 20\n",
    "\n",
    "configs = [\n",
    "    {\"chunk_size\":100, \"chunk_overlap\":0},\n",
    "    {\"chunk_size\":100, \"chunk_overlap\":20},\n",
    "    {\"chunk_size\":200, \"chunk_overlap\":50},\n",
    "]\n",
    "\n",
    "for cfg in configs:\n",
    "    sp = RecursiveCharacterTextSplitter(**cfg)\n",
    "    chunks = sp.split_text(text)\n",
    "    print(f\"size={cfg['chunk_size']}, overlap={cfg['chunk_overlap']}\")\n",
    "    print(f\" {len(chunks)}개 청크, 첫 청크 : '{chunks[0][:30]}....'\")\n",
    "    if len(chunks) > 1:\n",
    "        print(f\" 첫 청크 끝 : '{chunks[0][-20:]}'\")\n",
    "        print(f\" 둘째 청크 시작 : '{chunks[1][:20]}'\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "size=100, overlap=0\n",
      " 5개 청크, 첫 청크 : '파이썬은 1991년에 발표된 언어이다.파이썬은 1991....'\n",
      " 첫 청크 끝 : '언어이다.파이썬은 1991년에 발표된'\n",
      " 둘째 청크 시작 : '언어이다.파이썬은 1991년에 발표된'\n",
      "size=100, overlap=20\n",
      " 5개 청크, 첫 청크 : '파이썬은 1991년에 발표된 언어이다.파이썬은 1991....'\n",
      " 첫 청크 끝 : '언어이다.파이썬은 1991년에 발표된'\n",
      " 둘째 청크 시작 : '1991년에 발표된 언어이다.파이썬은'\n",
      "size=200, overlap=50\n",
      " 3개 청크, 첫 청크 : '파이썬은 1991년에 발표된 언어이다.파이썬은 1991....'\n",
      " 첫 청크 끝 : '발표된 언어이다.파이썬은 1991년에'\n",
      " 둘째 청크 시작 : '1991년에 발표된 언어이다.파이썬은'\n"
     ]
    }
   ],
   "execution_count": 17
  },
  {
   "cell_type": "code",
   "id": "332444d3",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:50.815178540Z",
     "start_time": "2026-05-29T09:26:49.114072217Z"
    }
   },
   "source": [
    "!pip install RecursiveCharacterTextSplitter"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: RecursiveCharacterTextSplitter in ./.venv/lib/python3.12/site-packages (0.1.0)\r\n",
      "Requirement already satisfied: streamlit>=1.32 in ./.venv/lib/python3.12/site-packages (from RecursiveCharacterTextSplitter) (1.57.0)\r\n",
      "Requirement already satisfied: altair!=5.4.0,!=5.4.1,<7,>=4.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (6.1.0)\r\n",
      "Requirement already satisfied: blinker<2,>=1.5.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (1.9.0)\r\n",
      "Requirement already satisfied: cachetools<8,>=5.5 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (7.1.4)\r\n",
      "Requirement already satisfied: click<9,>=7.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (8.4.1)\r\n",
      "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (3.1.50)\r\n",
      "Requirement already satisfied: numpy<3,>=1.23 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (2.4.6)\r\n",
      "Requirement already satisfied: packaging>=20 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (26.2)\r\n",
      "Requirement already satisfied: pandas<4,>=1.4.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (2.3.3)\r\n",
      "Requirement already satisfied: pillow<13,>=7.1.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (12.2.0)\r\n",
      "Requirement already satisfied: pydeck<1,>=0.8.0b4 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (0.9.2)\r\n",
      "Requirement already satisfied: protobuf<8,>=3.20 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (6.33.6)\r\n",
      "Requirement already satisfied: pyarrow>=7.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (24.0.0)\r\n",
      "Requirement already satisfied: requests<3,>=2.27 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (2.34.2)\r\n",
      "Requirement already satisfied: tenacity<10,>=8.1.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (9.1.4)\r\n",
      "Requirement already satisfied: toml<2,>=0.10.1 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (0.10.2)\r\n",
      "Requirement already satisfied: typing-extensions<5,>=4.10.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (4.15.0)\r\n",
      "Requirement already satisfied: starlette>=0.40.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (1.1.0)\r\n",
      "Requirement already satisfied: uvicorn>=0.30.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (0.48.0)\r\n",
      "Requirement already satisfied: httptools>=0.6.3 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (0.8.0)\r\n",
      "Requirement already satisfied: anyio>=4.0.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (4.13.0)\r\n",
      "Requirement already satisfied: python-multipart>=0.0.10 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (0.0.29)\r\n",
      "Requirement already satisfied: websockets>=12.0.0 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (16.0)\r\n",
      "Requirement already satisfied: itsdangerous>=2.1.2 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (2.2.0)\r\n",
      "Requirement already satisfied: watchdog<7,>=2.1.5 in ./.venv/lib/python3.12/site-packages (from streamlit>=1.32->RecursiveCharacterTextSplitter) (6.0.0)\r\n",
      "Requirement already satisfied: jinja2 in ./.venv/lib/python3.12/site-packages (from altair!=5.4.0,!=5.4.1,<7,>=4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (3.1.6)\r\n",
      "Requirement already satisfied: jsonschema>=3.0 in ./.venv/lib/python3.12/site-packages (from altair!=5.4.0,!=5.4.1,<7,>=4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (4.26.0)\r\n",
      "Requirement already satisfied: narwhals>=2.4.0 in ./.venv/lib/python3.12/site-packages (from altair!=5.4.0,!=5.4.1,<7,>=4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (2.21.2)\r\n",
      "Requirement already satisfied: idna>=2.8 in ./.venv/lib/python3.12/site-packages (from anyio>=4.0.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (3.16)\r\n",
      "Requirement already satisfied: gitdb<5,>=4.0.1 in ./.venv/lib/python3.12/site-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit>=1.32->RecursiveCharacterTextSplitter) (4.0.12)\r\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in ./.venv/lib/python3.12/site-packages (from pandas<4,>=1.4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (2.9.0.post0)\r\n",
      "Requirement already satisfied: pytz>=2020.1 in ./.venv/lib/python3.12/site-packages (from pandas<4,>=1.4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (2026.2)\r\n",
      "Requirement already satisfied: tzdata>=2022.7 in ./.venv/lib/python3.12/site-packages (from pandas<4,>=1.4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (2026.2)\r\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.32->RecursiveCharacterTextSplitter) (3.4.7)\r\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.32->RecursiveCharacterTextSplitter) (2.7.0)\r\n",
      "Requirement already satisfied: certifi>=2023.5.7 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.32->RecursiveCharacterTextSplitter) (2026.5.20)\r\n",
      "Requirement already satisfied: h11>=0.8 in ./.venv/lib/python3.12/site-packages (from uvicorn>=0.30.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (0.16.0)\r\n",
      "Requirement already satisfied: smmap<6,>=3.0.1 in ./.venv/lib/python3.12/site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit>=1.32->RecursiveCharacterTextSplitter) (5.0.3)\r\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in ./.venv/lib/python3.12/site-packages (from jinja2->altair!=5.4.0,!=5.4.1,<7,>=4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (3.0.3)\r\n",
      "Requirement already satisfied: attrs>=22.2.0 in ./.venv/lib/python3.12/site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<7,>=4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (26.1.0)\r\n",
      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in ./.venv/lib/python3.12/site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<7,>=4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (2025.9.1)\r\n",
      "Requirement already satisfied: referencing>=0.28.4 in ./.venv/lib/python3.12/site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<7,>=4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (0.37.0)\r\n",
      "Requirement already satisfied: rpds-py>=0.25.0 in ./.venv/lib/python3.12/site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<7,>=4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (2026.5.1)\r\n",
      "Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas<4,>=1.4.0->streamlit>=1.32->RecursiveCharacterTextSplitter) (1.17.0)\r\n"
     ]
    }
   ],
   "execution_count": 18
  },
  {
   "cell_type": "markdown",
   "id": "d2ef279e18e05037",
   "metadata": {},
   "source": [
    "## Embedding\n",
    "- 텍스트를 고차원 벡터로 변환(의미가 비슷할수록 벡터가 가깝다)\n",
    "- RAG 에서 임베딩은 1) 문서 청크 저장, 2) 질문 검색 시 사용\n",
    "- 임베딩모델과 LLM 은 별개"
   ]
  },
  {
   "cell_type": "code",
   "id": "7ebc937c7e571f99",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:51.031447306Z",
     "start_time": "2026-05-29T09:26:50.831145821Z"
    }
   },
   "source": [
    "ollama_embedding = OllamaEmbeddings(model=\"nomic-embed-text-v2-moe\")\n",
    "\n",
    "# 단일 텍스트 임베딩 : embed_query\n",
    "vector = ollama_embedding.embed_query(\"파이썬이란?\")\n",
    "print(f\"벡터 차원 {len(vector)}\")\n",
    "print(f\"첫 5개 값 {vector[:5]}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "벡터 차원 768\n",
      "첫 5개 값 [0.07053184, -0.010346294, 0.024336357, -0.007980802, -0.026818372]\n"
     ]
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "id": "2386ae1e4680c5b3",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:52.852635956Z",
     "start_time": "2026-05-29T09:26:51.041135880Z"
    }
   },
   "source": [
    "watson_embedding = WatsonxEmbeddings(\n",
    "    model_id=\"ibm/granite-embedding-278m-multilingual\",\n",
    "    url = f\"{watsonx_ai_url}\",\n",
    "    api_key = f\"{apikey}\",\n",
    "    project_id=f\"{project_id}\"\n",
    ")\n",
    "\n",
    "# 단일 텍스트 임베딩 : embed_query\n",
    "vector = watson_embedding.embed_query(\"파이썬이란?\")\n",
    "print(f\"벡터 차원 {len(vector)}\")\n",
    "print(f\"첫 5개 값 {vector[:5]}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "벡터 차원 768\n",
      "첫 5개 값 [-0.017763184383511543, 0.026020662859082222, -0.04275166243314743, -0.020823732018470764, 0.04191151261329651]\n"
     ]
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "id": "e6be692377091b86",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:53.082295330Z",
     "start_time": "2026-05-29T09:26:52.868375167Z"
    }
   },
   "source": [
    "texts = [\"파이썬이란?\", \"자바란 무엇인가?\", \"오늘의 날씨는?\"]\n",
    "\n",
    "# 여러 문서 임베딩 : embedembed_documents\n",
    "vectors = ollama_embedding.embed_documents(texts)\n",
    "print(f\"임베딩 문서 수 {len(vectors)}\")\n"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "임베딩 문서 수 3\n"
     ]
    }
   ],
   "execution_count": 21
  },
  {
   "cell_type": "code",
   "id": "9769ce374115c75c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:53.098487022Z",
     "start_time": "2026-05-29T09:26:53.085586053Z"
    }
   },
   "source": [
    "# 벡터 유사도\n",
    "import numpy as np\n",
    "\n",
    "# 코사인 유사도\n",
    "def cosine_sim(a, b):\n",
    "    a, b = np.array(a), np.array(b)\n",
    "    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))"
   ],
   "outputs": [],
   "execution_count": 22
  },
  {
   "cell_type": "code",
   "id": "3ec252669eab5973",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:53.513965083Z",
     "start_time": "2026-05-29T09:26:53.101896761Z"
    }
   },
   "source": [
    "vec1 = ollama_embedding.embed_query(\"파이썬이란?\")\n",
    "vec2 = ollama_embedding.embed_query(\"python이란?\")\n",
    "vec3 = ollama_embedding.embed_query(\"오늘 저녁 뭐 먹지?\")\n",
    "\n",
    "print(f\"파이썬 vs python : {cosine_sim(vec1, vec2):.3f}\")\n",
    "print(f\"파이썬 vs 날씨 : {cosine_sim(vec1, vec3):.3f}\")\n"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "파이썬 vs python : 0.760\n",
      "파이썬 vs 날씨 : 0.129\n"
     ]
    }
   ],
   "execution_count": 23
  },
  {
   "cell_type": "markdown",
   "id": "9de31788cdadb53",
   "metadata": {},
   "source": [
    "### 벡터 저장소\n",
    "- ChromaDB, FAISS, Milvus, Qdrant, Pinecone..."
   ]
  },
  {
   "cell_type": "code",
   "id": "655b52f1ae0aee7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:56.649757013Z",
     "start_time": "2026-05-29T09:26:53.517351007Z"
    }
   },
   "source": [
    "# 1. 문서로드 및 분할\n",
    "loader = PyPDFLoader(\"./data/서울대 미대.pdf\")\n",
    "docs = loader.load()\n",
    "splitter = RecursiveCharacterTextSplitter(\n",
    "    chunk_size=500, chunk_overlap=50, length_function=len, separators=['\\n\\n','\\n', ' ',''])\n",
    "\n",
    "chunks = splitter.split_documents(docs)\n",
    "\n",
    "# 2. 임베딩\n",
    "ollama_embedding = OllamaEmbeddings(model=\"nomic-embed-text-v2-moe\")\n",
    "\n",
    "# 3. 벡터 스토어(ChromaDB) 저장\n",
    "vectorstore = Chroma.from_documents(\n",
    "    documents=chunks, embedding=ollama_embedding, persist_directory=\"./db/chroma_db\",collection_name=\"my_docs\"\n",
    ")\n"
   ],
   "outputs": [],
   "execution_count": 24
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:56.960269658Z",
     "start_time": "2026-05-29T09:26:56.674152094Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 4. 검색(Retrieval)\n",
    "# 질문 => 벡터로 변경, 유사도 높은 걸로 몇개 추출할거냐?\n",
    "# similarity_search() : 질문 => 벡터로 변경, 거리 기반(코사인유사도)\n",
    "\n",
    "# results = vectorstore.similarity_search(\"수강신청 내역확인 방법은?\",k=3)\n",
    "\n",
    "# for idx, doc in enumerate(results,1):\n",
    "#     print(f\"\\n[결과] {idx} 출처 : {doc.metadata}\")\n",
    "#     print(doc.page_content[:300])\n",
    "\n",
    "# 유사도 점수와 함꼐 검색\n",
    "results_score = vectorstore.similarity_search_with_score(\"수강신청 내역확인 방법은?\",k=3)\n",
    "for doc, score in results_score:\n",
    "    print(f\"유사도 : {score:.4f}, {doc.page_content[:20]}\")"
   ],
   "id": "b5a856383ef4d1a6",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "유사도 : 0.8489, 외부수업의 경우 반드시 담당 강사의 \n",
      "유사도 : 0.8489, 외부수업의 경우 반드시 담당 강사의 \n",
      "유사도 : 0.8489, 외부수업의 경우 반드시 담당 강사의 \n"
     ]
    }
   ],
   "execution_count": 25
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:57.021687664Z",
     "start_time": "2026-05-29T09:26:56.963661699Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# db 확인\n",
    "\n",
    "data = vectorstore.get()\n",
    "print(data['documents'][0][:300])\n",
    "print(data['metadatas'][0])\n",
    "print(f\"청크 개수 : {vectorstore._collection.count()}\")"
   ],
   "id": "7275a661ec5c1e90",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "학생생활안내 학부과정 \n",
      "Department of Design, College of Fine Arts\n",
      "Seoul National University\n",
      "서울대학교 미술대학 \n",
      "디자인전공\n",
      "2024\n",
      "{'producer': 'Adobe PDF Library 17.0', 'page_label': '1', 'creator': 'Adobe InDesign 19.2 (Windows)', 'page': 0, 'moddate': '2024-02-26T13:44:25+09:00', 'total_pages': 23, 'source': './data/서울대 미대.pdf', 'trapped': '/False', 'creationdate': '2024-02-26T13:44:15+09:00'}\n",
      "청크 개수 : 455\n"
     ]
    }
   ],
   "execution_count": 26
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "#### ChromeDB vs FAISS\n",
    "\n",
    "| 항목              | ChromaDB              | FAISS                    |\n",
    "|-------------------|-----------------------|--------------------------|\n",
    "| 저장방식          | 디스크 기반 영구 저장 | 메모리기반중심(저장 가능 |\n",
    "| 검색속도          | 빠름                  | 매우 빠름                |\n",
    "| 메타데이터 필터링 | 지원                  | 제한적                   |\n",
    "| 설치 | 'pip install chromadb' | 'pip install faiss-cpu'|\n",
    "| 서버모드 | 지원 | 내장 서버 기능 없음 |\n",
    "| 권장 용도 | 운영형 RAG | 빠른 실험 / 프로토타입 |"
   ],
   "id": "ff5394a0a563e850"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:59.701154064Z",
     "start_time": "2026-05-29T09:26:57.028420800Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 3. 벡터 스토어(FAISS) 저장\n",
    "faiss_store = FAISS.from_documents(documents=chunks, embedding=ollama_embedding)\n",
    "\n",
    "results = faiss_store.similarity_search(\"수강신청 내역확인 방법은?\",k=3)\n",
    "\n",
    "for idx, doc in enumerate(results,1):\n",
    "    print(doc.page_content[:300])"
   ],
   "id": "23c46da5f839301d",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "외부수업의 경우 반드시 담당 강사의 사인 혹은 통화내역을 증명해야 한다.\n",
      "※취소원 제출 후에는 반드시 마이스누에서 교과목 추가 삭제여부를 확인 한다.\n",
      "개강 후 2주일이 지나면 대부분 수강신청이 확정되며, 담당 교수들은 확정 출석부를 \n",
      "통해서 출석을 확인하게 된다. 하지만, 대부분 선생님들은 확정 출석부를 사용하지 \n",
      "않고, 기존의 출석부를 통해서 출석을 확인하기 때문에 반드시 마이스누에서 본인의 \n",
      "수강신청 여부를 확인하여야 한다. 수업주수 1/2선, 즉 수강신청 취소기간이 지난 \n",
      "이후에는 어떠한 변경도 불가하므로 이후의 수강신청 관련\n",
      "20 21\n",
      "학사일정표를 통해 수강신청 기간을 알 수 있으며 포털 마이스누 > 학사정보 > \n",
      "나의정보 > 종합정보 > 학사일정 페이지를 참고한다. 수강신청 기간에는 마이스누에서 \n",
      "바로 수강신청 페이지로 이 동할 수 있다. 수강 신청은 총 18학점까지 가능하며, \n",
      "직전학기 2학기 평점 3.3 이상일 경우 21학 점까지 신청 가능하다. 수강신청 시 \n",
      "교과구분, 전공구분을 명확히 하도록 한다. 교과구분은 전선, 전필, 교양을 말하는 \n",
      "것이며, 전공구분은 주전공, 복수전공, 연계전공 등을 의미한다. 수강신청 시 전공구분, \n",
      "교과구분을 명확히\n",
      "* 정원 외 신청 가능한 경우\n",
      " - 수강반 제한 등으로 정원이 마감되지 않았더라도 수강신청 할 수 없는 경우\n",
      " - 정원이 마감되어 수강신청 할 수 없는 경우\n",
      " - 신청가능학점 : 수강신청가능학점 수강신청한 학점 + 6 학점\n",
      " - 수강확정가능학점 : 수강신청 가능 학점 이내\n",
      "◦ 수강신청\n",
      "◦ 수강정원외신청\n",
      "◦ 수강신청취소\n",
      "수강신청 취소는 수업주수 1/2선까지 가능하다. 정확한 날짜는 포털 마이스누 > \n",
      "학사정보 > 나의정보 > 종합정보 > 학사일정 페이지를 참고한다. 개강 후 일주일동안의 \n",
      "변경기간에는 임의로 취소가 가능하며, 변경기간\n"
     ]
    }
   ],
   "execution_count": 27
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:59.882923209Z",
     "start_time": "2026-05-29T09:26:59.718630252Z"
    }
   },
   "cell_type": "code",
   "source": [
    "results_score = faiss_store.similarity_search_with_score(\"수강신청 내역확인 방법은?\",k=3)\n",
    "for doc, score in results_score:\n",
    "    print(f\"유사도 : {score:.4f}, {doc.page_content[:20]}\")"
   ],
   "id": "1ec7bf136e4d89be",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "유사도 : 0.8489, 외부수업의 경우 반드시 담당 강사의 \n",
      "유사도 : 1.0370, 20 21\n",
      "학사일정표를 통해 수강신청\n",
      "유사도 : 1.1306, * 정원 외 신청 가능한 경우\n",
      " - \n"
     ]
    }
   ],
   "execution_count": 28
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:59.896752469Z",
     "start_time": "2026-05-29T09:26:59.886066742Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 로컬 저장\n",
    "faiss_store.save_local(\"./db/faiss_index\")"
   ],
   "id": "1f17978987053a70",
   "outputs": [],
   "execution_count": 29
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:26:59.919728419Z",
     "start_time": "2026-05-29T09:26:59.899849123Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 로컬 저장 후 불러오기\n",
    "chroma_db=Chroma(persist_directory=\"./db/chroma_db\", embedding_function=ollama_embedding,collection_name=\"my_docs\")\n",
    "faiss_db=FAISS.load_local(\"./db/faiss_index\",embeddings=ollama_embedding,allow_dangerous_deserialization=True)"
   ],
   "id": "fefc38ec86546d1e",
   "outputs": [],
   "execution_count": 30
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:27:08.345176251Z",
     "start_time": "2026-05-29T09:26:59.921812510Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# STEP 1 : 문서로드\n",
    "loader = PyPDFLoader(\"./data/Summary of ChatGPTGPT-4 Research.pdf\")\n",
    "# STEP 2 : 문서분할\n",
    "splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)\n",
    "chunks = splitter.split_documents(loader.load())\n",
    "print(f\"chunks 수 {chunks}\")\n",
    "# STEP 3 : 인덱싱 - 임베딩\n",
    "embeddings = OllamaEmbeddings(model=\"nomic-embed-text-v2-moe\")\n",
    "# STEP 4 : 벡터스토어(Chroma or FAISS)\n",
    "vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory=\"./db/chroma_db\", collection_name=\"research\")\n",
    "# STEP 5 : as_retriever() : Vector Store 를 Retriever 형태로 변환하여 LangChain 에 연결\n",
    "retriever = vectorstore.as_retriever(search_type=\"similarity\", search_kwargs={\"k\":3})\n",
    "# STEP 6 : RAG 프롬프트 생성\n",
    "system_message = \"\"\"\\\n",
    "다음 컨텍스트를 참고하여 질문에 답하세요.\n",
    "컨텍스트에 없는 내용은 모른다고 답하세요.\n",
    "\n",
    "컨텍스트:\n",
    "{context}\n",
    "\"\"\"\n",
    "rag_prompt = ChatPromptTemplate.from_messages([\n",
    "    (\"system\",system_message),\n",
    "    (\"human\",\"{question}\")\n",
    "])\n",
    "\n",
    "def format_docs(docs):\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
    "\n",
    "\n",
    "rag_chain = {\n",
    "    \"context\": retriever | format_docs,\n",
    "    \"question\" : RunnablePassthrough()\n",
    "} | rag_prompt | watson_llm | StrOutputParser()\n",
    "\n",
    "answer = rag_chain.invoke(\"where can i use ChatGPT?\")\n",
    "\n",
    "print(answer)"
   ],
   "id": "23d8b74bfc29fb9a",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chunks 수 [Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 0, 'page_label': '1'}, page_content='Summary of ChatGPT/GPT-4 Research\\nand Perspective Towards the Future of Large\\nLanguage Models\\nYiheng Liu ∗1, Tianle Han ∗1, Siyuan Ma 1, Jiayue Zhang 1,\\nYuanyuan Yang1, Jiaming Tian 1, Hao He 1, Antong Li 2, Mengshen\\nHe1, Zhengliang Liu 3, Zihao Wu 3, Dajiang Zhu 4, Xiang Li 5, Ning\\nQiang1, Dingang Shen 6,7,8, Tianming Liu 3, and Bao Ge †1\\n1School of Physics and Information Technology, Shaanxi Normal University, Xi’an\\n710119 China'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 0, 'page_label': '1'}, page_content='710119 China\\n2School of Life and Technology Biomedical-Engineering, Xi’an Jiaotong University,\\nXi’an 710049, China\\n3School of Computing, The University of Georgia, Athens 30602, USA\\n4Department of Computer Science and Engineering, The University of Texas at\\nArlington, Arlington 76019, USA\\n5Department of Radiology, Massachusetts General Hospital and Harvard Medical\\nSchool, Boston 02115, USA\\n6School of Biomedical Engineering, ShanghaiTech University, Shanghai 201210,\\nChina'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 0, 'page_label': '1'}, page_content='China\\n7Shanghai United Imaging Intelligence Co., Ltd., Shanghai 200230, China\\n8Shanghai Clinical Research and Trial Center, Shanghai 201210, China\\nAbstract\\nThis paper presents a comprehensive survey of ChatGPT and GPT-4,\\nstate-of-the-art large language models (LLM) from the GPT series, and\\ntheir prospective applications across diverse domains. Indeed, key innova-\\ntions such as large-scale pre-training that captures knowledge across the'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 0, 'page_label': '1'}, page_content='entire world wide web, instruction ﬁne-tuning and Reinforcement Learn-\\ning from Human Feedback (RLHF) have played signiﬁcant roles in en-\\nhancing LLMs’ adaptability and performance. We performed an in-depth\\nanalysis of 194 relevant papers on arXiv, encompassing trend analysis,\\nword cloud representation, and distribution analysis across various appli-\\ncation domains. The ﬁndings reveal a signiﬁcant and increasing interest\\nin ChatGPT/GPT-4 research, predominantly centered on direct natural'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 0, 'page_label': '1'}, page_content='language processing applications, while also demonstrating considerable\\npotential in areas ranging from education and history to mathematics,\\n∗Co-ﬁrst author\\n†Corresponding author: bob ge@snnu.edu.cn\\n1\\narXiv:2304.01852v1  [cs.CL]  4 Apr 2023'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 1, 'page_label': '2'}, page_content='medicine, and physics. This study endeavors to furnish insights into Chat-\\nGPT’s capabilities, potential implications, ethical concerns, and oﬀer di-\\nrection for future advancements in this ﬁeld.\\n1 Introduction\\nRecent advances in natural language processing (NLP) have led to the devel-\\nopment of powerful language models such as the GPT (Generative Pre-trained\\nTransformer) series [1, 2, 3, 4, 5], including large language models (LLM) such'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 1, 'page_label': '2'}, page_content='as ChatGPT and GPT-4. These models are pre-trained on vast amounts of\\ntext data and have demonstrated exceptional performance in a wide range of\\nNLP tasks, including language translation, text summarization, and question-\\nanswering. In particular, the ChatGPT model has demonstrated its potential\\nin various ﬁelds, including education, healthcare, reasoning, text generation,\\nhuman-machine interaction, and scientiﬁc research.\\nA key milestone of LLM development is InstructGPT [5], a framework that'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 1, 'page_label': '2'}, page_content='allows for instruction ﬁne-tuning of a pre-trained language model based on Re-\\ninforcement Learning from Human Feedback (RLHF) [6, 5]. This framework\\nenables an LLM to adapt to a wide range of NLP tasks, making it highly ver-\\nsatile and ﬂexible by leveraging human feedback. RLHF enables the model to\\nalign with human preferences and human values, which signiﬁcantly improves\\nfrom large language models that are solely trained text corpora through unsu-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 1, 'page_label': '2'}, page_content='pervised pre-training. ChatGPT is a successor to InstructGPT. Since its release\\nin December 2022, ChatGPT has been equipped with these advanced develop-\\nments, leading to impressive performances in various downstream NLP tasks\\nsuch as reasoning and generalized text generation. These unprecedented NLP\\ncapabilities spur applications in diverse domains such as education, healthcare,\\nhuman-machine interaction, medicine and scientiﬁc research. ChatGPT has re-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 1, 'page_label': '2'}, page_content='ceived widespread attention and interest, leading to an increasing number of\\napplications and research that harness its exceeding potential. The open release\\nof the multi-modal GPT-4 model further expands the horizon of large language\\nmodels and empowers exciting developments that involve diverse data beyond\\ntext.\\nThe purpose of this paper is to provide a comprehensive survey of the existing\\nresearch on ChatGPT and its potential applications in various ﬁelds. To achieve'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 1, 'page_label': '2'}, page_content='this goal, we conducted a thorough analysis of papers related to ChatGPT in\\nthe arXiv repository. As of April 1st, 2023, there are a total of 194 papers\\nmentioning ChatGPT on arXiv. In this study, we conducted a trend analysis of\\nthese papers and generated a word cloud to visualize the commonly used terms.\\nAdditionally, we also examined the distribution of the papers across various\\nﬁelds and presented the corresponding statistics. Figure 1 displays the daily'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 1, 'page_label': '2'}, page_content='submission trend of papers related to ChatGPT, indicating a growing interest\\nin this ﬁeld. Figure 2 illustrates the word cloud analysis of all the papers. We\\ncan observe that the current research is primarily focused on natural language\\nprocessing, but there is still signiﬁcant potential for research in other ﬁelds such\\n2'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 2, 'page_label': '3'}, page_content='Figure 1: The number of papers submitted by researchers per day.\\n3'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 3, 'page_label': '4'}, page_content='Figure 2: Word cloud analysis of all the 194 papers.\\nas education and history. This is further supported by Figure 3, which displays\\nthe distribution of submitted papers across various ﬁelds, highlighting the need\\nfor more research and development in these areas.\\nThis paper aims to shed light on the promising capabilities of ChatGPT\\nand provide insight into its potential impact in the future, including ethical\\nconsiderations. Through this survey, we hope to provide insights into how these'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 3, 'page_label': '4'}, page_content='models can be improved and extended in the future. In section 2, we will\\nreview the existing work related to ChatGPT, including its applications, ethical\\nconsiderations, and evaluation. In addition to discussing the current state of\\nresearch related to ChatGPT, we will also explore its limitations in section 3.\\nFurthermore, we will provide guidance on future directions for language model\\ndevelopment.\\n2 Related work of ChatGPT'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 3, 'page_label': '4'}, page_content='development.\\n2 Related work of ChatGPT\\nIn this section, we review the latest research related to the application, ethics,\\nand evaluation of ChatGPT.\\n2.1 Application of ChatGPT\\n2.1.1 Question And Answering\\nIn the education ﬁeld\\nChatGPT is commonly used for question and answers testing in the edu-\\ncation sector. Users can use ChatGPT to learn, compare and verify answers\\nfor diﬀerent academic subjects such as physics, mathematics, and chemistry,\\n4'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 4, 'page_label': '5'}, page_content='Figure 3: The distribution of submitted papers across various ﬁelds.\\n5'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 5, 'page_label': '6'}, page_content='and/or conceptual subjects such as philosophy and religion. Additionally, users\\ncan ask open-ended and analytical questions to understand the capabilities of\\nChatGPT.\\nIn the ﬁeld of mathematics, Frieder et al. [7] constructed the GHOSTS nat-\\nural language dataset, which consists of graduate-level math test questions.\\nThe authors tested ChatGPT’s math abilities on the GHOSTS dataset us-\\ning a question-and-answer format and evaluated it according to ﬁne-grained'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 5, 'page_label': '6'}, page_content='standards.In the Grad Text dataset, which covers simple set theory and logic\\nproblems, ChatGPT performed the best. However, in the Olympiad-Problem-\\nSolving dataset, ChatGPT performed poorly, receiving only two 4-point scores\\n(out of a total of 5), with the majority of scores being 2 points. In the Holes-\\nin-Proofs dataset, ChatGPT received the lowest score of 1 point. In the MATH\\ndataset, ChatGPT only scored impressively in 26% of cases. These results sug-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 5, 'page_label': '6'}, page_content='gest that ChatGPT’s math abilities are clearly lower than those of ordinary\\nmath graduate students. Although ChatGPT can generally understand math\\nproblems, it fails to provide the correct solutions. Pardos et al. [8] used the\\nOpen Adaptive Tutoring system (OATutor) to investigate whether prompts gen-\\nerated by ChatGPT were helpful for learning algebra, with 77 participants from\\nMechanical Turk taking part in the experiment. The experiment used ques-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 5, 'page_label': '6'}, page_content='tions from OpenStax’s Elementary and Intermediate Algebra textbooks. These\\nparticipants were randomly assigned to either a control group (with manual\\nprompts) or an experimental group (with ChatGPT prompts). For each ques-\\ntion in both courses, the authors obtained answers from ChatGPT through a\\nquestion-and-answer format and evaluated scores according to three criteria:\\nChatGPT provided an answer, the answer was correct, and inappropriate lan-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 5, 'page_label': '6'}, page_content='guage was not used in the answer. The study found that 70% of prompts gener-\\nated by ChatGPT passed manual quality checks, and both humans and Chat-\\nGPT produced positive learning gains. However, the scores of human prompts\\nranged from 74.59% to 84.32%, signiﬁcantly higher than those of ChatGPT\\nprompts. Shakarian et al. [9] studied the performance of ChatGPT on math\\nword problems (MWPs), using the DRAW-1K dataset for experimentation. The'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 5, 'page_label': '6'}, page_content='dataset consists of 1000 MWPs and their answers, along with algebraic equa-\\ntion templates for solving such problems. The authors used the idea of machine\\nlearning introspection and built performance prediction models using random\\nforests and XGBoost, and evaluated them on the dataset using ﬁve-fold cross-\\nvalidation. ChatGPT’s accuracy increased from an initial 34% to a ﬁnal 69%,\\nwhile its recall increased from an initial 41% to a ﬁnal 83%. The authors also'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 5, 'page_label': '6'}, page_content='found that ChatGPT’s failure rate decreased from an initial 84% to a ﬁnal\\n20%, indicating that performance can vary greatly depending on speciﬁc job\\nrequirements.\\nIn the ﬁeld of physics, Lehnert et al. [10] explored the capabilities and limita-\\ntions of ChatGPT by studying how it handles obscure physics topics such as the\\nswamp land conjecture in string theory. The experimental dialogue began with\\nbroader and more general questions in the ﬁeld of string theory before narrowing'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 5, 'page_label': '6'}, page_content='down to speciﬁc swamp land conjectures and examining ChatGPT’s understand-\\ning of them. The study found that ChatGPT could deﬁne and explain diﬀerent\\n6'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 6, 'page_label': '7'}, page_content='concepts in various styles, but was not eﬀective in truly connecting various con-\\ncepts. It would conﬁdently provide false information and fabricate statements\\nwhen necessary, indicating that ChatGPT cannot truly create new knowledge\\nor establish new connections. However, in terms of identifying analogies and\\ndescribing abstract concepts of visual representation, ChatGPT can cleverly\\nuse language. Kortemeyer et al. [11] evaluated ChatGPT’s ability to answer'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 6, 'page_label': '7'}, page_content='calculus-based physics questions through a question-and-answer test. The tests\\nincluded online homework, clicker questions, programming exercises, and exams\\ncovering classical mechanics, thermodynamics, electricity and magnetism, and\\nmodern physics. While ChatGPT was able to pass the course, it also demon-\\nstrated many misconceptions and errors commonly held by beginners. West et\\nal. [12] used the Force Concept Inventory (FCI) to evaluate ChatGPT’s accu-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 6, 'page_label': '7'}, page_content='racy in answering physics concept problems related to kinematics and Newtonian\\nmechanics in the ﬁrst semester of college physics. The FCI covers topics such\\nas kinematics, projectile motion, free fall, circular motion, and Newton’s laws.\\nThe study included data from 415 students who took the FCI at the end of the\\nsemester, with an average score of 56%, while ChatGPT scored approximately\\nbetween 50% to 65%. The authors demonstrated that ChatGPT’s performance'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 6, 'page_label': '7'}, page_content='in physics learning can reach or even exceed the average level of a semester of\\ncollege physics.\\nIn the medical ﬁeld\\nChatGPT’s question-answering capabilities can also be applied in the med-\\nical ﬁeld, such as for answering medical questions from patients or assisting\\nhealthcare professionals in diagnosing diseases. Nov et al. [13] evaluated the fea-\\nsibility of using ChatGPT for patient-doctor communication. The experiment'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 6, 'page_label': '7'}, page_content='extracted 10 representative patient-doctor interactions from EHR, placed the\\npatient’s questions in ChatGPT, and asked ChatGPT to respond using roughly\\nthe same number of words as the doctor’s response. Each patient’s question was\\nanswered by either the doctor or ChatGPT, and the patient was informed that\\n5 were answered by the doctor and 5 were generated by ChatGPT, and was\\nasked to correctly identify the source of the response. The results of the exper-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 6, 'page_label': '7'}, page_content='iment showed that the probability of correctly identifying ChatGPT’s response\\nwas 65.5%, while the probability of correctly identifying the doctor’s response\\nwas 65.1%. In addition, the experiment found that the patient’s response to\\nthe trustworthiness of ChatGPT’s function was weakly positive (average Likert\\nscore: 3.4), and trust decreased as the complexity of health-related tasks in\\nthe questions increased. ChatGPT’s responses to patient questions were only'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 6, 'page_label': '7'}, page_content='slightly diﬀerent from those of doctors, but people seem to trust ChatGPT to\\nanswer low-risk health questions, while for complex medical questions, people\\nstill tend to trust the doctor’s responses and advice.\\nTu et al. [14] explored the causal discovery ability of ChatGPT in the diag-\\nnosis of neuropathic pain. Causal relationship discovery aims to reveal potential\\nunknown causal relationships based purely on observed data [15]. The experi-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 6, 'page_label': '7'}, page_content='mental results found that ChatGPT has some limitations in understanding new\\n7'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 7, 'page_label': '8'}, page_content='knowledge and concepts beyond the existing textual training data corpus, that\\nis, it only understands language commonly used to describe situations and not\\nunderlying knowledge. In addition, its performance consistency and stability are\\nnot high, as the experiment observed that it would provide diﬀerent answers for\\nthe same question under multiple inquiries. However, despite the many limita-\\ntions of ChatGPT, we believe that it has a great opportunity to improve causal\\nrelationship research.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 7, 'page_label': '8'}, page_content='relationship research.\\nIn the other ﬁeld\\nGuo et al. [16] attempted to apply ChatGPT in the ﬁeld of communication,\\nspeciﬁcally using ChatGPT for ordered importance semantic communication,\\nwhere ChatGPT plays the role of an intelligent consulting assistant that can\\nreplace humans in identifying the semantic importance of words in messages and\\ncan be directly embedded into the current communication system. For a message\\nto be transmitted, the sender ﬁrst utilizes ChatGPT to output the semantic'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 7, 'page_label': '8'}, page_content='importance order of each word. Then, the transmitter executes an unequal error\\nprotection transmission strategy based on the importance order to make the\\ntransmission of important words in the message more reliable. The experimental\\nresults show that the error rate and semantic loss of important words measured\\nin the communication system embedded with ChatGPT are much lower than\\nthose of existing communication schemes, indicating that ChatGPT can protect'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 7, 'page_label': '8'}, page_content='important words well and make semantic communication more reliable.\\nWang et al. [17] studied the eﬀectiveness of ChatGPT in generating high-\\nquality Boolean queries for systematic literature search. They designed a wide\\nrange of prompts and investigated these tasks on more than 100 systematic\\nreview topics. In the end, queries generated by ChatGPT achieved higher accu-\\nracy compared to the currently most advanced query generation methods but'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 7, 'page_label': '8'}, page_content='at the cost of reduced recall. For time-limited rapid reviews, it is often accept-\\nable to trade oﬀ higher precision for lower recall. Additionally, ChatGPT can\\ngenerate high search accuracy Boolean queries by guiding the prompts. How-\\never, it should be noted that when two queries use the same prompts, ChatGPT\\ngenerates diﬀerent queries, indicating its limitations in consistency and stabil-\\nity. Overall, this study demonstrated the potential of ChatGPT in generating'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 7, 'page_label': '8'}, page_content='eﬀective Boolean queries for systematic literature searches.\\n2.1.2 Text Classiﬁcation\\nThe purpose of text classiﬁcation is to assign text data to predeﬁned categories.\\nThis task is critical for many applications, including sentiment analysis, spam\\ndetection, and topic modeling. While traditional machine learning algorithms\\nhave been widely used for text classiﬁcation, recent advances in natural lan-\\nguage processing have led to the development of more advanced techniques.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 7, 'page_label': '8'}, page_content='ChatGPT has shown immense potential in this ﬁeld. Its ability to accurately\\nclassify text, ﬂexibility in handling various classiﬁcation tasks, and potential for\\ncustomization make it a valuable tool for text classiﬁcation, as evidenced by\\nseveral studies in the literature.\\n8'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 8, 'page_label': '9'}, page_content='Kuzman et al. [18] employed ChatGPT for automatic genre recognition, with\\nthe goal of simplifying the text classiﬁcation task by utilizing ChatGPT’s zero-\\nshot classiﬁcation capability. They compared ChatGPT’s genre recognition per-\\nformance, using two prompt languages (EN and SL), with the X-GENRE clas-\\nsiﬁer based on the multilingual model XLM-RoBERTa on the English dataset\\nEN-GINCO and the Slovenian dataset GINCO. The results showed that when'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 8, 'page_label': '9'}, page_content='EN was used as the prompt language, ChatGPT achieved Micro F1, Macro F1,\\nand Accuracy scores of 0.74, 0.66, and 0.72. However, on the GINCO dataset,\\nChatGPT’s genre recognition performance with both EN and SL prompt lan-\\nguages was lower than that of the X-GENRE classiﬁer to varying degrees.\\nAmin et al. [19] evaluated the text classiﬁcation ability of ChatGPT in\\naﬀective computing by using it to perform personality prediction, sentiment'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 8, 'page_label': '9'}, page_content='analysis, and suicide ideation detection tasks. They prompted ChatGPT with\\ncorresponding prompts on three datasets: First Impressions, Sentiment140, and\\nSuicide and Depression, and compared its classiﬁcation performance with three\\nbaseline models: RoBERTa-base, Word2Vec, and BoW. The results showed\\nthat ChatGPT’s accuracy and UAR for the ﬁve personality classiﬁcations on\\nthe First Impressions dataset were lower than the baseline methods to varying'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 8, 'page_label': '9'}, page_content='degrees. On the Sentiment140 dataset, ChatGPT’s accuracy and UAR were 85.5\\nand 85.5, respectively, which were better than the three baseline methods. On\\nthe Suicide and Depression dataset, ChatGPT’s accuracy and UAR were 92.7\\nand 91.2, respectively, which were lower than RoBERTa, the best-performing\\nbaseline method.\\nZhang et al. [20] employed ChatGPT for stance detection, which includes\\nsupport and opposition. They used ChatGPT to classify the political stance'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 8, 'page_label': '9'}, page_content='of tweets in the SemEval-2016 and P-Stance datasets. SemEval-2016 contains\\n4,870 English tweets, and they selected tweets with the most commonly occur-\\nring FM, LA, and HC political labels for stance classiﬁcation. The P-Stance\\ndataset has 21,574 English tweets, and they classiﬁed the stance of tweets to-\\nwards Trump, Biden, and Bernie. The ﬁnal results showed that on the SemEval-\\n2016 dataset, ChatGPT achieved F1-m scores of 68.4, 58.2, and 79.5 for the FM,'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 8, 'page_label': '9'}, page_content='LA, and HC political labels, and F1-avg scores of 72.6, 59.3, and 78.0, respec-\\ntively. On the P-Stance dataset, ChatGPT achieved F1-m scores of 82.8, 82.3,\\nand 79.4 for the Trump, Biden, and Bernie political ﬁgures, and F1-avg scores\\nof 83.2, 82.0, and 79.4, respectively.\\nHuang et al. [21] used ChatGPT to detect implicit hate speech in tweets.\\nThey selected 12.5% (795 tweets) of the LatentHatred dataset containing im-\\nplicit hate speech and asked ChatGPT to classify them into three categories:'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 8, 'page_label': '9'}, page_content='implicit hate speech, non-hate speech, and uncertain. The results showed that\\nChatGPT correctly recognized 636 (80%) of the tweets. The number of tweets\\nclassiﬁed as non-hate speech and uncertain were 146 (18.4%) and 13 (1.6%),\\nrespectively. The results of the reclassiﬁcation of tweets in the non-hate speech\\nand uncertain categories by Amazon Mechanical Turk (Mturk) workers were\\nconsistent with ChatGPT’s classiﬁcation.\\nOverall, ChatGPT has tremendous potential in text classiﬁcation tasks, as it'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 8, 'page_label': '9'}, page_content='can eﬀectively address problems such as genre identiﬁcation, sentiment analysis,\\n9'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 9, 'page_label': '10'}, page_content='stance detection, and more. However, there are still challenges that ChatGPT\\nfaces in the ﬁeld of text classiﬁcation. Firstly, it struggles to perform well in\\nclassiﬁcation tasks with rare or out-of-vocabulary words since it heavily relies\\non the distribution of training data. Additionally, the signiﬁcant computational\\nresources required for training and utilizing ChatGPT can limit its use in some\\napplications.\\n2.1.3 Text generation'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 9, 'page_label': '10'}, page_content='applications.\\n2.1.3 Text generation\\nWe live in an era of information explosion, and text is an eﬃcient way of trans-\\nmitting information. The diversity of information has led to a diversity of text\\ncategories. When researchers use ChatGPT’s text generation capabilities for\\nresearch, they inevitably choose to generate diﬀerent types of text. In the pro-\\ncess of reading papers, we found that the word count of the text generated by'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 9, 'page_label': '10'}, page_content='researchers increased from small to large, so we wanted to summarize existing\\nresearch based on the size of the text word count. We divided the generated\\ntext into three levels: phrases, sentences, and paragraphs.\\nThe following article uses ChatGPT to generate phrases. Zhang et al. [22]\\nproves that the semantic HAR model with semantic augmentation added dur-\\ning training performs better in motion recognition than other models. Semantic'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 9, 'page_label': '10'}, page_content='augmentation requires shared tokens, which is lacking in some datasets. There-\\nfore, authors leverage ChatGPT for an automated label generation approach\\nfor datasets originally without shared tokens. Fu et al. [23] describes a new\\nworkﬂow for converting natural language commands into Bash commands. The\\nauthor uses ChatGPT to generate a candidate list of Bash commands based\\non user input, and then uses a combination of heuristic and machine learning'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 9, 'page_label': '10'}, page_content='techniques to rank and select the most likely candidates. This workﬂow was\\nevaluated on a real command dataset and achieved high accuracy compared\\nto other state-of-the-art methods. Chen et al. [24] used the Bart model and\\nChatGPT for the task of summarizing humorous titles and compared the perfor-\\nmance of the two models. It was found that the Bart model performed better on\\nlarge datasets, but ChatGPT was competitive with our best ﬁne-tuned model\\nin a small range (48), albeit slightly weaker.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 9, 'page_label': '10'}, page_content='in a small range (48), albeit slightly weaker.\\nThe following article uses ChatGPT to generate sentences.Chen et al. [25]\\nconstructed a dialogue dataset (HPD) with scenes, timelines, character at-\\ntributes, and character relationships in order to use ChatGPT as a conver-\\nsational agent to generate dialogue. However, ChatGPT’s performance on the\\ntest set was poor, and there is room for improvement.In study [26], chatGPT\\ndemonstrated its ability to simplify complex text by providing three ﬁctional'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 9, 'page_label': '10'}, page_content='radiology reports to chatGPT for simpliﬁcation. Most radiologists found the\\nsimpliﬁed reports to be accurate and complete, with no potential harm to pa-\\ntients. However, some errors, omissions of critical medical information and text\\npassages were identiﬁed, which could potentially lead to harmful conclusions if\\nnot understood by the physicians. Xia et al. [27] proposes a new program re-\\npair paradigm called Session-based Automatic Program Repair (APR). In APR,'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 9, 'page_label': '10'}, page_content='the previously generated patches are iteratively built upon by combining them\\n10'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 10, 'page_label': '11'}, page_content='with validation feedback to construct the model’s input. The eﬀectiveness of\\nthe approach is veriﬁed using the QuixBugs dataset. The experiment shows\\nthat ChatGPT ﬁne-tuned with reinforcement learning from human feedback\\n(RLHF) outperforms Codex trained unsupervisedly in both repair datasets. In\\nreference to study [28], ChatGPT was compared to three commercial transla-\\ntion products: Google Translate2, DeepL Translate3, and Tencent TranSmart4.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 10, 'page_label': '11'}, page_content='The evaluation was conducted on the Flores101 test set, using the WMT19\\nbiomedical translation task to test translation robustness, with BLEU score as\\nthe main metric. The study found that ChatGPT is competitive with commer-\\ncial translation products on high-resource European languages but falls behind\\non low-resource or distant languages. The authors explored an interesting strat-\\negy called pivot prompts, which signiﬁcantly improved translation performance.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 10, 'page_label': '11'}, page_content='While ChatGPT did not perform as well as commercial systems on biomedical\\nabstracts or Reddit comments, it may be a good speech translator. Prieto et\\nal. [29] evaluated the use of ChatGPT in developing an automated construction\\nschedule based on natural language prompts. The experiment required building\\nnew partitions in an existing space and providing details on the rooms to be\\npartitioned. The results showed that ChatGPT was able to generate a coher-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 10, 'page_label': '11'}, page_content='ent schedule that followed a logical approach to meet the requirements of the\\ngiven scope. However, there were still several major ﬂaws that would limit the\\nuse of this tool in real-world projects.Michail et al. [30] proposed a method\\nto improve the prediction accuracy of the HeFit ﬁne-tuned XLM T model on\\ntweet intimacy by generating a dataset of tweets with intimacy rating tags using\\nChatGPT. The speciﬁc operation is to input tweets with intimacy rating tags'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 10, 'page_label': '11'}, page_content='into ChatGPT and then output similar tweets.\\nThe following article uses ChatGPT to generate paragraphs. Wang et al.\\n[31] compared the abstract summarization performance of ChatGPT and other\\nmodels on various cross-lingual text datasets and found that ChatGPT may\\nperform worse in metrics such as R 1, R 2, R L, and B S. Yang et al. [32]\\nsummarized the performance of ChatGPT in question answering-based text\\nsummarization and found that, compared to ﬁne-tuned models, ChatGPT’s'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 10, 'page_label': '11'}, page_content='performance is slightly worse in all performance metrics. However, the article\\nsuggests that if the dataset is golden annotation, ChatGPT’s performance may\\nsurpass ﬁne-tuned models in these metrics. Belouadi et al. [33] compared the\\nability of ByGPT5 and ChatGPT trained on a range of labeled and unlabeled\\ndatasets of English and German poetry to generate constrained style poetry, and\\nevaluated them using three metrics: Rhyme, ScoreAlliteration, and ScoreMe-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 10, 'page_label': '11'}, page_content='ter Score. The conclusion is that ByGPT5 performs better than ChatGPT.\\nBlanco-Gonzalez et al. [34] evaluated chatGPT’s ability to write commentary\\narticles, and in fact, this article itself was written by chatGPT. The human au-\\nthor rewrote the manuscript based on chatGPT’s draft. Experts found that it\\ncan quickly generate and optimize text, as well as help users complete multiple\\ntasks. However, in terms of generating new content, it is not ideal. Ultimately,'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 10, 'page_label': '11'}, page_content='it can be said that without strong human intervention, chatGPT is not a use-\\nful tool for writing reliable scientiﬁc texts. It lacks the knowledge and expertise\\nrequired to accurately and fully convey complex scientiﬁc concepts and informa-\\n11'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 11, 'page_label': '12'}, page_content='tion. Khalil et al. [35] on the originality of content generated by ChatGPT. To\\nevaluate the originality of 50 papers on various topics generated by ChatGPT,\\ntwo popular plagiarism detection tools, Turnitin and iThenticate, were used.\\nThe results showed that ChatGPT has great potential in generating complex\\ntext output that is not easily captured by plagiarism detection software. The\\nexisting plagiarism detection software should update their plagiarism detection'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 11, 'page_label': '12'}, page_content='engines. Basic et al. [36] conducted a comparison of the writing performance\\nof students using or not using ChatGPT-3 as a writing aid. The experiment\\nconsisted of two groups of 9 participants each. The control group wrote articles\\nusing traditional methods, while the experimental group used ChatGPT as an\\naid. Two teachers evaluated the papers. The study showed that the assistance of\\nChatGPT did not necessarily improve the quality of the students’ essays.Noever'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 11, 'page_label': '12'}, page_content='et al. [37] discusses the potential of using artiﬁcial intelligence (AI), particularly\\nlanguage models like GPT (including GPT-3), to create more convincing chat-\\nbots that can deceive humans into thinking they are interacting with another\\nperson. The article describes a series of experiments in which they used GPT-3\\nto generate chatbot responses that mimic human-like conversations and were\\ntested on human participants. The results show that some participants were'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 11, 'page_label': '12'}, page_content='unable to distinguish between the chatbot and a real human, highlighting the\\npotential for these AI chatbots to be used for deceptive purposes.\\n2.1.4 Code Generation\\nCode generation refers to the process of automatically generating computer code\\nfrom high-level descriptions or speciﬁcations. ChatGPT’s advanced natural lan-\\nguage processing capabilities make it capable of performing code generation\\ntasks. By analyzing the requirements for code generation, ChatGPT can pro-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 11, 'page_label': '12'}, page_content='duce code snippets that accurately execute the intended functionality. This not\\nonly saves time and eﬀort in writing code from scratch but also reduces the risk\\nof errors that may occur during manual coding. In addition, ChatGPT’s ability\\nto learn and adapt to new programming languages and frameworks enables it\\nto complete more complex programming tasks. For example:\\nMegahed et al. [38] discussed the potential of using ChatGPT for tasks'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 11, 'page_label': '12'}, page_content='such as code explanation, suggesting alternative methods for problem-solving\\nwith code, and translating code between programming languages. The solutions\\nprovided by ChatGPT were found to be viable. In another study, Treude et\\nal. [39] introduced a ChatGPT-based prototype called GPTCOMCARE, which\\nhelps programmers generate multiple solutions for a programming problem and\\nhighlight the diﬀerences between each solution using colors.Sobania et al. [40]'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 11, 'page_label': '12'}, page_content='utilized ChatGPT for code bug ﬁxing, and further improved the success rate of\\nbug ﬁxing by inputting more information through its dialogue system. Speciﬁ-\\ncally, the QuixBugs standard bug ﬁxing benchmark contained 40 code bugs that\\nneeded to be ﬁxed. With limited information, ChatGPT ﬁxed 19 bugs, which\\nwas slightly lower than the 21 bugs ﬁxed by the Codex model, but signiﬁcantly\\nhigher than the 7 ﬁxed by the Standard APR model. When given more prompts'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 11, 'page_label': '12'}, page_content='and information, ChatGPT was able to ﬁx 31 bugs, demonstrating its potential\\n12'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 12, 'page_label': '13'}, page_content='for code bug ﬁxing tasks.Xia et al. [27] proposed a conversational approach\\nfor Automated Program Repair (APR), which alternates between generating\\npatches and validating them against feedback from test cases until the correct\\npatch is generated. Selecting 30 bugs from the QuixBugs standard bug ﬁxing\\nbenchmark, which are suitable for test case feedback, and demonstrating them\\nwith Java and Python, the QuixBugs-Python and QuixBugs-Java datasets were'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 12, 'page_label': '13'}, page_content='obtained. The conversational APR using ChatGPT outperformed the conversa-\\ntional APR using Codex and the conversational APR using CODEGEN (with\\nmodel parameters of 350M, 2B, 6B, and 16B) on both datasets. Furthermore,\\nChatGPT’s conversational APR generated and validated patches with signiﬁ-\\ncantly fewer feedback loops than the other models.\\nChatGPT can not only be used to achieve some simple code generation tasks\\nbut also can be used to accomplish some complex programming tasks. Noever'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 12, 'page_label': '13'}, page_content='et al. [41] tested ChatGPT’s code generation capabilities on four datasets -\\nIris, Titanic, Boston Housing, and Faker. When prompted to mimic a Python\\ninterpreter in the form of a Jupyter notebook, the model was able to generate\\nindependent code based on the prompt and respond with the expected out-\\nput. For example, when given the prompt ”data.cor()” for the Iris dataset,\\nChatGPT generated correct Python output. The test results indicate that'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 12, 'page_label': '13'}, page_content='ChatGPT can access structured datasets and perform basic software opera-\\ntions required by databases, such as create, read, update, and delete (CRUD).\\nThis suggests that cutting-edge language models like ChatGPT have the nec-\\nessary scale to tackle complex problems. McKee et al. [42] utilized ChatGPT\\nas an experimental platform to investigate cybersecurity issues. They modeled\\nﬁve diﬀerent modes of computer virus properties, including self-replication, self-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 12, 'page_label': '13'}, page_content='modiﬁcation, execution, evasion, and application, using ChatGPT. These ﬁve\\nmodes encompassed thirteen encoding tasks from credential access to defense\\nevasion within the MITRE ATT&CK framework. The results showed that the\\nquality of ChatGPT’s generated code was generally above average, except for the\\nself-replication mode, where it performed poorly.They [43] also employed Chat-\\nGPT as a network honeypot to defend against attackers. By having ChatGPT'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 12, 'page_label': '13'}, page_content='mimic Linux, Mac, and Windows terminal commands and providing interfaces\\nfor TeamViewer, nmap, and ping, a dynamic environment can be created to\\nadapt to attackers’ operations, and logs can be used to gain insight into their\\nattack methods, tactics, and procedures. The authors demonstrated ten hon-\\neypot tasks to illustrate that ChatGPT’s interface not only provides suﬃcient\\nAPI memory to execute previous commands without defaulting to repetitive'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 12, 'page_label': '13'}, page_content='introductory tasks but also oﬀers a responsive welcome program that maintains\\nattackers’ interest in multiple queries.\\nIn the ﬁeld of code generation, there are still several challenges with Chat-\\nGPT. Firstly, its application scope is limited as its training data is biased to-\\nwards programming languages such as Python, C++, and Java, making it po-\\ntentially unsuitable for some programming languages or coding styles. Secondly,'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 12, 'page_label': '13'}, page_content='manual optimization is necessary for code formatting, as the generated code may\\nnot be performance-optimized or follow best coding practices, requiring manual\\nediting and optimization. Lastly, the quality of the generated code cannot be\\n13'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 13, 'page_label': '14'}, page_content='guaranteed, as it heavily relies on the quality of the natural language input,\\nwhich may contain errors, ambiguities, or inconsistencies, ultimately aﬀecting\\nthe accuracy and reliability of the generated code.\\n2.1.5 Inference\\nInference refers to the process of drawing new conclusions or information through\\nlogical deduction from known facts or information. It is typically based on a se-\\nries of premises or assumptions, and involves applying logical rules or reasoning'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 13, 'page_label': '14'}, page_content='methods to arrive at a conclusion. Inference is an important ability in human\\nthinking, and is often used to solve problems, make decisions, analyze and eval-\\nuate information, etc. Inference also plays a key role in ﬁelds such as science,\\nphilosophy, law, etc. There are two types of inference: inductive reasoning,\\nwhich involves deriving general rules or conclusions from known facts or expe-\\nriences, and deductive reasoning, which involves deriving speciﬁc conclusions'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 13, 'page_label': '14'}, page_content='from known premises or assumptions. Whether inductive or deductive, the pro-\\ncess of inference requires following strict logical rules to ensure the correctness\\nand reliability of the inference.\\nSome papers attempt to use ChatGPT’s ability in inductive reasoning to\\ncapture the meaning in text and use deﬁned metrics to score the text. Michail\\net al. [30] uses ChatGPT to infer intimacy expressed in tweets. They ﬁrst input\\n50 tweets with intimacy markers to ChatGPT, then use inductive reasoning to'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 13, 'page_label': '14'}, page_content='infer the standards for generating tweets with diﬀerent levels of intimacy, and\\nﬁnally generate ten tweets with intimacy values ranging from 0 to 5. Susnjak\\net al. [44] collected a large amount of textual data from patient-doctor dis-\\ncussion forums, patient testimonials, social media platforms, medical journals,\\nand other scientiﬁc research publications. Using the BERT model, the author\\ninferred emotion values from 0 to 1. The author visualized the process of how'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 13, 'page_label': '14'}, page_content='the presence of bias in the discourse surrounding chronic manifestations of the\\ndisease using the SHAP tool. The author also envisioned ChatGPT as a replace-\\nment for the BERT model for scoring the emotional value of text. Huang et al.\\n[21] chose 12.5% of individuals in the potential hate dataset as study materials,\\ninduced ChatGPT to make classiﬁcations based on a prompt, and ChatGPT\\nproduced three classiﬁcations: unclear, yes, and no. The author assigned a'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 13, 'page_label': '14'}, page_content='value of 1 to yes, -1 to no, and 0 to unclear, and had ChatGPT score and clas-\\nsify them. ChatGPT was able to correctly classify 80% of implicit hate tweets\\nin the author’s experimental setup, demonstrating ChatGPT’s great potential\\nas a data labeling tool using simple prompts.\\nSome papers have evaluated ChatGPT’s reasoning performance, mainly in\\ndecision-making and spatial reasoning, and identifying ambiguity. Tang et al.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 13, 'page_label': '14'}, page_content='[45] used the independence axiom and the transitivity axiom, as well as other\\nnon-VNM related decision-making abilities, by presenting bets conditioned on\\nrandom events, bets with asymmetric outcomes, decisions encapsulating Sav-\\nage’s Sure Thing principle, and other complex bet structures like nested bets, to\\ndesign experiments where each experiment input a short prompt to ChatGPT\\nand evaluated the results. The conclusion is that ChatGPT exhibits uncer-\\n14'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 14, 'page_label': '15'}, page_content='tainty in the decision-making process: in some cases, large language models\\ncan arrive at the correct answer through incorrect reasoning; and it may make\\nsuboptimal decisions for simple reasoning problems. Ortega-Martn et al. [46]\\nhad ChatGPT detect three diﬀerent levels of language ambiguity and evaluated\\nits performance. The conclusion is that In semantics, ChatGPT performed per-\\nfectly in the detection of ambiguities. Apart from that, it has some bright sports'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 14, 'page_label': '15'}, page_content='(co-reference resolution) and some weaknesses (puts gender bias over grammar\\nin some non-ambiguous situations). In the generation task ChatGPT did well,\\nbut also revealed some of its worse issues: the lack of systematicity. Lastly, it\\nshould also be pointed that in most of the cases ChatGPT brilliantly alludes to\\nlack of context as the key factor in disambiguation.\\n2.1.6 Data or information extraction, transformation, enhancement,\\nprocessing\\nData Visualization'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 14, 'page_label': '15'}, page_content='processing\\nData Visualization\\nNatural language interfaces have contributed to generating visualizations\\ndirectly from natural language, but visualization problems remain challenging\\ndue to the ambiguity of natural language.ChatGPT provides a new avenue for\\nthe ﬁeld by converting natural language into visualized code.\\nIn terms of data visualization, Noever et al. [41] tested ChatGPT’s ba-\\nsic arithmetic skills by asking questions.On the iris dataset, Titanic survival'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 14, 'page_label': '15'}, page_content='dataset, Boston housing data, and randomly generated insurance claims dataset,\\nthe statistical analysis of data and visualization problems were converted to\\nprogramming problems using Jupyter to verify ChatGPT’s ability to generate\\npython code to draw suitable graphs and analyze the data. The results show\\nthat ChatGPT can access structured and organized datasets to perform the\\nfour basic software operations required for databases: create, read, update, and'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 14, 'page_label': '15'}, page_content='delete, and generate suitable python code to plot graphs for descriptive statis-\\ntics, variable correlation analysis, describing trends, and other data analysis\\noperations.Maddigan et al. [47] proposed an end-to-end solution for visualizing\\ndata in natural language using LLM, which uses an open-source python frame-\\nwork designed to generate appropriate hints for selected datasets to make LLM\\nmore eﬀective in understanding natural language, and uses internal reasoning'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 14, 'page_label': '15'}, page_content='capabilities to select the appropriate visualization type to generate the code for\\nvisualization. In this paper,the reseachers compare the visualization results of\\nGPT-3, Codex and ChatGPT in the case of nvBench SQLite database [48] and\\nthe visualization results of energy production dataset in the study of ADVISor\\nwith NL4DV [49, 50].In addition to, they explore the ability to reason and hy-\\npothesize of the LLM on movie dataset [48] when the hints are insuﬃcient or'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 14, 'page_label': '15'}, page_content='wrong .Experimental results show that LLM can eﬀectively support the end-to-\\nend generation of visualization results from natural language when supported\\nby hints, providing an eﬃcient, reliable and accurate solution to the natural\\nlanguage visualization problem.\\n15'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 15, 'page_label': '16'}, page_content='Information Extraction\\nThe goal of information extraction is to extract speciﬁc information from\\nnatural language text for structured representation, including three important\\nsubtasks such as entity relationship extraction, named entity recognition, and\\nevent extraction, which have wide applications in business, medical, and other\\nﬁelds.\\nIn information extraction, Wei et al. [51] proposed ChatIE, a ChatGPT-\\nbased multi-round question-and-answer framework for information extraction.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 15, 'page_label': '16'}, page_content='The framework decomposes a complex information extraction (IE) task into\\nseveral parts, then combines the results of each round into a ﬁnal structured\\nresult. The entity association triple extraction, named entity recognition, and\\nevent extraction tasks were performed on six datasets NYT11-HRL, DuIE2.0\\n, conllpp, MSR , DuEE1.0 [52, 53, 54, 55, 56], and ACE05 in both languages,\\ncomparing three metrics of precision, recall, and F1 score.These results sug-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 15, 'page_label': '16'}, page_content='gest that on six widely used IE datasets, ChatIE improves performance by an\\naverage of 18.98% compared to the original ChatGPT without ChatIE, and out-\\nperforms the supervised models FCM and MultiR [57, 58] on the NYT11-HRL\\ndataset.While the original ChatGPT cannot solve complex IE problems with\\noriginal task instructions, and with this framework, successfully IE tasks were\\nimplemented on six datasets.Gao et al. [59] explored the feasibility and chal-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 15, 'page_label': '16'}, page_content='lenges of ChatGPT for event extraction on the ACE2005 corpus, evaluating the\\nperformance of ChatGPT in long-tail and complex scenarios (texts containing\\nmultiple events) and comparing it with two task-speciﬁc models, Text2Event\\nand EEQA [60, 61].Then,they explored the impact of diﬀerent cues on perfor-\\nmance of ChatGPT. The results show that the average performance of Chat-\\nGPT in long-tail and complex scenarios is only 51.04% of that of task-speciﬁc'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 15, 'page_label': '16'}, page_content='models such as EEQA. Continuous reﬁnement of cues does not lead to consis-\\ntent performance improvements, and ChatGPT is highly sensitive to diﬀerent\\ncue styles.Tang et al. [62] proposed a new training paradigm that incorporates\\nappropriate cues to guide ChatGPT to generate a variety of examples with dif-\\nferent sentence structures and language patterns and eliminate the resulting\\nlow-quality or duplicate samples for downstream tasks. Although compared to'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 15, 'page_label': '16'}, page_content='a soft model for a speciﬁc healthcare tasks, ChatGPT underperforms in Named\\nEntity Recognition (NER) and Relationship Extraction (RE) tasks , in the Gene\\nAssociation Database (GAD) Release; EU-ADR corpus for the RE task , the\\ninnovative training framework was able to train local models, with F1 scores im-\\nproving from 23.37% to 63.99% for the named entity recognition task and from\\n75%, while alleviating privacy concerns and time-consuming data collection and'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 15, 'page_label': '16'}, page_content='annotation problems.He et al. [63] proposed a contextual learning framework\\nICL- D3IE. this framework introduces formatted presentation, continuously it-\\nerates to update and improve the presentation, and then combines ChatGPT\\nfor text information extraction. In the paper, ICL-D3IE is compared with ex-\\nisting pre-trained models such as LiLT,BROS (in-distribution (ID) setting and\\nout-of-distribution (OOD) setting) on datasets (FUNSD, CORD, and SROIE'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 15, 'page_label': '16'}, page_content='[64, 65, 66]).These results show that the ICL-D3IE method in all datasets and\\nsettings except for the ID setting on CORD are superior to other methods,\\n16'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 16, 'page_label': '17'}, page_content='with ICL-D3IE (GPT-3) F1 scores reaching 90.32% on FUNSD and97.88% on\\nSROIE; in the out-of-distribution (OOD) setting, ICL-D3IE performs much\\nbetter than previous pre-trained methods on all datasets.Polak et al. [67] pro-\\nposed ChatExtract method - consisting of a set of engineering prompts applied\\nto a conversational LLM - for automatic data extraction. In the experiment,\\nthey extracted a large number of sentences from hundreds of papers and ran-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 16, 'page_label': '17'}, page_content='domly selected 100 sentences containing data and 100 sentences without data\\nas test data. The results show that the accuracy and recall of LLM exceeded\\n90% and may be comparable to human accuracy in many cases; in addition to\\nthis, the experiments were conducted under the condition of removing follow-up\\nprompts and not keeping the conversation compared to previous experiments,\\nrespectively. The accuracy of deleting follow-up questions dropped to 80.2%'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 16, 'page_label': '17'}, page_content='and the recall rate dropped to 88.0%. Removing the conversational aspect and\\nrelated information retention recall and accuracy dropped to 90.0% and 56.6%,\\nrespectively, demonstrating the eﬀect of information retention combined with\\npurposeful redundancy on LLM information extraction performance.\\nQuality Assessment\\nFor translation quality, text generation quality, manual assessment is usually\\neﬀective but suﬀers from subjectivity and time-consuming, etc. It was found'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 16, 'page_label': '17'}, page_content='through exploration that ChatGPT has also achieved signiﬁcant performance\\nin automatic quality assessment.\\nIn terms of quality assessment,Kocmi et al. [68] proposed a GPT-based\\ntranslation quality assessment metric, GEMBA, which evaluates the transla-\\ntion of each fragment individually and then averages all the obtained scores to\\nobtain a ﬁnal system-level score. In the MQM2022 test set (English-German,\\nEnglish-Russian, and Chinese-English) [69], a scoring task was performed with'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 16, 'page_label': '17'}, page_content='a classiﬁcation task to compare the accuracy [70] and kendall tau scores [71] of\\nseven GPT models under four cue templates.The results showed that GEMBA\\nhad the highest system-level accuracy of 88.0% compared to more than 10 au-\\ntomatic metrics such as BLEU, and among the seven GPT models, ChatGPT\\naccuracy is above 80%, in addition to, the best performance can be obtained in\\nthe least constrained template, demonstrating the potential of LLM for trans-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 16, 'page_label': '17'}, page_content='lation quality assessment tasks, but the evaluation is only applicable at the\\nsystem level and needs further improvement.Wang et al. [72] used ChatGPT\\nas a natural language generation (NLG) evaluator to study the correlation with\\nhuman judgment. On three datasets covering diﬀerent NLG tasks, task- and\\naspect-speciﬁc cues were designed to guide ChatGPT for NLG evaluation in\\nCNN/DM [73], OpenMEVA-ROC, and BAGEL for summary, story generation,'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 16, 'page_label': '17'}, page_content='and data-to-text scoring, respectively. Then,they compute Spearman coeﬃ-\\ncients [74],Pearson correlation coeﬃcients [75]. Kendall’s Tau score [76] to as-\\nsess the correlation with human evaluations.The results show that ChatGPT is\\nhighly correlated with human judgments in all aspects, with correlation coeﬃ-\\ncients of 0.4 or more in all categories, showing its potential as an NLG indicator.\\n17'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 17, 'page_label': '18'}, page_content='Data Augmentation\\nIn natural language processing, text data augmentation is an eﬀective mea-\\nsure to alleviate the problem of low data quantity and low quality training data,\\nand ChatGPT has shown great potential in this regard.\\nIn terms of data augmentation, Dai et al. [77] proposed a ChatGPT-based\\ntext data augmentation method that reformulates each sentence in the train-\\ning sample into multiple conceptually similar but semantically diﬀerent samples'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 17, 'page_label': '18'}, page_content='for classiﬁcation tasks downstream of the Bert model.On text transcriptions\\nand PubMed 20k datasets containing more than 8 hours of audio data of com-\\nmon medical symptom descriptions,experiments were conducted to compare co-\\nsine similarity and TransRate metrics with multiple data enhancement methods\\n[9].This paper shows that compared with existing data enhancement methods,\\nthe proposed ChatAug method shows a double-digit improvement in sentence'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 17, 'page_label': '18'}, page_content='classiﬁcation accuracy and generates more diverse augmented samples while\\nmaintaining its accuracy, but the original model is not ﬁne-tuned in the pa-\\nper and suﬀers from a lack of domain knowledge, which may produce incorrect\\naugmented data.\\nMultimodal fusion\\nChatGPT can currently only process natural language directly, but with a\\ncross-modal encoder, it can combine natural language with cross-modal pro-\\ncessing to provide solutions for intelligent transportation, healthcare, and other\\nﬁelds.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 17, 'page_label': '18'}, page_content='ﬁelds.\\nIn terms of multimodal data processing, Wu et al. [78] constructed a frame-\\nwork that Visual ChatGPT integrates with diﬀerent Visual Foundation Models\\n(VFMs) and then combines a series of hints to input visual information to Chat-\\nGPT to solve visual problems.The paper shows examples of visual tasks such\\nas removing or replacing certain objects from images, interconversion between\\nimages and text, demonstrating the Visual ChatGPT has great potential and'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 17, 'page_label': '18'}, page_content='capability for diﬀerent tasks.But there are issues during the task that require\\na large number of hints to convert VFMs to language, invoke multiple VFMs\\nto solve complex problems leading to limited real-time capability, and security\\nand privacy issues. Zheng et al. [79] showed a text mining example of LLM\\nfor extracting self-driving car crash data from California crash news, analyz-\\ning a failure report example, and generating a crash report example based on'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 17, 'page_label': '18'}, page_content='keywords; introduced a use case concept of a smartphone-based framework for\\nautomatic LLM failure report generation, which absorbs multiple data sources\\ncaptured by cell phone sensors and then transfers the data to a language space\\nfor text mining, inference and generation, and further outputs the key informa-\\ntion needed to form a comprehensive fault report, demonstrating the potential\\nof LLM for a variety of transportation tasks.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 17, 'page_label': '18'}, page_content='of LLM for a variety of transportation tasks.\\nNowadays, ChatGPT shows a wide range of applications in data visualiza-\\ntion, information extraction, data enhancement, quality assessment, and multi-\\nmodal data processing.But there are also issues on how to further utilize hints\\nto eﬀectively interact with ChatGPT, lack of ability to process and analyze data\\n18'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 18, 'page_label': '19'}, page_content='from devices such as sensors, and data privacy and security.\\nCueing Techniques\\nCue engineering provides important support for eﬀective dialogue with large\\nlanguage models.White et al. [80] proposed a framework for cueing models\\napplicable to diﬀerent domains. This framework structures cues to interact\\nwith LLMs by providing speciﬁc rules and guidelines. Also, this paper presents\\na catalog of cueing patterns that have been applied to LLM interactions, as well'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 18, 'page_label': '19'}, page_content='as speciﬁc examples with and without cues. The advantages of the combinability\\nof prompting patterns are demonstrated, allowing users to interact with LLM\\nmore eﬀectively, but patterns for reusable solutions and new ways to use LLM\\nneed to be continuously explored.\\n2.1.7 Human-ChatGPT Collaboration\\nCollaboration between humans and machines is a process where humans and\\nmachines work together to achieve a common goal. In such collaboration, hu-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 18, 'page_label': '19'}, page_content='mans provide domain expertise, creativity, and decision-making abilities, while\\nmachines provide automation, scalability, and computing power. ChatGPT is\\nan advanced natural language processing model that can understand and gen-\\nerate human-like language, thereby reducing communication costs. Its ability\\nto process and generate natural language makes it an ideal partner for human\\ncollaboration. ChatGPT can oﬀer relevant suggestions, complete tasks based'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 18, 'page_label': '19'}, page_content='on human input, and enhance human productivity and creativity. It can learn\\nfrom human feedback and adapt to new tasks and domains, further improv-\\ning its performance in human-machine collaboration. ChatGPT’s capability to\\ncomprehend natural language and produce appropriate responses makes it a\\nvaluable tool for various collaboration applications, as demonstrated by several\\nstudies in the literature we have gathered.\\nAhmad et al. [81] proposed a method for human-machine collaboration us-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 18, 'page_label': '19'}, page_content='ing ChatGPT to create software architecture. This method transforms software\\nstories (created by software architects based on application scenarios) into feasi-\\nble software architecture diagrams through continuous interaction between the\\nsoftware architect and ChatGPT. During the evaluation stage, ChatGPT uses\\nthe Software Architecture Analysis Method (SAAM) to evaluate each compo-\\nnent in the software architecture and generate evaluation reports. This method'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 18, 'page_label': '19'}, page_content='eﬃciently utilizes the knowledge and supervision of the architect with the ca-\\npabilities of ChatGPT to collaboratively build software-intensive systems and\\nservices. Lanzi et al. [82] proposed a collaborative design framework that com-\\nbines interactive evolution and ChatGPT to simulate typical human design pro-\\ncesses. Humans collaborate with large language models (such as ChatGPT) to\\nrecombine and transform ideas, and use genetic algorithms to iterate through'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 18, 'page_label': '19'}, page_content='complex creative tasks. The results of three game design tasks showed that the\\nframework received positive feedback from game designers. The framework has\\ngood reusability and can be applied to any design task that can be described in\\nfree text form.\\n19'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 19, 'page_label': '20'}, page_content='In the future, ChatGPT’s ability to understand nonverbal cues such as tone\\nof voice and body language can be enhanced, enabling it to better understand\\nhuman thoughts and interact with people more eﬀectively.\\n2.1.8 ChatGPT Integration\\nIntegration refers to combining diﬀerent systems or software components to\\nachieve a common goal. ChatGPT can be integrated as a part of a whole or\\nact as an integration tool to enable seamless communication between diﬀerent'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 19, 'page_label': '20'}, page_content='systems. Its natural language processing ability makes it easier for non-technical\\nusers to interact with systems, reducing the need for specialized knowledge or\\ntraining. Some studies in the literature we collected have already demonstrated\\nthis.\\nTreude et al. [39] integrated ChatGPT into the prototype of ”GPTCOM-\\nCARE” to address programming query problems. This integration allowed for\\nthe generation of multiple source code solutions for the same query, which in-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 19, 'page_label': '20'}, page_content='creased the eﬃciency of software development. The results of their study demon-\\nstrated the eﬀectiveness of using ChatGPT to improve the quality and diversity\\nof code solutions, ultimately reducing the amount of time and eﬀort required for\\nsoftware development.Wang et al. [83] proposed the chatCAD method, which\\nutilizes large language models (LLMs) such as ChatGPT to enhance the out-\\nput of multiple CAD networks for medical images, including diagnosis, lesion'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 19, 'page_label': '20'}, page_content='segmentation, and report generation networks. The method generates sugges-\\ntions in the form of a chat dialogue. The authors tested the eﬀectiveness of\\nthe method on a randomly selected set of 300 cases from the MIMIC-CXR\\ndataset, which included 50 cases each of cardiomegaly, edema, consolidation,\\natelectasis, pleural eﬀusion, and no ﬁndings. Compared to CvT2DistilGPT2\\nand R2GenCMN, chatCAD showed signiﬁcant advantages in RC and F1, while\\nonly performing weaker than R2GenCMN in PR.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 19, 'page_label': '20'}, page_content='only performing weaker than R2GenCMN in PR.\\nIntegrating ChatGPT into applications will still present challenges. Firstly,\\nChatGPT’s performance may be aﬀected by language barriers or diﬀerences\\nin terminology between diﬀerent systems. Additionally, ChatGPT’s responses\\nare not always deterministic, which poses a challenge when integrating with\\nsystems that require precise and reproducible results. Finally, the processing'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 19, 'page_label': '20'}, page_content='time of ChatGPT is slow for integration tasks involving time-sensitive data such\\nas traﬃc, which is a limitation in time-critical environments.\\n2.2 AI Ethics\\nSince the advent of ChatGPT, this powerful natural language processing model\\nhas not only brought great convenience to people but also triggered more crisis-\\naware thinking. Some researchers have started to hypothesize and study the\\npotential negative impacts of ChatGPT. This proactive research provides good'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 19, 'page_label': '20'}, page_content='proposals for standardized construction to address future AI abuse issues.\\nRegarding the evaluation of ChatGPT’s own political and ethical tendencies,\\nHartmann et al. [84] used Wahl-O-Mat, one of the most commonly used voting\\n20'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 20, 'page_label': '21'}, page_content='advice applications in the world, to show ChatGPT political statements from\\ndiﬀerent parties, forcing it to make choices of agree, disagree, or neutral. The\\nresults indicated that ChatGPT has a pro-environment, left-wing liberal ide-\\nology, which was also conﬁrmed in the nation-state agnostic political compass\\ntest. Another study (referenced as [85]) examined ChatGPT’s moral standards\\nby repeatedly asking it diﬀerent versions of the trolley problem, and found that'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 20, 'page_label': '21'}, page_content='ChatGPT gave answers with diﬀerent moral orientations, lacking a ﬁrm moral\\nstance. A subsequent test also found that ChatGPT’s lack of consistency could\\naﬀect people’s moral judgments. Additionally, Borji et al. [86] demonstrated\\nChatGPT’s inconsistency in reasoning, factual errors, mathematics, coding, and\\nbias across eleven related aspects. These ﬁndings highlight ChatGPT’s inher-\\nent traits and limitations, and people should be aware of their potential impact'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 20, 'page_label': '21'}, page_content='when seeking advice from ChatGPT.\\nRegarding relevant policies and regulations, Hacker et al. [87] discussed the\\nnature and rules of large generative AI models, including ChatGPT, which are\\nrapidly changing the way we communicate, explain, and create. The author\\nsuggested that diﬀerent stakeholders in the value chain should take regulatory\\nresponsibility and deploy four strategies to tailor more comprehensive laws for'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 20, 'page_label': '21'}, page_content='the beneﬁt of society. Another study (referenced as [88]) criticized the European\\nCommission’s proposal on AI responsibility and suggested revising the proposed\\nAI responsibility framework to ensure eﬀective compensation while promoting\\ninnovation, legal certainty, and sustainable AI regulation. A policy framework\\nwas proposed (referenced as [89]) to customize LLMs, such as ChatGPT, in a so-\\ncially acceptable and safe manner, emphasizing the need to align large language'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 20, 'page_label': '21'}, page_content='models (LLMs) with human preferences.\\nThe political and ethical tendencies of ChatGPT could inﬂuence users’ be-\\nhavior and decision-making to some extent. However, some studies have con-\\nducted in-depth research on the use of norms and limitations, which could enable\\nhumans to use ChatGPT more reasonably and safely.\\n2.3 Evaluation\\n2.3.1 Comparison of ChatGPT with existing popular models\\nWe use publicly available datasets to comprehensively evaluate the strengths'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 20, 'page_label': '21'}, page_content='and limitations of ChatGPT. Reference [90] evaluates the technical performance\\nof ChatGPT in multitask, multilingual, and multimodal aspects based on 23\\nstandard public datasets and newly designed multimodal datasets, including\\neight diﬀerent common natural language processing application tasks. The ex-\\nperimental results show that, in terms of multitasking, ChatGPT outperforms\\nvarious state-of-the-art zero-shot learning large language models in most tasks,'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 20, 'page_label': '21'}, page_content='and even outperforms ﬁne-tuned task-speciﬁc models in some individual tasks.\\nIn terms of multilingualism, we found that ChatGPT cannot be applied to low-\\nresource languages because it cannot understand the language and generate\\ntranslations for that language. In terms of multimodality, ChatGPT’s ability is\\nstill basic compared to specialized language-visual models.\\n21'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 21, 'page_label': '22'}, page_content='In terms of stability, reference [91] concludes that ChatGPT’s performance is\\nalways lower than SOTA, the current state-of-the-art model, in almost all tasks.\\nThis means that as a general model, ChatGPT has never reached the level of the\\nbest existing models. Experimental data shows that the average quality of the\\nSOTA model is 73.7%, while the average quality of the ChatGPT model is only\\n56.5%. At the same time, ChatGPT’s stability is poor: the standard deviation'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 21, 'page_label': '22'}, page_content='of its performance is 23.3%, while the SOTA model’s standard deviation is\\nonly 16.7%. This non-deterministic behavior exhibited by ChatGPT could be\\na serious drawback in some problems.\\nSimilarly, Qin et al. [92] conducted a comprehensive evaluation of whether\\nChatGPT is a qualiﬁed general natural language processing task solver. The ex-\\nperiment analyzed ChatGPT’s zero-shot learning ability based on 20 commonly\\nused public datasets covering 7 representative task categories. Below, we will'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 21, 'page_label': '22'}, page_content='analyze ChatGPT’s performance on each task:\\nIn terms of reasoning tasks, ChatGPT performs average on mathematical\\nsymbol, commonsense causal, and logical reasoning tasks, but performs well\\nin arithmetic reasoning [92]. That is to say, ChatGPT’s abilities vary among\\ndiﬀerent types of reasoning tasks. In terms of logical reasoning, ChatGPT’s\\ndeductive and abductive reasoning are superior to inductive reasoning, while\\nin other reasoning tasks, such as analogy, causal and commonsense reasoning,'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 21, 'page_label': '22'}, page_content='ChatGPT performs well [90].\\nIn terms of sentiment analysis task, ChatGPT performs similarly to GPT-3.5\\nand bert-style models [92, 93]. However, according to literature [91], ChatGPT\\nhas losses not exceeding 25% on most tasks, except for three relatively sub-\\njective emotion perception tasks where it performs poorly. If we remove these\\ntasks to calculate the average quality of the two models, we ﬁnd that the SOTA\\nmethod has an average quality of 80%, while the ChatGPT method has an av-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 21, 'page_label': '22'}, page_content='erage quality of 69.7%. That is to say, ChatGPT performs well on all tasks\\nexcept for emotion-related tasks, and can handle most of the problems we con-\\nsider. However, overall, its performance is lower than the SOTA model based\\non experimental data, but the diﬀerence between the two is not very large.\\nIn other tasks, according to literature [92], ChatGPT performs well in nat-\\nural language inference, i.e., the task of inferring sentence relationships, and its'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 21, 'page_label': '22'}, page_content='performance on this task is signiﬁcantly better than all bert-style models [93].\\nHowever, while ChatGPT performs well on inference tasks, it may produce some\\nself-contradictory or unreasonable responses, which is its potential limitation.\\nIn question-answering, dialogue, and summarization tasks, ChatGPT performs\\nbetter than the GPT-3.5 model [92], especially in the question-answering task,\\nwhere its performance is comparable to bert-style models [93]. Therefore, we'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 21, 'page_label': '22'}, page_content='have demonstrated that ChatGPT is a qualiﬁed general-purpose model.\\nHowever, ChatGPT also has limitations in many aspects. Firstly, it lacks the\\nability to handle non-textual semantic reasoning tasks such as mathematical,\\ntemporal, and spatial reasoning, and it performs poorly in multi-hop reasoning\\n[90]. Secondly, ChatGPT is not good at solving named entity recognition tasks\\n[92]. Furthermore, ChatGPT performs poorly in handling tasks involving nega-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 21, 'page_label': '22'}, page_content='tive connotations and neutral similarity [93]. Finally, these conclusions indicate\\n22'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 22, 'page_label': '23'}, page_content='that, like other large pre-trained language models, ChatGPT has limitations in\\ncompleting complex reasoning tasks.\\nIn summary, ChatGPT’s zero-shot performance is comparable to ﬁne-tuned\\nbert and GPT-3.5 models, and with the help of advanced prompting strategies,\\nChatGPT can demonstrate better comprehension abilities. However, it still\\ncannot outperform the current SOTA models.\\n2.3.2 The possibility of using ChatGPT for plagiarism and cheating'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 22, 'page_label': '23'}, page_content='In response to the possibility of ChatGPT being used for plagiarism and cheat-\\ning, Zhou et al. [94] reﬂected on the current state of development of artiﬁcial in-\\ntelligence like ChatGPT. As ChatGPT becomes increasingly easy to obtain and\\nscalable in text generation, there is a high likelihood that these technologies will\\nbe used for plagiarism, including scientiﬁc literature and news sources, posing\\na great threat to the credibility of various forms of news media and academic'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 22, 'page_label': '23'}, page_content='articles. Some scholars are concerned that the end of paper as a meaningful\\nevaluation tool may be approaching [95, 96], as ChatGPT can easily generate\\npersuasive paragraphs, chapters, and papers on any given topic. Additionally,\\nit will exacerbate plagiarism issues in many ﬁelds such as education, medicine,\\nand law [10], and may be used for cheating in academic exams [97]. Deﬁnitional\\nrecognition technology is a relatively eﬀective method for detecting plagiarism,'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 22, 'page_label': '23'}, page_content='and the deﬁnitional typology proposed in [94] can alleviate people’s concerns by\\nbeing used to construct new datasets. Susnjak [97] proposed a solution to the\\npossibility of large language models like ChatGPT being used for exam cheat-\\ning: guiding ChatGPT to generate some critical thinking problems through\\nquestioning, then providing answers and critically evaluating them. Analysis of\\nChatGPT shows that it exhibits critical thinking, can generate highly realistic'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 22, 'page_label': '23'}, page_content='text in terms of accuracy, relevance, depth, breadth, logic, persuasiveness, and\\noriginality. Therefore, educators must be aware of the possibility of ChatGPT\\nbeing used for exam cheating and take measures to combat cheating behavior\\nto ensure the fairness of online exams.\\n2.3.3 Feedback from ChatGPT users\\nIn response to feedback from ChatGPT users, Haque et al. [98] conducted a\\nmixed-methods study using 10,732 early ChatGPT user tweets. The authors'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 22, 'page_label': '23'}, page_content='extracted Twitter data using Python and Twitter API and constructed the\\nChatGPTTweet dataset, which contains 18k tweets. For each tweet, the au-\\nthors collected information on text content, user location, occupation, veriﬁca-\\ntion status, date of publication, and tags. Based on this dataset, the authors\\nstudied the characteristics of early ChatGPT users, discussion topics related to\\nChatGPT on Twitter, and the sentiment of Twitter users toward ChatGPT.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 22, 'page_label': '23'}, page_content='For RQ1, the authors found that early ChatGPT users had a diverse and wide\\nrange of occupational backgrounds and geographical locations. For RQ2, the\\nauthors identiﬁed nine topics related to ChatGPT, including its impact on soft-\\nware development, entertainment and creativity, natural language processing,\\n23'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 23, 'page_label': '24'}, page_content='education, chatbot intelligence, business development, search engines, question-\\nanswering tests, and future careers and opportunities. For RQ3, most early\\nusers expressed positive sentiment toward topics such as software development\\nand creativity, while only a few expressed concern about the potential misuse\\nof ChatGPT.\\n2.3.4 Adverse eﬀects of ChatGPT on users\\nRegarding the negative eﬀects of ChatGPT on users, Luan et al. [99] studied'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 23, 'page_label': '24'}, page_content='the psychological principles of ChatGPT, delved into the factors that attract\\nusers’ attention, and revealed the impact of these factors on future learning.\\nIn the post-pandemic era, teachers and students are both facing uncertainty\\nin the teaching process and job pressures. Under these common constraints of\\neducation and employment, educators and students must re-evaluate current\\neducational methods and outcomes, as well as students’ future career devel-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 23, 'page_label': '24'}, page_content='opment. Through question-and-answer exchanges with ChatGPT, people can\\neasily obtain appropriate solutions or key information, thereby enhancing their\\nmotivation, eliminating anxiety in learning, improving interest, and achieving\\npsychological satisfaction. Subhash et al. [100] explored whether large language\\nmodels have the ability to reverse user preferences. With the development of\\npre-trained large language models, people are increasingly concerned about the'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 23, 'page_label': '24'}, page_content='ability of these models to inﬂuence, persuade, and potentially manipulate user\\npreferences in extreme cases. Therefore, the literature [100] roughly qualita-\\ntively analyzed that adversarial behavior does lead to potential changes in user\\npreferences and behaviors in dialogue systems. If we want to further quanti-\\ntatively analyze the ability of large language models in this regard, additional\\nstatistical summary techniques need to be used for future research.\\n3 Discussion\\n3.1 Limitations'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 23, 'page_label': '24'}, page_content='3 Discussion\\n3.1 Limitations\\nDespite the remarkable capabilities of ChatGPT and GPT-4, it still faces certain\\nlimitations. Some of these limitations include:\\nOutdated Knowledge\\nThe current models are trained on historical data (up to 2021), thereby\\nlacking real-time comprehension of current aﬀairs. This is a critical concern\\nin today’s information-explosion era, as the reliability of prior knowledge bases\\nprogressively diminishes, potentially yielding inaccurate responses, especially in'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 23, 'page_label': '24'}, page_content='rapidly evolving domains such as jurisprudence and technology. Additionally,\\nthese models are incapable of fact-checking while the training data is composed\\nof content from various sources, some of which may be unreliable, which may\\nresult in seemingly plausible yet nonsensical responses.\\n24'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 24, 'page_label': '25'}, page_content='Insuﬃcient Understanding\\nWhile these models can interpret the majority of inquiries and contextual\\nsituations, they occasionally encounter comprehension biases when addressing\\nambiguous or contextually complex queries. Furthermore, in certain specialized\\nﬁelds, the abundance of unique abbreviation exacerbates the models’ under-\\nstanding challenges, resulting in incorrect and vacuous responses.\\nEnergy Consumption\\nThroughout the training and inference stages, these large-scale models re-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 24, 'page_label': '25'}, page_content='quire signiﬁcant computational resources and electrical power, resulting in ele-\\nvated energy consumption and signiﬁcant carbon emissions. Consequently, this\\nrestricts their deployment and practical applications.\\nMalicious Usage\\nDespite OpenAI implementing a series of restrictions to mitigate model tox-\\nicity, instances of users evading these constraints through meticulously designed\\nprompts have emerged, inducing the model to produce unhealthy content or'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 24, 'page_label': '25'}, page_content='even using it for illicit commercial purposes.\\nBias and Discrimination\\nDue to the inﬂuence of pre-training data, the models exhibit biases in polit-\\nical, ideological, and other areas. The application of LLMs in public domains,\\nsuch as education and publicity, should be approached with extreme caution.\\nPrivacy and Data Security\\nConcurrent with the expansion of users, protecting user privacy and data\\nsecurity becomes increasingly important. In fact, ChatGPT was banned in'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 24, 'page_label': '25'}, page_content='Italy in early April due to privacy concerns. This is particularly relevant given\\nthe models’ extensive collection of personal information and preferences during\\ninteractions, and as future multimodal models, such as GPT-4, may frequently\\nrequire users to upload private photos.\\n3.2 Future Directions\\nIn forthcoming research, the development of models based on ChatGPT and\\nGPT-4 may focus on addressing these limitations to enhance their practical\\napplications.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 24, 'page_label': '25'}, page_content='applications.\\nPrimarily, researchers should continue to work on reﬁning model training\\nmethodologies while ﬁltering pre-training data to minimize the presence of mis-\\nleading information in the model’s knowledge base, thereby obtaining accurate\\nresponses. Concurrently, it is crucial to emphasize training approaches that\\neconomize computational resources, thereby mitigating costs and broadening\\npotential application scenarios.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 24, 'page_label': '25'}, page_content='potential application scenarios.\\nMoreover, the advancements in context-awareness and disambiguation tech-\\nnologies are anticipated to facilitate enhanced comprehension of complex queries\\n25'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 25, 'page_label': '26'}, page_content='by models, improving the accuracy, relevance, and context-awareness of AI-\\ngenerated content. Integrating real-time data streams can also keep these mod-\\nels in sync with current events and trends, enabling them to provide up-to-date\\ninformation such as live traﬃc, weather, and stock updates.\\nAdditionally, developers should engage in interdisciplinary collaboration with\\nspecialists from diverse domains, including policy-making, jurisprudence, and'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 25, 'page_label': '26'}, page_content='sociology, with the objective of formulating standard and ethical frameworks\\nfor LLM development, deployment, and utilization, thereby alleviating poten-\\ntial harmful consequences. In terms of public awareness and education, manda-\\ntory awareness training should be implemented prior to large-scale public de-\\nployment and application to increase public awareness of LLM capabilities and\\nlimitations while promoting responsible and informed utilization, especially in'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 25, 'page_label': '26'}, page_content='industries such as K-12 education and journalism.\\nFinally, the inﬂuence of ChatGPT and GPT-4 should not be limited to\\njust the NLP ﬁeld. They also show promising prospects in the areas of com-\\nputer vision, brain-inspired AI, and robotics. These models exhibit a capacity\\nfor learning and comprehension comparable with human-level intelligence, po-\\nsitioning them as a pivotal component in the development of artiﬁcial general'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 25, 'page_label': '26'}, page_content='intelligence (AGI)[101]. Their ability to facilitate seamless interactions between\\nhumans and robots paves the way for the execution of more complex tasks.\\nThe remarkable capacity of zero-shot in-context learning of these models en-\\nables quick adaptation to new tasks without the requirement for labeled data\\nfor ﬁne-tuning, which is a critical challenge in ﬁelds like medical informatics[102]\\nand robotics[103] where the availability of labeled data is commonly limited or\\nnon-existent.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 25, 'page_label': '26'}, page_content='non-existent.\\n4 Conclusion\\nThis review paper provides a comprehensive survey of ChatGPT and GPT-4,\\nhighlighting their potential applications and signiﬁcant contributions to the ﬁeld\\nof natural language processing. The ﬁndings of this study reveal that the interest\\nin these models is growing rapidly, and they have shown considerable potential\\nfor application across a wide range of domains. One key factor contributing\\nto the success of ChatGPT and GPT-4 is their ability to perform large-scale'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 25, 'page_label': '26'}, page_content='pre-training, which captures knowledge from the vast expanse of the internet,\\nallowing the models to learn from a massive amount of data. The integration of\\nReinforcement Learning from Human Feedback (RLHF) has further enhanced\\nthe model’s adaptability and performance, making it highly eﬃcient in process-\\ning natural language. This study has also identiﬁed several potential ethical\\nconcerns related to the development and use of ChatGPT and GPT-4. For in-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 25, 'page_label': '26'}, page_content='stance, there are concerns about the generation of biased or harmful content,\\nprivacy violations, and the potential for misuse of the technology. It is crucial\\nto address these concerns and ensure that ChatGPT and GPT-4 are developed\\nand used in a responsible and ethical manner. Furthermore, the results of this\\nstudy demonstrate that there is signiﬁcant potential for ChatGPT and GPT-4\\n26'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 26, 'page_label': '27'}, page_content='to be applied in a range of domains, including education, history, mathematics,\\nphysics, and more. These models can facilitate tasks such as generating sum-\\nmaries, answering questions, and providing personalized recommendations to\\nusers. Overall, the insights presented in this review paper can serve as a useful\\nguide for researchers and practitioners looking to advance the ﬁeld of natural\\nlanguage processing. Future research in this ﬁeld should focus on addressing'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 26, 'page_label': '27'}, page_content='ethical concerns, exploring new applications, and ensuring the responsible use\\nof ChatGPT and GPT-4. The potential of these models to revolutionize natural\\nlanguage processing is enormous, and we look forward to seeing more develop-\\nments in this ﬁeld.\\nReferences\\n[1] Radford A, Narasimhan K, Salimans T, Sutskever I, et al. Improving\\nlanguage understanding by generative pre-training. OpenAI. 2018.\\n[2] Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I, et al.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 26, 'page_label': '27'}, page_content='Language models are unsupervised multitask learners. OpenAI blog.\\n2019;1(8):9.\\n[3] Radford A, Wu J, Amodei D, Amodei D, Clark J, Brundage M, et al. Bet-\\nter language models and their implications. OpenAI Blog https://openai\\ncom/blog/better-language-models. 2019;1(2).\\n[4] Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD, Dhariwal P, et al.\\nLanguage models are few-shot learners. Advances in neural information\\nprocessing systems. 2020;33:1877-901.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 26, 'page_label': '27'}, page_content='processing systems. 2020;33:1877-901.\\n[5] Ouyang L, Wu J, Jiang X, Almeida D, Wainwright CL, Mishkin P, et al.\\nTraining language models to follow instructions with human feedback.\\narXiv preprint arXiv:220302155. 2022.\\n[6] Christiano PF, Leike J, Brown T, Martic M, Legg S, Amodei D. Deep\\nreinforcement learning from human preferences. Advances in neural infor-\\nmation processing systems. 2017;30.\\n[7] Frieder S, Pinchetti L, Griﬃths RR, Salvatori T, Lukasiewicz T, Pe-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 26, 'page_label': '27'}, page_content='tersen PC, et al. Mathematical capabilities of chatgpt. arXiv preprint\\narXiv:230113867. 2023.\\n[8] Pardos ZA, Bhandari S. Learning gain diﬀerences between ChatGPT and\\nhuman tutor generated algebra hints. arXiv preprint arXiv:230206871.\\n2023.\\n[9] Shakarian P, Koyyalamudi A, Ngu N, Mareedu L. An Independent Eval-\\nuation of ChatGPT on Mathematical Word Problems (MWP). arXiv\\npreprint arXiv:230213814. 2023.\\n27'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 27, 'page_label': '28'}, page_content='[10] Lehnert K. AI Insights into Theoretical Physics and the Swampland Pro-\\ngram: A Journey Through the Cosmos with ChatGPT. arXiv preprint\\narXiv:230108155. 2023.\\n[11] Kortemeyer G. Could an Artiﬁcial-Intelligence agent pass an introductory\\nphysics course? arXiv preprint arXiv:230112127. 2023.\\n[12] West CG. AI and the FCI: Can ChatGPT Project an Understanding of\\nIntroductory Physics? arXiv preprint arXiv:230301067. 2023.\\n[13] Nov O, Singh N, Mann DM. Putting ChatGPT’s Medical Advice to the'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 27, 'page_label': '28'}, page_content='(Turing) Test. medRxiv. 2023.\\n[14] Tu R, Ma C, Zhang C. Causal-Discovery Performance of ChatGPT in the\\ncontext of Neuropathic Pain Diagnosis. arXiv preprint arXiv:230113819.\\n2023.\\n[15] Glymour C, Zhang K, Spirtes P. Review of Causal Discovery Methods\\nBased on Graphical Models. Frontiers in Genetics. 2019.\\n[16] Guo S, Wang Y, Li S, Saeed N. Semantic Communications with Ordered\\nImportance using ChatGPT. arXiv preprint arXiv:230207142. 2023.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 27, 'page_label': '28'}, page_content='[17] Wang S, Scells H, Koopman B, Zuccon G. Can chatgpt write a good\\nboolean query for systematic review literature search? arXiv preprint\\narXiv:230203495. 2023.\\n[18] Kuzman T, Mozetic I, Ljubeˇ sic N. ChatGPT: Beginning of an End of\\nManual Linguistic Data Annotation? Use Case of Automatic Genre Iden-\\ntiﬁcation. arXiv e-prints. 2023:arXiv-2303.\\n[19] Amin MM, Cambria E, Schuller BW. Will Aﬀective Computing Emerge\\nfrom Foundation Models and General AI? A First Evaluation on Chat-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 27, 'page_label': '28'}, page_content='GPT. arXiv preprint arXiv:230303186. 2023.\\n[20] Zhang B, Ding D, Jing L. How would Stance Detection Techniques Evolve\\nafter the Launch of ChatGPT? arXiv preprint arXiv:221214548. 2022.\\n[21] Huang F, Kwak H, An J. Is ChatGPT better than Human Annota-\\ntors? Potential and Limitations of ChatGPT in Explaining Implicit Hate\\nSpeech. arXiv preprint arXiv:230207736. 2023.\\n[22] Zhang X, Chowdhury RR, Hong D, Gupta RK, Shang J. Model-\\ning Label Semantics Improves Activity Recognition. arXiv preprint'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 27, 'page_label': '28'}, page_content='arXiv:230103462. 2023.\\n[23] Fu Q, Teng Z, Georgaklis M, White J, Schmidt DC. NL2CMD: An Up-\\ndated Workﬂow for Natural Language to Bash Commands Translation.\\narXiv preprint arXiv:230207845. 2023.\\n28'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 28, 'page_label': '29'}, page_content='[24] Chen Y, Eger S. Transformers go for the LOLs: Generating (humourous)\\ntitles from scientiﬁc abstracts end-to-end. arXiv preprint arXiv:221210522.\\n2022.\\n[25] Chen N, Wang Y, Jiang H, Cai D, Chen Z, Li J. What would Harry\\nsay? Building Dialogue Agents for Characters in a Story. arXiv preprint\\narXiv:221106869. 2022.\\n[26] Jeblick K, Schachtner B, Dexl J, Mittermeier A, St¨ uber AT, Topalis J,\\net al. ChatGPT Makes Medicine Easy to Swallow: An Exploratory Case'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 28, 'page_label': '29'}, page_content='Study on Simpliﬁed Radiology Reports. arXiv preprint arXiv:221214882.\\n2022.\\n[27] Xia CS, Zhang L. Conversational automated program repair. arXiv\\npreprint arXiv:230113246. 2023.\\n[28] Jiao W, ZhaopengTu WJtX. Is ChatGPT A Good Translator? Yes With\\nGPT-4 As The Engine.\\n[29] Prieto SA, Mengiste ET, de Soto BG. Investigating the Use of\\nChatGPT for the Scheduling of Construction Projects. Buildings.\\n2023 mar;13(4):857. Available from: https://doi.org/10.3390%\\n2Fbuildings13040857.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 28, 'page_label': '29'}, page_content='2Fbuildings13040857.\\n[30] Michail A, Konstantinou S, Clematide S. UZH CLyp at SemEval-2023\\nTask 9: Head-First Fine-Tuning and ChatGPT Data Generation for\\nCross-Lingual Learning in Tweet Intimacy Prediction. arXiv preprint\\narXiv:230301194. 2023.\\n[31] Wang J, Liang Y, Meng F, Li Z, Qu J, Zhou J. Cross-Lingual Summa-\\nrization via ChatGPT. arXiv preprint arXiv:230214229. 2023.\\n[32] Yang X, Li Y, Zhang X, Chen H, Cheng W. Exploring the limits of'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 28, 'page_label': '29'}, page_content='chatgpt for query or aspect-based text summarization. arXiv preprint\\narXiv:230208081. 2023.\\n[33] Belouadi J, Eger S. ByGPT5: End-to-End Style-conditioned Po-\\netry Generation with Token-free Language Models. arXiv preprint\\narXiv:221210474. 2022.\\n[34] Blanco-Gonzalez A, Cabezon A, Seco-Gonzalez A, Conde-Torres D,\\nAntelo-Riveiro P, Pineiro A, et al. The Role of AI in Drug Discovery: Chal-\\nlenges, Opportunities, and Strategies. arXiv preprint arXiv:221208104.\\n2022.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 28, 'page_label': '29'}, page_content='2022.\\n[35] Khalil M, Er E. Will ChatGPT get you caught? Rethinking of plagiarism\\ndetection. arXiv preprint arXiv:230204335. 2023.\\n[36] Basic Z, Banovac A, Kruzic I, Jerkovic I. Better by you, better than\\nme, chatgpt3 as writing assistance in students essays. arXiv preprint\\narXiv:230204536. 2023.\\n29'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 29, 'page_label': '30'}, page_content='[37] Noever D, Ciolino M. The Turing Deception. arXiv preprint\\narXiv:221206721. 2022.\\n[38] Megahed FM, Chen YJ, Ferris JA, Knoth S, Jones-Farmer LA. How\\nGenerative AI models such as ChatGPT can be (Mis) Used in SPC Prac-\\ntice, Education, and Research? An Exploratory Study. arXiv preprint\\narXiv:230210916. 2023.\\n[39] Treude C. Navigating Complexity in Software Engineering: A Prototype\\nfor Comparing GPT-n Solutions. arXiv preprint arXiv:230112169. 2023.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 29, 'page_label': '30'}, page_content='[40] Sobania D, Briesch M, Hanna C, Petke J. An analysis of the automatic\\nbug ﬁxing performance of chatgpt. arXiv preprint arXiv:230108653. 2023.\\n[41] Noever D, McKee F. Numeracy from Literacy: Data Science as an Emer-\\ngent Skill from Large Language Models. arXiv preprint arXiv:230113382.\\n2023.\\n[42] McKee F, Noever D. Chatbots in a Botnet World. arXiv preprint\\narXiv:221211126. 2022.\\n[43] McKee F, Noever D. Chatbots in a Honeypot World. arXiv preprint\\narXiv:230103771. 2023.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 29, 'page_label': '30'}, page_content='arXiv:230103771. 2023.\\n[44] Susnjak T. Applying BERT and ChatGPT for Sentiment Analysis of\\nLyme Disease in Scientiﬁc Literature. arXiv preprint arXiv:230206474.\\n2023.\\n[45] Tang Z, Kejriwal M. A Pilot Evaluation of ChatGPT and DALL-E 2 on\\nDecision Making and Spatial Reasoning. arXiv preprint arXiv:230209068.\\n2023.\\n[46] Ortega-Mart´ ın M, Garc´ ıa-Sierra´O, Ardoiz A, ´Alvarez J, Armenteros JC,\\nAlonso A. Linguistic ambiguity analysis in ChatGPT. arXiv preprint\\narXiv:230206426. 2023.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 29, 'page_label': '30'}, page_content='arXiv:230206426. 2023.\\n[47] Maddigan P, Susnjak T. Chat2vis: Generating data visualisations via\\nnatural language using chatgpt, codex and gpt-3 large language models.\\narXiv preprint arXiv:230202094. 2023.\\n[48] Luo Y, Tang J, Li G. nvBench: A Large-Scale Synthesized Dataset for\\nCross-Domain Natural Language to Visualization Task. arXiv preprint\\narXiv:211212926. 2021.\\n[49] Liu C, Han Y, Jiang R, Yuan X. Advisor: Automatic visualization answer'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 29, 'page_label': '30'}, page_content='for natural-language question on tabular data. In: 2021 IEEE 14th Paciﬁc\\nVisualization Symposium (PaciﬁcVis). IEEE; 2021. p. 11-20.\\n30'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 30, 'page_label': '31'}, page_content='[50] Narechania A, Srinivasan A, Stasko J. NL4DV: A toolkit for generat-\\ning analytic speciﬁcations for data visualization from natural language\\nqueries. IEEE Transactions on Visualization and Computer Graphics.\\n2020;27(2):369-79.\\n[51] Wei X, Cui X, Cheng N, Wang X, Zhang X, Huang S, et al. Zero-\\nShot Information Extraction via Chatting with ChatGPT. arXiv preprint\\narXiv:230210205. 2023.\\n[52] Takanobu R, Zhang T, Liu J, Huang M. A hierarchical framework for'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 30, 'page_label': '31'}, page_content='relation extraction with reinforcement learning. In: Proceedings of the\\nAAAI conference on artiﬁcial intelligence. vol. 33; 2019. p. 7072-9.\\n[53] Li S, He W, Shi Y, Jiang W, Liang H, Jiang Y, et al. Duie: A large-\\nscale chinese dataset for information extraction. In: Natural Language\\nProcessing and Chinese Computing: 8th CCF International Conference,\\nNLPCC 2019, Dunhuang, China, October 9–14, 2019, Proceedings, Part\\nII 8. Springer; 2019. p. 791-800.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 30, 'page_label': '31'}, page_content='II 8. Springer; 2019. p. 791-800.\\n[54] Wang Z, Shang J, Liu L, Lu L, Liu J, Han J. Crossweigh: Training named\\nentity tagger from imperfect annotations. arXiv preprint arXiv:190901441.\\n2019.\\n[55] Levow GA. The third international Chinese language processing bakeoﬀ:\\nWord segmentation and named entity recognition. In: Proceedings of the\\nFifth SIGHAN workshop on Chinese language processing; 2006. p. 108-17.\\n[56] Li X, Li F, Pan L, Chen Y, Peng W, Wang Q, et al. DuEE: a large-scale'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 30, 'page_label': '31'}, page_content='dataset for Chinese event extraction in real-world scenarios. In: Natu-\\nral Language Processing and Chinese Computing: 9th CCF International\\nConference, NLPCC 2020, Zhengzhou, China, October 14–18, 2020, Pro-\\nceedings, Part II 9. Springer; 2020. p. 534-45.\\n[57] Gormley MR, Yu M, Dredze M. Improved relation extraction with feature-\\nrich compositional embedding models. arXiv preprint arXiv:150502419.\\n2015.\\n[58] Hoﬀmann R, Zhang C, Ling X, Zettlemoyer L, Weld DS. Knowledge-based'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 30, 'page_label': '31'}, page_content='weak supervision for information extraction of overlapping relations. In:\\nProceedings of the 49th annual meeting of the association for computa-\\ntional linguistics: human language technologies; 2011. p. 541-50.\\n[59] Gao J, Zhao H, Yu C, Xu R. Exploring the feasibility of ChatGPT for\\nevent extraction. arXiv preprint arXiv:230303836. 2023.\\n[60] Lu Y, Lin H, Xu J, Han X, Tang J, Li A, et al. Text2event: Controllable\\nsequence-to-structure generation for end-to-end event extraction. arXiv'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 30, 'page_label': '31'}, page_content='preprint arXiv:210609232. 2021.\\n31'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 31, 'page_label': '32'}, page_content='[61] Du X, Cardie C. Event extraction by answering (almost) natural ques-\\ntions. arXiv preprint arXiv:200413625. 2020.\\n[62] Tang R, Han X, Jiang X, Hu X. Does Synthetic Data Generation of LLMs\\nHelp Clinical Text Mining? arXiv preprint arXiv:230304360. 2023.\\n[63] He J, Wang L, Hu Y, Liu N, Liu H, Xu X, et al. ICL-D3IE: In-Context\\nLearning with Diverse Demonstrations Updating for Document Informa-\\ntion Extraction. arXiv preprint arXiv:230305063. 2023.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 31, 'page_label': '32'}, page_content='[64] Jaume G, Ekenel HK, Thiran JP. Funsd: A dataset for form under-\\nstanding in noisy scanned documents. In: 2019 International Conference\\non Document Analysis and Recognition Workshops (ICDARW). vol. 2.\\nIEEE; 2019. p. 1-6.\\n[65] Park S, Shin S, Lee B, Lee J, Surh J, Seo M, et al. CORD: a consoli-\\ndated receipt dataset for post-OCR parsing. In: Workshop on Document\\nIntelligence at NeurIPS 2019; 2019. .\\n[66] Huang Z, Chen K, He J, Bai X, Karatzas D, Lu S, et al. Icdar2019 com-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 31, 'page_label': '32'}, page_content='petition on scanned receipt ocr and information extraction. In: 2019 In-\\nternational Conference on Document Analysis and Recognition (ICDAR).\\nIEEE; 2019. p. 1516-20.\\n[67] Polak MP, Morgan D. Extracting Accurate Materials Data from Research\\nPapers with Conversational Language Models and Prompt Engineering–\\nExample of ChatGPT. arXiv preprint arXiv:230305352. 2023.\\n[68] Kocmi T, Federmann C. Large language models are state-of-the-art eval-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 31, 'page_label': '32'}, page_content='uators of translation quality. arXiv preprint arXiv:230214520. 2023.\\n[69] Freitag M, Rei R, Mathur N, Lo Ck, Stewart C, Avramidis E, et al.\\nResults of WMT22 metrics shared task: Stop using BLEU–neural metrics\\nare better and more robust. In: Proceedings of the Seventh Conference\\non Machine Translation (WMT); 2022. p. 46-68.\\n[70] Kocmi T, Federmann C, Grundkiewicz R, Junczys-Dowmunt M, Mat-\\nsushita H, Menezes A. To ship or not to ship: An extensive eval-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 31, 'page_label': '32'}, page_content='uation of automatic metrics for machine translation. arXiv preprint\\narXiv:210710821. 2021.\\n[71] Freitag M, Rei R, Mathur N, Lo Ck, Stewart C, Avramidis E, et al.\\nResults of WMT22 metrics shared task: Stop using BLEU–neural metrics\\nare better and more robust. In: Proceedings of the Seventh Conference\\non Machine Translation (WMT); 2022. p. 46-68.\\n[72] Wang J, Liang Y, Meng F, Shi H, Li Z, Xu J, et al. Is chatgpt a good nlg\\nevaluator? a preliminary study. arXiv preprint arXiv:230304048. 2023.\\n32'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 32, 'page_label': '33'}, page_content='[73] Hermann KM, Kocisky T, Grefenstette E, Espeholt L, Kay W, Suleyman\\nM, et al. Teaching machines to read and comprehend. Advances in neural\\ninformation processing systems. 2015;28.\\n[74] Zar JH. Spearman rank correlation. Encyclopedia of biostatistics. 2005;7.\\n[75] Mukaka MM. A guide to appropriate use of correlation coeﬃcient in\\nmedical research. Malawi medical journal. 2012;24(3):69-71.\\n[76] Kendall MG. A new measure of rank correlation. Biometrika.\\n1938;30(1/2):81-93.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 32, 'page_label': '33'}, page_content='1938;30(1/2):81-93.\\n[77] Dai H, Liu Z, Liao W, Huang X, Wu Z, Zhao L, et al. ChatAug: Leveraging\\nChatGPT for Text Data Augmentation. arXiv preprint arXiv:230213007.\\n2023.\\n[78] Wu C, Yin S, Qi W, Wang X, Tang Z, Duan N. Visual chatgpt: Talk-\\ning, drawing and editing with visual foundation models. arXiv preprint\\narXiv:230304671. 2023.\\n[79] Zheng O, Abdel-Aty M, Wang D, Wang Z, Ding S. ChatGPT is on the\\nhorizon: Could a large language model be all we need for Intelligent Trans-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 32, 'page_label': '33'}, page_content='portation? arXiv preprint arXiv:230305382. 2023.\\n[80] White J, Fu Q, Hays S, Sandborn M, Olea C, Gilbert H, et al. A prompt\\npattern catalog to enhance prompt engineering with chatgpt. arXiv\\npreprint arXiv:230211382. 2023.\\n[81] Ahmad A, Waseem M, Liang P, Fehmideh M, Aktar MS, Mikkonen T.\\nTowards Human-Bot Collaborative Software Architecting with ChatGPT.\\narXiv preprint arXiv:230214600. 2023.\\n[82] Lanzi PL, Loiacono D. ChatGPT and Other Large Language Models as'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 32, 'page_label': '33'}, page_content='Evolutionary Engines for Online Interactive Collaborative Game Design.\\narXiv preprint arXiv:230302155. 2023.\\n[83] Wang S, Zhao Z, Ouyang X, Wang Q, Shen D. Chatcad: Interactive\\ncomputer-aided diagnosis on medical image using large language models.\\narXiv preprint arXiv:230207257. 2023.\\n[84] Hartmann J, Schwenzow J, Witte M. The political ideology of conver-\\nsational AI: Converging evidence on ChatGPT’s pro-environmental, left-\\nlibertarian orientation. arXiv preprint arXiv:230101768. 2023.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 32, 'page_label': '33'}, page_content='[85] Kr¨ ugel S, Ostermaier A, Uhl M. The moral authority of ChatGPT. arXiv\\npreprint arXiv:230107098. 2023.\\n[86] Borji A. A categorical archive of chatgpt failures. arXiv preprint\\narXiv:230203494. 2023.\\n33'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 33, 'page_label': '34'}, page_content='[87] Hacker P, Engel A, Mauer M. Regulating chatgpt and other large gener-\\native ai models. arXiv preprint arXiv:230202337. 2023.\\n[88] Hacker P. The European AI Liability Directives–Critique of a Half-\\nHearted Approach and Lessons for the Future. arXiv preprint\\narXiv:221113960. 2022.\\n[89] Kirk HR, Vidgen B, R¨ ottger P, Hale SA. Personalisation within bounds:\\nA risk taxonomy and policy framework for the alignment of large language'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 33, 'page_label': '34'}, page_content='models with personalised feedback. arXiv preprint arXiv:230305453. 2023.\\n[90] Bang Y, Cahyawijaya S, Lee N, Dai W, Su D, Wilie B, et al. A multitask,\\nmultilingual, multimodal evaluation of chatgpt on reasoning, hallucina-\\ntion, and interactivity. arXiv preprint arXiv:230204023. 2023.\\n[91] Koco´ n J, Cichecki I, Kaszyca O, Kochanek M, Szyd lo D, Baran J,\\net al. ChatGPT: Jack of all trades, master of none. arXiv preprint\\narXiv:230210724. 2023.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 33, 'page_label': '34'}, page_content='arXiv:230210724. 2023.\\n[92] Qin C, Zhang A, Zhang Z, Chen J, Yasunaga M, Yang D. Is chatgpt a\\ngeneral-purpose natural language processing task solver? arXiv preprint\\narXiv:230206476. 2023.\\n[93] Zhong Q, Ding L, Liu J, Du B, Tao D. Can chatgpt understand too?\\na comparative study on chatgpt and ﬁne-tuned bert. arXiv preprint\\narXiv:230210198. 2023.\\n[94] Zhou C, Qiu C, Acuna DE. Paraphrase Identiﬁcation with Deep Learning:\\nA Review of Datasets and Methods. arXiv preprint arXiv:221206933.\\n2022.'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 33, 'page_label': '34'}, page_content='2022.\\n[95] de Winter J. Can ChatGPT Pass High School Exams on English Language\\nComprehension? 2023.\\n[96] Yeadon W, Inyang OO, Mizouri A, Peach A, Testrow C. The Death of the\\nShort-Form Physics Essay in the Coming AI Revolution. arXiv preprint\\narXiv:221211661. 2022.\\n[97] Susnjak T. ChatGPT: The End of Online Exam Integrity? arXiv preprint\\narXiv:221209292. 2022.\\n[98] Haque MU, Dharmadasa I, Sworna ZT, Rajapakse RN, Ahmad H.\\n” I think this is the most disruptive technology”: Exploring Senti-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 33, 'page_label': '34'}, page_content='ments of ChatGPT Early Adopters using Twitter Data. arXiv preprint\\narXiv:221205856. 2022.\\n[99] Luan L, Lin X, Li W. Exploring the Cognitive Dynamics of Artiﬁcial\\nIntelligence in the Post-COVID-19 and Learning 3.0 Era: A Case Study\\nof ChatGPT. arXiv preprint arXiv:230204818. 2023.\\n34'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 34, 'page_label': '35'}, page_content='[100] Subhash V. Can Large Language Models Change User Preference Adver-\\nsarially? arXiv preprint arXiv:230210291. 2023.\\n[101] Zhao L, Zhang L, Wu Z, Chen Y, Dai H, Yu X, et al. When Brain-inspired\\nAI Meets AGI. arXiv preprint arXiv:230315935. 2023.\\n[102] Liu Z, Yu X, Zhang L, Wu Z, Cao C, Dai H, et al. DeID-GPT:\\nZero-shot Medical Text De-Identiﬁcation by GPT-4. arXiv preprint\\narXiv:230311032. 2023.\\n[103] Liu D, Chen Y, Wu Z. Digital Twin (DT)-CycleGAN: Enabling Zero-'), Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-04-05T00:33:07+00:00', 'author': '', 'keywords': '', 'moddate': '2023-04-05T00:33:07+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': './data/Summary of ChatGPTGPT-4 Research.pdf', 'total_pages': 35, 'page': 34, 'page_label': '35'}, page_content='Shot Sim-to-Real Transfer of Visual Grasping Models. IEEE Robotics\\nand Automation Letters. 2023.\\n35')]\n",
      "Based on the provided context, ChatGPT is commonly used for question and answering in the education field. Specifically, it mentions that users can use ChatGPT to:\n",
      "\n",
      "1. Learn different academic subjects such as physics, mathematics, and chemistry\n",
      "2. Compare answers \n",
      "3. Verify answers\n",
      "\n",
      "So while the context doesn't provide specific locations or platforms, it suggests that ChatGPT can be used in educational settings and for studying purposes related to various academic subjects. It allows users to interactively ask questions and get answers to assist with their learning.\n"
     ]
    }
   ],
   "execution_count": 31
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-29T09:27:10.523142447Z",
     "start_time": "2026-05-29T09:27:08.378022399Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 출처 페이지 정보와 답변 함께 반환\n",
    "def format_docs_with_source(docs):\n",
    "    result = []\n",
    "    for doc in docs:\n",
    "        src = doc.metadata.get(\"source\",\"알 수 없음\")\n",
    "        page = doc.metadata.get(\"page\",\"?\")\n",
    "        result.append(f\"[출처 : {src} p.{page}\\n{doc.page_content}]\")\n",
    "    return \"\\n\\n\".join(result)\n",
    "\n",
    "\n",
    "rag_chain = {\n",
    "    \"context\": retriever | format_docs_with_source,\n",
    "    \"question\" : RunnablePassthrough()\n",
    "} | rag_prompt | watson_llm | StrOutputParser()\n",
    "\n",
    "rag_with_source = RunnableParallel(answer=rag_chain, sources=retriever)\n",
    "\n",
    "result = rag_with_source.invoke(\"where can i use ChatGPT?\")\n",
    "print(\"=== 답변 ===\")\n",
    "print(result['answer'])\n",
    "print(\"=== 출처 ===\")\n",
    "for doc in result['sources']:\n",
    "    print(f\"  - {doc.metadata.get('source')} p.{doc.metadata.get('page','')}\")"
   ],
   "id": "133c31b557d1fe0f",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=== 답변 ===\n",
      "According to the context provided, ChatGPT is commonly used for question and answering testing in the education sector. The text specifically mentions:\n",
      "\n",
      "\"ChatGPT is commonly used for question and answers testing in the education sector. Users can use ChatGPT to learn, compare and verify answers for different academic subjects such as physics, mathematics, and chemistry.\"\n",
      "\n",
      "So based on this context, ChatGPT can be used in educational settings to assist with learning, comparing answers, and verifying responses for subjects like physics, math and chemistry. However, the context is limited and ChatGPT likely has other applications mentioned elsewhere not provided here. In general, ChatGPT could potentially be used wherever natural language conversations and text generation are useful, like in customer service, content creation, research assistance and more. But I don't have enough information from the excerpts to say for certain what other domains or use cases it may be applied to outside of education.\n",
      "=== 출처 ===\n",
      "  - ./data/Summary of ChatGPTGPT-4 Research.pdf p.3\n",
      "  - ./data/Summary of ChatGPTGPT-4 Research.pdf p.3\n",
      "  - ./data/Summary of ChatGPTGPT-4 Research.pdf p.3\n"
     ]
    }
   ],
   "execution_count": 32
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv (3.12.3)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}