[실습] Nori Analyzer를 활용해 한글(korean)이 제대로 검색되게 만들기

author

JSCODE 박재성

✅ Nori Analyzer를 활용해 한글(korean)이 제대로 검색되게 만들기

기존 컨테이너 종료하기

Nori Analyzer를 사용하려면 플러그인을 설치해야 한다. 플러그인을 설치한 채로 Docker 컨테이너를 다시 띄우기 위해 기존에 실행하던 컨테이너를 종료한다.


# 실행되고 있는 컨테이너 확인
$ docker ps

# compose.yml 파일이 있는 경로에서 아래 명령어 입력
$ docker compose down 

# 잘 종료됐나 확인
$ docker ps

파일 작성하기

Dockerfile


FROM docker.elastic.co/elasticsearch/elasticsearch:8.17.4

# Install the Nori analyzer plugin.
# --batch keeps the installer non-interactive so the image build can never
# stall waiting for a confirmation prompt.
RUN bin/elasticsearch-plugin install --batch analysis-nori

compose.yml


services:
  elastic:
    # Dedicated tag for the image built from the Dockerfile in this directory.
    # Reusing the official image name here is risky: if that image is already
    # cached locally, Compose can start it as-is without building, and the
    # resulting container would not contain the Nori plugin.
    image: elasticsearch-nori:8.17.4
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "9200:9200" # Elasticsearch is exposed on port 9200
    environment:
      # The settings below are shortcuts for dev/test environments only
      # (do not use them in production).
      - discovery.type=single-node # single node (details not needed yet)
      - xpack.security.enabled=false # turn off security features
      - xpack.security.http.ssl.enabled=false # turn off TLS on the HTTP layer
  kibana:
    image: docker.elastic.co/kibana/kibana:8.17.4 # version 8.17.4
    ports:
      - "5601:5601" # Kibana is exposed on port 5601
    environment:
      # Address of the Elasticsearch instance Kibana should talk to
      - ELASTICSEARCH_HOSTS=http://elastic:9200

컨테이너 띄우기


# --build forces the image to be (re)built from the Dockerfile, so the
# container is guaranteed to include the Nori plugin
$ docker compose up -d --build

# 잘 실행됐나 확인
$ docker ps

Analyze API 활용해 디버깅해보기


// Method 1
GET /_analyze
{
  "text": "백화점에서 쇼핑을 하다가 친구를 만났다.",
  "analyzer": "nori"
}

// Method 2 (spell out the components of the nori analyzer explicitly)
GET /_analyze
{
  "text": "백화점에서 쇼핑을 하다가 친구를 만났다.",
  "char_filter": [],
  "tokenizer": "nori_tokenizer",
  "filter": ["nori_part_of_speech", "nori_readingform", "lowercase"]
}

nori_part_of_speech : 의미 없는 조사(을, 의 등), 접속사 등을 제거

nori_readingform : 한자를 한글로 바꿔서 토큰으로 저장

응답값


{
  "tokens": [
    {
      "token": "백화",
      "start_offset": 0,
      "end_offset": 2,
      "type": "word",
      "position": 0
    },
    {
      "token": "점",
      "start_offset": 2,
      "end_offset": 3,
      "type": "word",
      "position": 1
    },
    {
      "token": "쇼핑",
      "start_offset": 6,
      "end_offset": 8,
      "type": "word",
      "position": 3
    },
    {
      "token": "하",
      "start_offset": 10,
      "end_offset": 11,
      "type": "word",
      "position": 5
    },
    {
      "token": "친구",
      "start_offset": 14,
      "end_offset": 16,
      "type": "word",
      "position": 7
    },
    {
      "token": "만나",
      "start_offset": 18,
      "end_offset": 20,
      "type": "word",
      "position": 9
    }
  ]
}

응답값을 확인해보니 이전에 standard analyzer를 썼을 때보다 토큰이 한글의 의미 단위로 훨씬 잘 나뉘었다. 그럼 검색을 했을 때도 잘 나오는지 확인해보자.

인덱스 생성하기


// Delete the existing index
DELETE /boards

// Create the index, define the mapping, and apply the custom analyzer
PUT /boards
{
  "settings": {
    "analysis": {
      "analyzer": {
        "boards_content_analyzer": {
          "char_filter": [],
          "tokenizer": "nori_tokenizer",
          "filter": ["nori_part_of_speech", "nori_readingform", "lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "boards_content_analyzer"
      }
    }
  }
}

// Verify that the index was created correctly
GET /boards

데이터 삽입하기


POST /boards/_doc
{
  "content": "백화점에서 쇼핑을 하다가 친구를 만났다."
}

검색해보기


GET /boards/_search
{
  "query": {
    "match": {
      "content": "백화점"
    }
  }
}

GET /boards/_search
{
  "query": {
    "match": {
      "content": "쇼핑"
    }
  }
}

GET /boards/_search
{
  "query": {
    "match": {
      "content": "친구"
    }
  }
}

위 3가지 전부 다 데이터가 잘 조회되는 걸 확인할 수 있다.

👨🏻‍🏫

Nori analyzer를 활용하면 한글로 구성된 문장도 토큰으로 잘 분해해서 역인덱스에 저장하는 걸 직접 눈으로 확인했다. 그럼 다음 강의에서는 한글과 영어가 섞인 글은 어떻게 처리해야 하는지 알아보자.

author

JSCODE 박재성

category

Elasticsearch

createdAt

Dec 6, 2025 03:54 AM

isPublic

series

실전에서 바로 써먹는 Elasticsearch 입문 (검색 최적화편)

slug

type

series-footer

updatedAt

📎

이 글은 실전에서 바로 써먹는 Elasticsearch 입문 (검색 최적화편) 강의의 수업 자료 중 일부입니다.

services: elastic: image: docker.elastic.co/elasticsearch/elasticsearch:8.17.4 # 8.17.4 버전 build: context: . dockerfile: Dockerfile ports: - 9200:9200 # 9200번 포트에서 Elasticsearch 실행 environment: # 아래 설정은 개발/테스트 환경에서 간단하게 테스트하기 위한 옵션 (운영 환경에서는 설정하면 안 됨) - discovery.type=single-node # 단일 노드 (지금은 알 필요 없음) - xpack.security.enabled=false # 보안 설정 - xpack.security.http.ssl.enabled=false # 보안 설정 kibana: image: docker.elastic.co/kibana/kibana:8.17.4 # 8.17.4 버전 ports: - 5601:5601 # 5601번 포트에서 kibana 실행 environment: - ELASTICSEARCH_HOSTS=http://elastic:9200 # kibana에게 통신할 Elasticsearch 주소 알려주기

// 방법 1 GET /_analyze { "text": "백화점에서 쇼핑을 하다가 친구를 만났다.", "analyzer": "nori" } // 방법 2 (nori analyzer의 구성을 직접 명시) GET /_analyze { "text": "백화점에서 쇼핑을 하다가 친구를 만났다.", "char_filter": [], "tokenizer": "nori_tokenizer", "filter": ["nori_part_of_speech", "nori_readingform", "lowercase"] }

{ "tokens": [ { "token": "백화", "start_offset": 0, "end_offset": 2, "type": "word", "position": 0 }, { "token": "점", "start_offset": 2, "end_offset": 3, "type": "word", "position": 1 }, { "token": "쇼핑", "start_offset": 6, "end_offset": 8, "type": "word", "position": 3 }, { "token": "하", "start_offset": 10, "end_offset": 11, "type": "word", "position": 5 }, { "token": "친구", "start_offset": 14, "end_offset": 16, "type": "word", "position": 7 }, { "token": "만나", "start_offset": 18, "end_offset": 20, "type": "word", "position": 9 } ] }

// 기존 인덱스 삭제 DELETE /boards // 인덱스 생성 + 매핑 정의 + Custom Analyzer 적용 PUT /boards { "settings": { "analysis": { "analyzer": { "boards_content_analyzer": { "char_filter": [], "tokenizer": "nori_tokenizer", "filter": ["nori_part_of_speech", "nori_readingform", "lowercase"] } } } }, "mappings": { "properties": { "content": { "type": "text", "analyzer": "boards_content_analyzer" } } } } // 잘 생성됐는 지 확인 GET /boards

GET /boards/_search { "query": { "match": { "content": "백화점" } } } GET /boards/_search { "query": { "match": { "content": "쇼핑" } } } GET /boards/_search { "query": { "match": { "content": "친구" } } }