> ## Documentation Index
> Fetch the complete documentation index at: https://docs.paradedb.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Create an Index

> Index a Postgres table for full text search

Before a table can be searched, it must be indexed. ParadeDB uses a custom index type called the BM25 index.
The following code block creates a BM25 index over several columns in the `mock_items` table.

<CodeGroup>
  ```sql SQL theme={null}
  CREATE INDEX search_idx ON mock_items
  USING bm25 (id, description, category)
  WITH (key_field='id');
  ```

  ```ts Drizzle theme={null}
  import { indexing } from "@paradedb/drizzle-paradedb";

  indexing
    .bm25Index("search_idx")
    .on(mockItems.id, mockItems.description, mockItems.category);
  ```

  ```python Django theme={null}
  from django.db import connection
  from paradedb.indexes import BM25Index

  with connection.schema_editor() as schema_editor:
      schema_editor.add_index(
          MockItem,
          BM25Index(
              fields={
                  "id": {},
                  "description": {},
                  "category": {},
              },
              key_field="id",
              name="search_idx",
          ),
      )
  ```

  ```python SQLAlchemy theme={null}
  from sqlalchemy import Index
  from paradedb.sqlalchemy import indexing

  idx = Index(
      "search_idx",
      indexing.BM25Field(MockItem.id),
      indexing.BM25Field(MockItem.description),
      indexing.BM25Field(MockItem.category),
      postgresql_using="bm25",
      postgresql_with={"key_field": "id"},
  )

  with engine.begin() as conn:
      idx.create(conn)
  ```

  ```ruby Rails theme={null}
  ActiveRecord::Base.connection.add_bm25_index(
    :mock_items,
    fields: {
      id: {},
      description: {},
      category: {}
    },
    key_field: :id,
    name: :search_idx
  )
  ```

  ```cs EF Core theme={null}
  modelBuilder.Entity<MockItem>()
      .HasBm25Index("search_idx", e => e.Id)
      .HasField(e => e.Description)
      .HasField(e => e.Category);
  ```
</CodeGroup>

<Note>
  See the [getting started guide](/documentation/getting-started/environment)
  for more detail on how to set up your ORM to run index creation commands.
</Note>

<Note>
  You'll need to drop the existing `search_idx` before you can create a new one:

  <CodeGroup>
    ```sql SQL theme={null}
    DROP INDEX search_idx;
    ```

    ```ts Drizzle theme={null}
    import { sql } from "drizzle-orm";

    await db.execute(sql`DROP INDEX search_idx`);
    ```

    ```python Django theme={null}
    from django.db import connection

    with connection.cursor() as cursor:
        cursor.execute("DROP INDEX search_idx")
    ```

    ```python SQLAlchemy theme={null}
    from sqlalchemy import text

    with engine.begin() as conn:
        conn.execute(text("DROP INDEX search_idx"))
    ```

    ```ruby Rails theme={null}
    ActiveRecord::Base.connection.remove_bm25_index(:mock_items, name: :search_idx)
    ```

    ```cs EF Core theme={null}
    await dbContext.Database.ExecuteSqlRawAsync("DROP INDEX search_idx");
    ```
  </CodeGroup>
</Note>

By default, text columns are tokenized using the [unicode](/documentation/tokenizers/available-tokenizers/unicode) tokenizer, which splits text according to the
Unicode segmentation standard. Because index creation is a time-consuming operation, we recommend experimenting with the [available tokenizers](/documentation/tokenizers/overview)
to find the most suitable one before running `CREATE INDEX`.

For instance, if a column contains multiple languages, the ICU tokenizer may be more appropriate.

<CodeGroup>
  ```sql SQL theme={null}
  CREATE INDEX search_idx ON mock_items
  USING bm25 (id, (description::pdb.icu), category)
  WITH (key_field='id');
  ```

  ```ts Drizzle theme={null}
  import { indexing, tokenizer } from "@paradedb/drizzle-paradedb";

  indexing
    .bm25Index("search_idx")
    .on(
      mockItems.id,
      indexing.bm25Field(mockItems.description, tokenizer.icu()),
      mockItems.category,
    );
  ```

  ```python Django theme={null}
  from django.db import connection
  from paradedb.indexes import BM25Index
  from paradedb.search import Tokenizer

  with connection.schema_editor() as schema_editor:
      schema_editor.add_index(
          MockItem,
          BM25Index(
              fields={
                  "id": {},
                  "description": {"tokenizer": Tokenizer.icu()},
                  "category": {},
              },
              key_field="id",
              name="search_idx",
          ),
      )
  ```

  ```python SQLAlchemy theme={null}
  from sqlalchemy import Index
  from paradedb.sqlalchemy import indexing, tokenizer

  idx = Index(
      "search_idx",
      indexing.BM25Field(MockItem.id),
      indexing.BM25Field(
          MockItem.description,
          tokenizer=tokenizer.icu(),
      ),
      indexing.BM25Field(MockItem.category),
      postgresql_using="bm25",
      postgresql_with={"key_field": "id"},
  )

  with engine.begin() as conn:
      idx.create(conn)
  ```

  ```ruby Rails theme={null}
  ActiveRecord::Base.connection.add_bm25_index(
    :mock_items,
    fields: {
      id: {},
      description: { tokenizer: Tokenizer.icu() },
      category: {}
    },
    key_field: :id,
    name: :search_idx
  )
  ```

  ```cs EF Core theme={null}
  modelBuilder.Entity<MockItem>()
      .HasBm25Index("search_idx", e => e.Id)
      .HasField(e => e.Description, Tokenizer.Icu())
      .HasField(e => e.Category);
  ```
</CodeGroup>

Only one BM25 index can exist per table. We recommend indexing all columns in a table that may be present in a search query,
including columns used for sorting, grouping, filtering, and aggregations.

<CodeGroup>
  ```sql SQL theme={null}
  CREATE INDEX search_idx ON mock_items
  USING bm25 (id, description, category, rating, in_stock, created_at, metadata, weight_range)
  WITH (key_field='id');
  ```

  ```ts Drizzle theme={null}
  import { indexing } from "@paradedb/drizzle-paradedb";

  indexing
    .bm25Index("search_idx")
    .on(
      mockItems.id,
      mockItems.description,
      mockItems.category,
      mockItems.rating,
      mockItems.inStock,
      mockItems.createdAt,
      mockItems.metadata,
      mockItems.weightRange,
    );
  ```

  ```python Django theme={null}
  from django.db import connection
  from paradedb.indexes import BM25Index

  with connection.schema_editor() as schema_editor:
      schema_editor.add_index(
          MockItem,
          BM25Index(
              fields={
                  "id": {},
                  "description": {},
                  "category": {},
                  "rating": {},
                  "in_stock": {},
                  "created_at": {},
                  "metadata": {},
                  "weight_range": {},
              },
              key_field="id",
              name="search_idx",
          ),
      )
  ```

  ```python SQLAlchemy theme={null}
  from sqlalchemy import Index
  from paradedb.sqlalchemy import indexing

  idx = Index(
      "search_idx",
      indexing.BM25Field(MockItem.id),
      indexing.BM25Field(MockItem.description),
      indexing.BM25Field(MockItem.category),
      indexing.BM25Field(MockItem.rating),
      indexing.BM25Field(MockItem.in_stock),
      indexing.BM25Field(MockItem.created_at),
      indexing.BM25Field(MockItem.metadata_),
      indexing.BM25Field(MockItem.weight_range),
      postgresql_using="bm25",
      postgresql_with={"key_field": "id"},
  )

  with engine.begin() as conn:
      idx.create(conn)
  ```

  ```ruby Rails theme={null}
  ActiveRecord::Base.connection.add_bm25_index(
    :mock_items,
    fields: {
      id: {},
      description: {},
      category: {},
      rating: {},
      in_stock: {},
      created_at: {},
      metadata: {},
      weight_range: {}
    },
    key_field: :id,
    name: :search_idx
  )
  ```

  ```cs EF Core theme={null}
  modelBuilder.Entity<MockItem>()
      .HasBm25Index("search_idx", e => e.Id)
      .HasField(e => e.Description)
      .HasField(e => e.Category)
      .HasField(e => e.Rating)
      .HasField(e => e.InStock)
      .HasField(e => e.CreatedAt)
      .HasField(e => e.Metadata)
      .HasField(e => e.WeightRange);
  ```
</CodeGroup>

Most Postgres types, including text, JSON, numeric, timestamp, range, boolean, and arrays, can be indexed.

## Track Create Index Progress

To monitor the progress of a long-running `CREATE INDEX`, open a separate Postgres connection and query `pg_stat_progress_create_index`:

```sql theme={null}
SELECT pid, phase, blocks_done, blocks_total
FROM pg_stat_progress_create_index;
```

Comparing `blocks_done` to `blocks_total` will provide a good approximation of the progress so far. If `blocks_done` equals
`blocks_total`, that means that all rows have been indexed and the index is being flushed to disk.

## Choosing a Key Field

In the `CREATE INDEX` statement above, note the mandatory `key_field` option.
Every BM25 index needs a `key_field`, which is the name of a column that will function as a row’s unique identifier within the index.

The `key_field` must:

1. Have a `UNIQUE` constraint. Usually this means the table's `PRIMARY KEY`.
2. Be the first column in the column list.
3. Be untokenized, if it is a text field.

## Token Filters

After tokens are created, [token filters](/documentation/token-filters/overview) can be configured to apply further processing like lowercasing, stemming, or unaccenting.
For example, the following code block adds English stemming to `description`:

<CodeGroup>
  ```sql SQL theme={null}
  CREATE INDEX search_idx ON mock_items
  USING bm25 (id, (description::pdb.simple('stemmer=english')), category)
  WITH (key_field='id');
  ```

  ```ts Drizzle theme={null}
  import { indexing, tokenizer } from "@paradedb/drizzle-paradedb";

  indexing
    .bm25Index("search_idx")
    .on(
      mockItems.id,
      indexing.bm25Field(
        mockItems.description,
        tokenizer.simple({ stemmer: "english" }),
      ),
      mockItems.category,
    );
  ```

  ```python Django theme={null}
  from django.db import connection
  from paradedb.indexes import BM25Index
  from paradedb.search import Tokenizer

  with connection.schema_editor() as schema_editor:
      schema_editor.add_index(
          MockItem,
          BM25Index(
              fields={
                  "id": {},
                  "description": {
                      "tokenizer": Tokenizer.simple(
                          options={"stemmer": "english"}
                      ),
                  },
                  "category": {},
              },
              key_field="id",
              name="search_idx",
          ),
      )
  ```

  ```python SQLAlchemy theme={null}
  from sqlalchemy import Index
  from paradedb.sqlalchemy import indexing, tokenizer

  idx = Index(
      "search_idx",
      indexing.BM25Field(MockItem.id),
      indexing.BM25Field(
          MockItem.description,
          tokenizer=tokenizer.simple(options={"stemmer": "english"}),
      ),
      indexing.BM25Field(MockItem.category),
      postgresql_using="bm25",
      postgresql_with={"key_field": "id"},
  )

  with engine.begin() as conn:
      idx.create(conn)
  ```

  ```ruby Rails theme={null}
  ActiveRecord::Base.connection.add_bm25_index(
    :mock_items,
    fields: {
      id: {},
      description: {
        tokenizer: Tokenizer.simple(options: { stemmer: "english" })
      },
      category: {}
    },
    key_field: :id,
    name: :search_idx
  )
  ```

  ```cs EF Core theme={null}
  modelBuilder.Entity<MockItem>()
      .HasBm25Index("search_idx", e => e.Id)
      .HasField(e => e.Description, Tokenizer.Simple(new() { ["stemmer"] = "english" }))
      .HasField(e => e.Category);
  ```
</CodeGroup>
