# NOTE: the six lines below are repository-viewer residue, not program text.
# User
# Initial commit
# 979c7a7
# raw
# history blame
# 1.77 kB
import fasttext
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np
# Application object that the route decorators in this module register on.
# The keyword arguments are the API's descriptive metadata.
app = FastAPI(
    title="Language Detection API",
    description="Language detection API using FastText v0.9.2 and lid.176.bin model",
    version="1.0.0",
)
# Language identification model, loaded once when this module is imported so
# that every request reuses the same in-memory instance.
# Model: lid.176.bin (v1.0)
# - Training data: Wikipedia, Tatoeba and SETimes
# - Covers 176 languages
# - Built on character n-gram features
#   NOTE(review): an earlier comment here gave minn=3, maxn=6 and a vector
#   dimension of 16. Those figures cannot be verified from this file --
#   confirm them against the model's own arguments before relying on them.
_MODEL_PATH = "/app/lid.176.bin"
model = fasttext.load_model(_MODEL_PATH)
# Replace the loaded model's ``predict`` with a wrapper that passes both of
# its results through ``np.asarray`` before handing them back.
# NOTE(review): presumably this exists for NumPy compatibility of the fastText
# Python bindings; the wrapper still delegates to the original method, so
# confirm it actually avoids the failure it was written for.
original_predict = model.predict


def safe_predict(text, k=-1, threshold=0.0):
    """Call the model's original ``predict`` and return array results.

    Args:
        text: Input forwarded unchanged to the original ``predict``.
        k: Forwarded as the second positional argument. Defaults to -1.
            NOTE(review): fastText's own default is 1; -1 presumably asks
            for every label -- confirm this default is intended.
        threshold: Forwarded as the third positional argument.

    Returns:
        A ``(labels, probabilities)`` tuple in which each element has been
        converted with ``np.asarray``.
    """
    raw_labels, raw_probs = original_predict(text, k, threshold)
    label_array = np.asarray(raw_labels)
    prob_array = np.asarray(raw_probs)
    return label_array, prob_array


# Instance-level override: later ``model.predict(...)`` calls hit the wrapper.
model.predict = safe_predict
class TextRequest(BaseModel):
    """Request payload carrying the text to analyse."""

    # Text whose language should be identified.
    text: str
class PredictionResponse(BaseModel):
    """Response payload describing the detected language."""

    # Predicted label with the "__label__" marker removed by the endpoint.
    language: str
    # Score reported by the model for that label, as a plain float.
    confidence: float
@app.post("/detect", response_model=PredictionResponse)
async def detect_language(request: TextRequest):
    """Detect the language of ``request.text`` and return the top prediction.

    Args:
        request: Body containing the text to classify.

    Returns:
        A ``PredictionResponse`` holding the best label (without the
        ``__label__`` marker) and its score as a float.

    Raises:
        HTTPException: 500 when the model call itself fails; 422 when the
            model returns no label for the text.
    """
    # fastText's predict handles one line at a time and raises ValueError on
    # input containing "\n", which used to surface as a 500 for perfectly
    # valid multi-line text. Collapse every line break into a single space.
    single_line = " ".join(request.text.splitlines())

    try:
        # Only the best label is used, so request exactly one instead of
        # relying on the wrapper's default of k=-1.
        labels, probabilities = model.predict(single_line, k=1)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Guard the indexing below: an empty result would otherwise leak an
    # IndexError message through a generic 500.
    if len(labels) == 0:
        raise HTTPException(
            status_code=422,
            detail="No language could be detected for the given text",
        )

    return PredictionResponse(
        language=str(labels[0]).replace("__label__", ""),
        confidence=float(probabilities[0]),
    )
@app.get("/")
async def root():
    """Return a short status message that points clients at ``/docs``."""
    status = {
        "message": "Language Detection API is running. Use /docs for the API documentation."
    }
    return status