User committed on
Commit 70ada71 · 2 Parent(s): 5c84fd4 01e181e

Initial commit

Files changed (7)
  1. Desktop/Apps/llm/.gitattributes +1 -0
  2. Dockerfile +30 -0
  3. README.md +52 -0
  4. Spacefile +4 -0
  5. app.py +55 -0
  6. create_space.py +16 -0
  7. requirements.txt +5 -0
Desktop/Apps/llm/.gitattributes ADDED
@@ -0,0 +1 @@
+ *.bin filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ FROM python:3.9-slim
+
+ WORKDIR /app
+
+ # Install build essentials and wget
+ RUN apt-get update && \
+     apt-get install -y build-essential wget git && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Clone and install fastText v0.9.2 (stable release)
+ RUN git clone --branch v0.9.2 https://github.com/facebookresearch/fastText.git && \
+     cd fastText && \
+     pip install .
+
+ # Download the language identification model (v1.0)
+ # Model details: https://fasttext.cc/docs/en/language-identification.html
+ RUN wget https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin
+
+ # Copy requirements and install dependencies
+ COPY requirements.txt .
+ RUN pip install -r requirements.txt
+
+ # Copy application code
+ COPY app.py .
+
+ # Expose port
+ EXPOSE 8000
+
+ # Run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
README.md ADDED
@@ -0,0 +1,52 @@
+ ---
+ title: Language Detection API
+ emoji: 🌐
+ colorFrom: blue
+ colorTo: green
+ sdk: docker
+ sdk_version: "3.9"
+ app_file: app.py
+ pinned: false
+ ---
+
+
+ # Language Detection API
+
+ This is a FastAPI application that provides language detection capabilities using Facebook's FastText model.
+
+ ## Features
+
+ - Language detection for 176 different languages
+ - High accuracy using FastText's pre-trained model (lid.176.bin)
+ - Simple REST API interface
+ - Docker containerized
+
+ ## API Endpoints
+
+ ### GET /
+ Health check endpoint that confirms the API is running.
+
+ ### POST /detect
+ Detects the language of the provided text.
+
+ Request body:
+ ```json
+ {
+     "text": "Your text here"
+ }
+ ```
+
+ Response:
+ ```json
+ {
+     "language": "en",
+     "confidence": 0.976
+ }
+ ```
+
+ ## Technical Details
+
+ - Built with FastAPI and Python 3.9
+ - Uses FastText v0.9.2
+ - Containerized with Docker
+ - Hosted on Hugging Face Spaces
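
For reference, here is a minimal client sketch for the POST /detect endpoint documented in the README above. It is an illustration only: the base URL is a placeholder (the deployed Space URL, or http://localhost:8000 when running the container locally), and it assumes the `requests` package is available on the client side.

```python
# Hypothetical client for the /detect endpoint; BASE_URL is a placeholder.
import requests

BASE_URL = "http://localhost:8000"  # or the deployed Hugging Face Space URL

resp = requests.post(f"{BASE_URL}/detect", json={"text": "Bonjour tout le monde"})
resp.raise_for_status()
result = resp.json()
print(result["language"], result["confidence"])  # e.g. "fr" with a high probability
```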
Spacefile ADDED
@@ -0,0 +1,4 @@
+ # Spacefile Docs: https://huggingface.co/docs/hub/spaces-config-reference
+ title: Language Detection API
+ sdk: docker
+ port: 8000
app.py ADDED
@@ -0,0 +1,55 @@
+ import fasttext
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import numpy as np
+
+ app = FastAPI(
+     title="Language Detection API",
+     description="Language detection API using FastText v0.9.2 and lid.176.bin model",
+     version="1.0.0"
+ )
+
+ # Load the language identification model
+ # Model: lid.176.bin (v1.0)
+ # - Trained on Wikipedia, Tatoeba and SETimes
+ # - Supports 176 languages
+ # - Uses character n-grams (minn=3, maxn=6 by default)
+ # - Vector dimension: 16
+ model = fasttext.load_model("/app/lid.176.bin")
+
+ # Monkey patch fastText's predict method to use np.asarray
+ # This is needed because FastText's native predict method returns a tuple of lists,
+ # but we need numpy arrays for better performance and compatibility
+ original_predict = model.predict
+ def safe_predict(text, k=-1, threshold=0.0):
+     labels, probs = original_predict(text, k, threshold)
+     return np.asarray(labels), np.asarray(probs)
+ model.predict = safe_predict
+
+ class TextRequest(BaseModel):
+     text: str
+
+ class PredictionResponse(BaseModel):
+     language: str
+     confidence: float
+
+ @app.post("/detect", response_model=PredictionResponse)
+ async def detect_language(request: TextRequest):
+     try:
+         # Get prediction
+         predictions = model.predict(request.text)
+
+         # Extract language and confidence
+         language = predictions[0][0].replace("__label__", "")
+         confidence = float(predictions[1][0])
+
+         return PredictionResponse(
+             language=language,
+             confidence=confidence
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/")
+ async def root():
+     return {"message": "Language Detection API is running. Use /docs for the API documentation."}
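
As a rough illustration of what detect_language() unpacks, here is a sketch that calls the raw fastText model directly. It assumes lid.176.bin has already been downloaded to the working directory (the Dockerfile fetches it into /app with wget); the example text and expected output are illustrative.

```python
# Sketch: inspect the raw fastText output that app.py turns into a response.
# Assumes lid.176.bin is present locally (the Dockerfile downloads it via wget).
import fasttext

model = fasttext.load_model("lid.176.bin")
labels, probs = model.predict("Hello, world!", k=1)
# labels is a tuple like ('__label__en',); probs holds the matching probabilities
language = labels[0].replace("__label__", "")  # strip fastText's label prefix
confidence = float(probs[0])
print(language, confidence)  # expected: "en" with a confidence close to 1.0
```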
create_space.py ADDED
@@ -0,0 +1,16 @@
+ from huggingface_hub import HfApi
+ import os
+
+ # Initialize the Hugging Face API client
+ api = HfApi()
+
+ # Create a new Space
+ space_name = "language-detection-api"
+ api.create_repo(
+     repo_id=space_name,
+     repo_type="space",
+     space_sdk="docker",
+     private=False
+ )
+
+ print(f"Space created successfully: https://huggingface.co/spaces/{api.whoami()['name']}/{space_name}")
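
A possible follow-up, not part of this commit: once the Space exists, the same huggingface_hub client can push the application files to it, for example with upload_folder. The folder path and repo namespace below are assumptions for illustration.

```python
# Hypothetical next step after create_space.py: upload the app files to the Space.
# folder_path and the repo namespace are assumptions, not part of this commit.
from huggingface_hub import HfApi

api = HfApi()
username = api.whoami()["name"]  # namespace of the authenticated account
api.upload_folder(
    folder_path=".",  # directory containing Dockerfile, app.py, requirements.txt, ...
    repo_id=f"{username}/language-detection-api",
    repo_type="space",
)
```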
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi==0.104.1
+ uvicorn==0.24.0
+ python-multipart==0.0.6
+ numpy==1.24.3
+ scipy==1.10.1