Spaces:

vivien
/

clip

Running

App Files Files Community

Vivien committed on Jan 22, 2022

Commit

a55de09

1 Parent(s): b59b1d0

Switch from ViT-B32 to ViT-B16

Browse files

Files changed (4) hide show

app.py +58 -32
data.csv +0 -0
embeddings.npy +1 -1
embeddings2.npy +1 -1

app.py CHANGED Viewed

@@ -4,21 +4,31 @@ from html import escape
 import os
 from transformers import CLIPProcessor, CLIPModel
-@st.cache(show_spinner=False,
-          hash_funcs={CLIPModel: lambda _: None,
-                      CLIPProcessor: lambda _: None,
-                      dict: lambda _: None})
 def load():
-  model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-  processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-  df = {0: pd.read_csv('data.csv'), 1: pd.read_csv('data2.csv')}
-  embeddings = {0: np.load('embeddings.npy'), 1: np.load('embeddings2.npy')}
-  for k in [0, 1]:
-    embeddings[k] = np.divide(embeddings[k], np.sqrt(np.sum(embeddings[k]**2, axis=1, keepdims=True)))
-  return model, processor, df, embeddings
 model, processor, df, embeddings = load()
-source = {0: '\nSource: Unsplash', 1: '\nSource: The Movie Database (TMDB)'}
 def get_html(url_list, height=200):
     html = "<div style='margin-top: 20px; max-width: 1200px; display: flex; flex-wrap: wrap; justify-content: space-evenly'>"
@@ -30,20 +40,32 @@ def get_html(url_list, height=200):
     html += "</div>"
     return html
 def compute_text_embeddings(list_of_strings):
     inputs = processor(text=list_of_strings, return_tensors="pt", padding=True)
     return model.get_text_features(**inputs)
 st.cache(show_spinner=False)
 def image_search(query, corpus, n_results=24):
     text_embeddings = compute_text_embeddings([query]).detach().numpy()
-    k = 0 if corpus == 'Unsplash' else 1
-    results = np.argsort((embeddings[k]@text_embeddings.T)[:, 0])[-1:-n_results-1:-1]
-    return [(df[k].iloc[i]['path'],
-             df[k].iloc[i]['tooltip'] + source[k],
-             df[k].iloc[i]['link']) for i in results]
-description = '''
 # Semantic image search
 **Enter your query and hit enter**
@@ -51,10 +73,12 @@ description = '''
 *Built with OpenAI's [CLIP](https://openai.com/blog/clip/) model, 🤗 Hugging Face's [transformers library](https://huggingface.co/transformers/), [Streamlit](https://streamlit.io/), 25k images from [Unsplash](https://unsplash.com/) and 8k images from [The Movie Database (TMDB)](https://www.themoviedb.org/)*
 *Inspired by [Unsplash Image Search](https://github.com/haltakov/natural-language-image-search) from Vladimir Haltakov and [Alph, The Sacred River](https://github.com/thoppe/alph-the-sacred-river) from Travis Hoppe*
-'''
 def main():
-  st.markdown('''
               <style>
               .block-container{
                 max-width: 1200px;
@@ -83,15 +107,17 @@ def main():
               footer {
                 visibility: hidden;
               }
-              </style>''',
-              unsafe_allow_html=True)
-  st.sidebar.markdown(description)
-  _, c, _ = st.columns((1, 3, 1))
-  query = c.text_input('', value='clouds at sunset')
-  corpus = st.radio('', ["Unsplash","Movies"])
-  if len(query) > 0:
-    results = image_search(query, corpus)
-    st.markdown(get_html(results), unsafe_allow_html=True)
-if __name__ == '__main__':
-  main()

 import os
 from transformers import CLIPProcessor, CLIPModel
+@st.cache(
+    show_spinner=False,
+    hash_funcs={
+        CLIPModel: lambda _: None,
+        CLIPProcessor: lambda _: None,
+        dict: lambda _: None,
+    },
+)
 def load():
+    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
+    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
+    df = {0: pd.read_csv("data.csv"), 1: pd.read_csv("data2.csv")}
+    embeddings = {0: np.load("embeddings.npy"), 1: np.load("embeddings2.npy")}
+    for k in [0, 1]:
+        embeddings[k] = np.divide(
+            embeddings[k], np.sqrt(np.sum(embeddings[k] ** 2, axis=1, keepdims=True))
+        )
+    return model, processor, df, embeddings
 model, processor, df, embeddings = load()
+source = {0: "\nSource: Unsplash", 1: "\nSource: The Movie Database (TMDB)"}
 def get_html(url_list, height=200):
     html = "<div style='margin-top: 20px; max-width: 1200px; display: flex; flex-wrap: wrap; justify-content: space-evenly'>"
     html += "</div>"
     return html
 def compute_text_embeddings(list_of_strings):
     inputs = processor(text=list_of_strings, return_tensors="pt", padding=True)
     return model.get_text_features(**inputs)
 st.cache(show_spinner=False)
 def image_search(query, corpus, n_results=24):
     text_embeddings = compute_text_embeddings([query]).detach().numpy()
+    k = 0 if corpus == "Unsplash" else 1
+    results = np.argsort((embeddings[k] @ text_embeddings.T)[:, 0])[
+        -1 : -n_results - 1 : -1
+    ]
+    return [
+        (
+            df[k].iloc[i]["path"],
+            df[k].iloc[i]["tooltip"] + source[k],
+            df[k].iloc[i]["link"],
+        )
+        for i in results
+    ]
+description = """
 # Semantic image search
 **Enter your query and hit enter**
 *Built with OpenAI's [CLIP](https://openai.com/blog/clip/) model, 🤗 Hugging Face's [transformers library](https://huggingface.co/transformers/), [Streamlit](https://streamlit.io/), 25k images from [Unsplash](https://unsplash.com/) and 8k images from [The Movie Database (TMDB)](https://www.themoviedb.org/)*
 *Inspired by [Unsplash Image Search](https://github.com/haltakov/natural-language-image-search) from Vladimir Haltakov and [Alph, The Sacred River](https://github.com/thoppe/alph-the-sacred-river) from Travis Hoppe*
+"""
 def main():
+    st.markdown(
+        """
               <style>
               .block-container{
                 max-width: 1200px;
               footer {
                 visibility: hidden;
               }
+              </style>""",
+        unsafe_allow_html=True,
+    )
+    st.sidebar.markdown(description)
+    _, c, _ = st.columns((1, 3, 1))
+    query = c.text_input("", value="clouds at sunset")
+    corpus = st.radio("", ["Unsplash", "Movies"])
+    if len(query) > 0:
+        results = image_search(query, corpus)
+        st.markdown(get_html(results), unsafe_allow_html=True)
+if __name__ == "__main__":
+    main()

data.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

embeddings.npy CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f8c171e32276739be6b020592edc8a2c06e029ff6505a9d1d4efe3cafa073bd
 size 51200128

 version https://git-lfs.github.com/spec/v1
+oid sha256:125430e11a4a415ec0c0fc5339f97544f0447e4b0a24c20f2e59f8852e706afc
 size 51200128

embeddings2.npy CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9664e980f31e81c4a34e07833539fea32795d83a4262c9828ceae445fa2e412a
 size 16732288

 version https://git-lfs.github.com/spec/v1
+oid sha256:153cf3fae2385d51fe8729d3a1c059f611ca47a3fc501049708114d1bbf79049
 size 16732288