Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import pandas_profiling as pp
|
2 |
import pandas as pd
|
3 |
import tensorflow as tf
|
4 |
|
@@ -44,33 +44,6 @@ CHOICES = ["SNOMED", "LOINC", "CQM"]
|
|
44 |
JSONOBJ = """{"items":{"item":[{"id": "0001","type": null,"is_good": false,"ppu": 0.55,"batters":{"batter":[{ "id": "1001", "type": "Regular" },{ "id": "1002", "type": "Chocolate" },{ "id": "1003", "type": "Blueberry" },{ "id": "1004", "type": "Devil's Food" }]},"topping":[{ "id": "5001", "type": "None" },{ "id": "5002", "type": "Glazed" },{ "id": "5005", "type": "Sugar" },{ "id": "5007", "type": "Powdered Sugar" },{ "id": "5006", "type": "Chocolate with Sprinkles" },{ "id": "5003", "type": "Chocolate" },{ "id": "5004", "type": "Maple" }]}]}}"""
|
45 |
|
46 |
|
47 |
-
def profile_dataset(dataset=datasetSNOMED, username="awacke1", token=HF_TOKEN, dataset_name="awacke1/SNOMED-CT-Code-Value-Semantic-Set.csv"):
|
48 |
-
df = pd.read_csv(dataset.Description)
|
49 |
-
if len(df.columns) <= 15:
|
50 |
-
profile = pp.ProfileReport(df, title=f"{dataset_name} Report")
|
51 |
-
else:
|
52 |
-
profile = pp.ProfileReport(df, title=f"{dataset_name} Report", minimal = True)
|
53 |
-
|
54 |
-
repo_url = create_repo(f"{username}/{dataset_name}", repo_type = "space", token = token, space_sdk = "static", private=False)
|
55 |
-
|
56 |
-
profile.to_file("./index.html")
|
57 |
-
|
58 |
-
upload_file(path_or_fileobj ="./index.html", path_in_repo = "index.html", repo_id =f"{username}/{dataset_name}", repo_type = "space", token=token)
|
59 |
-
readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
|
60 |
-
with open("README.md", "w+") as f:
|
61 |
-
f.write(readme)
|
62 |
-
upload_file(path_or_fileobj ="./README.md", path_in_repo = "README.md", repo_id =f"{username}/{dataset_name}", repo_type = "space", token=token)
|
63 |
-
return f"Your dataset report will be ready at {repo_url}"
|
64 |
-
|
65 |
-
#def lowercase_title(example):
|
66 |
-
# return {"Description": example[title].lower()}
|
67 |
-
|
68 |
-
# demonstrate map function of dataset
|
69 |
-
#JSONOBJ_MAP=datasetLOINC.map(lowercase_title)
|
70 |
-
#JSONOBJ_MAP=datasetLOINC.filter(lambda example: example["Description"].startswith("Mental health"))
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
|
75 |
def concatenate_text(examples):
|
76 |
return {
|
@@ -180,13 +153,7 @@ def fn( text1, text2, num, slider1, slider2, single_checkbox,
|
|
180 |
print(start_with_searchTermLOINC )
|
181 |
print(start_with_searchTermSNOMED )
|
182 |
print(start_with_searchTermCQM)
|
183 |
-
|
184 |
-
#print(start_with_searchTermLOINC["train"][0] )
|
185 |
-
#print(start_with_searchTermSNOMED["train"][0] )
|
186 |
-
#print(start_with_searchTermCQM["train"][0] )
|
187 |
-
|
188 |
-
#returnMsg=profile_dataset()
|
189 |
-
#print(returnMsg)
|
190 |
|
191 |
# try:
|
192 |
#top1matchLOINC = json.loads(start_with_searchTermLOINC['train'])
|
|
|
1 |
+
# import pandas_profiling as pp
|
2 |
import pandas as pd
|
3 |
import tensorflow as tf
|
4 |
|
|
|
44 |
JSONOBJ = """{"items":{"item":[{"id": "0001","type": null,"is_good": false,"ppu": 0.55,"batters":{"batter":[{ "id": "1001", "type": "Regular" },{ "id": "1002", "type": "Chocolate" },{ "id": "1003", "type": "Blueberry" },{ "id": "1004", "type": "Devil's Food" }]},"topping":[{ "id": "5001", "type": "None" },{ "id": "5002", "type": "Glazed" },{ "id": "5005", "type": "Sugar" },{ "id": "5007", "type": "Powdered Sugar" },{ "id": "5006", "type": "Chocolate with Sprinkles" },{ "id": "5003", "type": "Chocolate" },{ "id": "5004", "type": "Maple" }]}]}}"""
|
45 |
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
def concatenate_text(examples):
|
49 |
return {
|
|
|
153 |
print(start_with_searchTermLOINC )
|
154 |
print(start_with_searchTermSNOMED )
|
155 |
print(start_with_searchTermCQM)
|
156 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
# try:
|
159 |
#top1matchLOINC = json.loads(start_with_searchTermLOINC['train'])
|