fearlessbj4 committed on
Commit
d0e0652
·
verified ·
1 Parent(s): 3ffd89f

Upload /sim_case_sug_demo/g_h.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. sim_case_sug_demo/g_h.py +317 -38
sim_case_sug_demo/g_h.py CHANGED
@@ -5,7 +5,7 @@ _config={
5
  "sug_based_list":["dispute","plaintiff"],
6
  "sug_pool_list":["corpus3835","2022~2023"],
7
  "embedder_list":["ftlf","ftrob"],
8
- "based_index":1,
9
  "pool_index":1,
10
  "emb_index":1,
11
  "sug_th":20,
@@ -177,6 +177,37 @@ def html_hl(lst):
177
  tp_lst.append(temp)
178
 
179
  return "".join(tp_lst)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  def ansi_to_html(_f,file_path,_tranpose=True):
181
 
182
  if _tranpose:
@@ -297,6 +328,164 @@ def ansi_to_image(ansi_text, font_size=20, image_path="./test.png"):
297
  #image_path = ansi_to_image(ansi_content)
298
  #
299
  #---------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  def suggesting(the_pool,target_name,case_dict):
301
  global ANSI_COLORS,_th,c_th,sug_th,corpus_dict,corpus_pd_f,vec_lst,id_lst,sen_lst,corpus_clust_label,_cluster_core_dict,_embedder
302
  global bilstm_len,cnn_len,emb_dim,inset_th,clust_th
@@ -461,55 +650,100 @@ emb_model_path={\
461
  color_lst=[Back.BLUE,Back.GREEN,Back.MAGENTA,Back.YELLOW,Back.RED,Back.CYAN]#[Fore.RED,Fore.GREEN,Fore.YELLOW,Fore.BLUE,Fore.MAGENTA,Fore.CYAN]
462
 
463
 
464
- log_f=json.load(open("./src/plaintiff_logistic_features.json","r"))["BiLSTM_CNN"]
465
- x_r=np.array(log_f)[:,:-1]
466
- y_r=np.array(log_f)[:,-1]
467
 
468
 
469
 
470
  #pd_path,dis_path,s_path,v_path,c_path,t_path,cr_path,br_path=["TAIDE-LX-8B.jsonl","llama3_taide_8b_re_3_o_c.json","sentence.json","vector.json","hdb_cluster.json","hdb_ternary_array.json","hdb_cnn_result.json","hdb_sa_result.json"]
471
 
472
- pd_f=corpus_pd_f=json.load(open("./src/corpus3835_raw.json","r"))["claim"]
473
- s_f=json.load(open("./src/plaintiff_corpus3835_sen.json","r"))
474
- v_f=json.load(open("./src/plaintiff_corpus3835_vec.json","r"))#json.load(open(_dir+v_path,"r"))
475
 
476
- o_c_f=json.load(open("./src/plaintiff_corpus3835_cluster.json","r"))["clusters"]
477
- c_f=clust_2_dict(o_c_f)
478
- t_f=json.load(open("./src/plaintiff_ter.json","r"))
479
 
 
 
 
 
 
 
 
480
 
481
- if pool_type=="corpus3835":
482
- corpus_clust_label=clust_label(o_c_f)
 
 
 
483
 
484
- vec_lst=v_f["vector"]
485
- id_lst=v_f["id"]
486
- sen_lst=s_f["sentence"]
487
 
488
- corpus_dict={}
489
- for i in range(len(id_lst)):
490
- fid=id_lst[i].split("@")[0]
491
- if fid not in corpus_dict:
492
- corpus_dict[fid]=[sen_lst[i]]
493
- else:
494
- corpus_dict[fid].append(sen_lst[i])
495
- corpus_pd_f=json.load(open("./src/corpus3835_raw.json","r"))["claim"]
496
- else:
497
- vec_f=json.load(open("./src/plaintiff_2022~2023_vec.json","r"))
498
- vec_lst=[_e for i in vec_f for _e in vec_f[i]]
499
 
500
 
501
- corpus_dict=json.load(open("./src/plaintiff_2022~2023_raw.json","r"))
502
- corpus_pd_f=json.load(open("./src/2022~2023_raw.json","r"))["claim"]
503
- corpus_clust_f=json.load(open("./src/plaintiff_2022~2023_clust.json","r"))
504
-
505
- sen_lst=[_e for i in corpus_dict for _e in corpus_dict[i]]
506
- id_lst=[i+"@"+str(_e) for i in corpus_dict for _e in range(len(corpus_dict[i]))]
507
- corpus_clust_label={_e:corpus_clust_f[_e[:_e.find("@")]][int(_e[_e.find("@")+1:])] for _e in id_lst}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
  ###
510
 
511
- new_point_f=lst_2_dict(jl("../law/2022~2023/gpt-4-turbo-0409-0.3-new22_23.jsonl"))
512
- new_pd_f=json.load(open("../law/2022~2023/new22_23_3k3_corpus_raw.json","r"))["claim"]
513
 
514
 
515
 
@@ -539,6 +773,48 @@ from colorama import Fore,Style,Back
539
 
540
  import gradio as gr
541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  def case_sug(file_name,plaintiff,p_point):
543
  global new_pd_f,new_point_f,corpus_dict
544
 
@@ -577,6 +853,9 @@ def case_sug(file_name,plaintiff,p_point):
577
  else:
578
  output_list.append(out_path)
579
  return output_list
580
-
581
- demo = gr.Interface(fn=case_sug, inputs=["text","text","text"], outputs=[gr.outputs.File() for i in range(sug_th)])
582
- demo.launch(share=True,server_port=4096,show_error=True)
 
 
 
 
5
  "sug_based_list":["dispute","plaintiff"],
6
  "sug_pool_list":["corpus3835","2022~2023"],
7
  "embedder_list":["ftlf","ftrob"],
8
+ "based_index":0,
9
  "pool_index":1,
10
  "emb_index":1,
11
  "sug_th":20,
 
177
  tp_lst.append(temp)
178
 
179
  return "".join(tp_lst)
180
def ansi_to_html_dis(_f,file_path,_tranpose=True):
    """Render one dispute-based suggestion as an HTML comparison table.

    Args:
        _f: Suggestion record. The keys read here are ``target``, ``case_id``,
            ``ensemble_pred`` and the six ``{plaintiff,defendant,dispute}_case{1,2}``
            segment lists, which are passed to ``html_hl``.
        file_path: Destination path of the HTML file.
        _tranpose: When true, cases are columns and plaintiff/defendant/dispute/
            score are rows; otherwise the layout is one row per case.

    Returns:
        The HTML string that was written to ``file_path``.
    """
    target_label = _f["target"] + "(target)"
    # The table is first built with placeholder tokens; they are swapped for
    # the highlighted HTML afterwards so the table builder never sees markup.
    if _tranpose:
        _dict = {
            "item": ["plaintiff", "defendant", "dispute", "score"],
            target_label: ["plaintiff_anchor2", "defendant_anchor2", "dispute_anchor2", ""],
            _f["case_id"]: ["plaintiff_anchor1", "defendant_anchor1", "dispute_anchor1", "score_anchor"],
        }
    else:
        # NOTE(review): here the score lands on the target row, whereas the
        # transposed layout puts it in the case column -- confirm intended.
        _dict = {
            "case_name": [_f["case_id"], target_label],
            "plaintiff": ["plaintiff_anchor1", "plaintiff_anchor2"],
            "defendant": ["defendant_anchor1", "defendant_anchor2"],
            "dispute": ["dispute_anchor1", "dispute_anchor2"],
            "score": ["", "score_anchor"],
        }

    score = _f["ensemble_pred"]
    score_colour = "green" if score >= 0.75 else "yellow" if score >= 0.5 else "red"
    score_ = "\n<mark style=\"background:#ffffff;color:" + score_colour + "\">" + str(score) + "</mark>"

    # Placeholder -> replacement, applied in the same order as before.
    substitutions = [
        ("plaintiff_anchor1", html_hl(_f["plaintiff_case1"])),
        ("plaintiff_anchor2", html_hl(_f["plaintiff_case2"])),
        ("defendant_anchor1", html_hl(_f["defendant_case1"])),
        ("defendant_anchor2", html_hl(_f["defendant_case2"])),
        ("dispute_anchor1", html_hl(_f["dispute_case1"])),
        ("dispute_anchor2", html_hl(_f["dispute_case2"])),
        ("score_anchor", score_),
    ]

    html_table_blue_light = build_table(pd.DataFrame(_dict), 'blue_light')

    # Declare the charset right after the first <thead>. Using replace()
    # instead of find()+slicing means a missing <thead> leaves the markup
    # untouched rather than splicing the tag in at a bogus offset.
    injection = "<meta charset=\"UTF-8\">"
    html_table_blue_light = html_table_blue_light.replace("<thead>", "<thead>" + injection, 1)

    for placeholder, replacement in substitutions:
        html_table_blue_light = html_table_blue_light.replace(placeholder, replacement)

    # The document declares UTF-8, so write it as UTF-8 regardless of the
    # platform default encoding.
    with open(file_path, 'w', encoding="utf-8") as f:
        f.write(html_table_blue_light)
    return html_table_blue_light
211
  def ansi_to_html(_f,file_path,_tranpose=True):
212
 
213
  if _tranpose:
 
328
  #image_path = ansi_to_image(ansi_content)
329
  #
330
  #---------------------------------------
331
def _dis_pad_vectors(vectors, length, dim):
    """Truncate/zero-pad ``vectors`` to ``length`` rows and wrap them as a batch of one."""
    rows = [vectors[k] if k < len(vectors) else [0] * dim for k in range(length)]
    return np.array([rows])


def _dis_tag_by_cluster(texts, labels, shared):
    """Give each sentence a background colour when its cluster label is in ``shared``."""
    tagged = []
    for text, label in zip(texts, labels):
        if label in shared:
            colour = color_lst[shared.index(label) % len(color_lst)]
            tagged.append([colour, Fore.WHITE, text, Style.RESET_ALL])
        else:
            tagged.append([Style.RESET_ALL, text])
    return tagged


def _dis_best_matches(query_vecs, coloured_vecs):
    """For each query vector return ``[best cosine similarity, colour of that match]``."""
    return [max([[cos_sim(q, cv[0]), cv[-1]] for cv in coloured_vecs]) for q in query_vecs]


def _dis_tag_by_similarity(matches, texts, threshold):
    """Colour a text with its best match's colour when the similarity exceeds ``threshold``."""
    return [
        [match[-1], Fore.WHITE, texts[k], Style.RESET_ALL] if match[0] > threshold
        else [Style.RESET_ALL, texts[k]]
        for k, match in enumerate(matches)
    ]


def _dis_segments_to_dicts(segments):
    """Convert tagged segments into ``{background_color, font_color, content}`` dicts."""
    converted = []
    for seg in segments:
        if len(seg) == 4:
            # [background, foreground, text, reset]
            converted.append({"background_color": ANSI_COLORS[seg[0]], "font_color": ANSI_COLORS[seg[1]], "content": seg[-2]})
        else:
            # [reset, text]
            converted.append({"background_color": ANSI_COLORS[seg[0]], "font_color": ANSI_COLORS[Fore.WHITE], "content": seg[-1]})
    return converted


def _dis_section(title, segments):
    """Return the ANSI heading line plus the concatenated segment line for one section."""
    body = "".join("".join(seg) for seg in segments)
    return Fore.MAGENTA + "---" + title + "---" + Style.RESET_ALL + "\n" + body + Style.RESET_ALL + "\n"


def suggesting_dis(the_pool,target_name,case_dict):
    """Rank corpus cases against a target case by similarity of their dispute points.

    A candidate is scored only when it shares at least ``max(1, inset_th)``
    non-noise (``!= -1``) cluster labels with the target's dispute sentences.
    Its score is the ``logistic`` ensemble of the CNN and BiLSTM predictions.
    The top ``sug_th`` records are also dumped as JSON under ``./json_file/``.

    Reads module-level state: the corpus tables (``corpus_dict``, ``corpus_pd_f``,
    ``sen_lst``, ``id_lst``, ``vec_lst``, ``corpus_clust_label``), the models
    (``_embedder``, ``cnn_model``, ``bilstm_model``) and assorted thresholds.

    Args:
        the_pool: Iterable of case ids (keys of ``corpus_dict``) to consider.
        target_name: Identifier of the target case; used in records and file names.
        case_dict: Target case with keys ``plaintiff``, ``defendant`` and ``dispute``.

    Returns:
        ``(outputs, records)``: the ANSI-coloured text of the top ``sug_th``
        candidates and their full record dicts, best score first.
    """
    # Target-side data is independent of the candidate, so compute it once.
    lst_2 = list(case_dict["dispute"])[:bilstm_len]
    vec_lst_2 = [_embedder.encode(sentence) for sentence in lst_2]
    clst_2 = [clust_search(_cluster_core_dict, vec, clust_th) for vec in vec_lst_2]
    plst_2 = replace_all("".join(case_dict["plaintiff"]), key_lst, sp_key, 1).split(sp_key)
    dlst_2 = replace_all("".join(case_dict["defendant"]), key_lst, sp_key, 1).split(sp_key)
    v_plst_2 = [_embedder.encode(text) for text in plst_2]
    v_dlst_2 = [_embedder.encode(text) for text in dlst_2]

    print(clst_2)

    # sentence -> first position in sen_lst. Matches list.index() semantics
    # without rescanning the whole sentence list for every lookup.
    first_position = {}
    for pos, sentence in enumerate(sen_lst):
        first_position.setdefault(sentence, pos)

    rt_lst = []
    for i in tqdm(the_pool):
        lst_1 = list(corpus_dict[i])
        positions = [first_position[sentence] for sentence in lst_1]
        id_lst_1 = [id_lst[pos] for pos in positions]
        vec_lst_1 = [vec_lst[pos] for pos in positions]
        clst_1 = [corpus_clust_label[sid] for sid in id_lst_1]

        # Cluster labels shared by both cases, ignoring the noise label -1.
        inset = sorted(label for label in set(clst_1) & set(clst_2) if label != -1)
        if len(inset) < max(1, inset_th):
            continue

        temp_ot = {"target": target_name, "inset": inset}

        _img = img_resize(vec2img(vec_lst_1, clst_1, vec_lst_2, clst_2, clust_th), cnn_len)
        cnn_pred = cnn_model.predict(np.array([_img]) / 255)

        # The BiLSTM takes fixed-length sequences: truncate / zero-pad both sides.
        _con1 = _dis_pad_vectors(vec_lst_1, bilstm_len, emb_dim)
        _con2 = _dis_pad_vectors(vec_lst_2, bilstm_len, emb_dim)
        print(len(_con1), len(_con2), len(_con2[0]))
        bilstm_pred = bilstm_model.predict([_con1, _con2])

        temp_ot["cnn_pred"] = float(cnn_pred[0][0])
        temp_ot["bilstm_pred"] = float(bilstm_pred[0][0])
        x_e = [[bilstm_pred[0][0], cnn_pred[0][0]]]
        ensemble_pred = logistic(x_r, y_r, x_e)
        temp_ot["ensemble_pred"] = float(ensemble_pred[0])

        # Dispute sentences: coloured when their cluster is shared.
        pre_lst_1 = _dis_tag_by_cluster(lst_1, clst_1, inset)
        pre_lst_2 = _dis_tag_by_cluster(lst_2, clst_2, inset)

        # (vector, colour) pairs of the coloured dispute sentences only.
        vlst_1 = [[vec_lst_1[k], seg[0]] for k, seg in enumerate(pre_lst_1) if len(seg) == 4]
        vlst_2 = [[vec_lst_2[k], seg[0]] for k, seg in enumerate(pre_lst_2) if len(seg) == 4]

        claim = corpus_pd_f[i.replace("_", ",")]
        plst_1 = replace_all("".join(claim[0]), key_lst, sp_key, 1).split(sp_key)
        dlst_1 = replace_all("".join(claim[1]), key_lst, sp_key, 1).split(sp_key)
        v_plst_1 = [_embedder.encode(text) for text in plst_1]
        v_dlst_1 = [_embedder.encode(text) for text in dlst_1]

        # Plaintiff/defendant fragments inherit the colour of the closest
        # coloured dispute sentence of the same case when similar enough.
        pre_lst_p1 = _dis_tag_by_similarity(_dis_best_matches(v_plst_1, vlst_1), plst_1, _th)
        pre_lst_d1 = _dis_tag_by_similarity(_dis_best_matches(v_dlst_1, vlst_1), dlst_1, _th)
        pre_lst_p2 = _dis_tag_by_similarity(_dis_best_matches(v_plst_2, vlst_2), plst_2, _th)
        pre_lst_d2 = _dis_tag_by_similarity(_dis_best_matches(v_dlst_2, vlst_2), dlst_2, _th)

        temp_ot["case_id"] = i
        temp_ot["plaintiff_case1"] = _dis_segments_to_dicts(pre_lst_p1)
        temp_ot["defendant_case1"] = _dis_segments_to_dicts(pre_lst_d1)
        temp_ot["dispute_case1"] = _dis_segments_to_dicts(pre_lst_1)
        temp_ot["plaintiff_case2"] = _dis_segments_to_dicts(pre_lst_p2)
        temp_ot["defendant_case2"] = _dis_segments_to_dicts(pre_lst_d2)
        temp_ot["dispute_case2"] = _dis_segments_to_dicts(pre_lst_2)

        score = temp_ot["ensemble_pred"]
        score_colour = Fore.GREEN if score >= 0.75 else Fore.YELLOW if score >= 0.5 else Fore.RED
        tp_str = "".join([
            Fore.BLUE + str(i) + Style.RESET_ALL + "\n",
            score_colour + str(score) + Style.RESET_ALL + "\n",
            _dis_section("plaintiff_case1", pre_lst_p1),
            _dis_section("defendant_case1", pre_lst_d1),
            _dis_section("dispute_case1", pre_lst_1),
            Fore.BLUE + "target" + Style.RESET_ALL + "\n",
            _dis_section("plaintiff_case2", pre_lst_p2),
            _dis_section("defendant_case2", pre_lst_d2),
            _dis_section("dispute_case2", pre_lst_2),
        ])

        temp_ot["output"] = tp_str
        rt_lst.append(temp_ot)
        print(tp_str)

    ot = sorted(rt_lst, key=lambda record: record["ensemble_pred"], reverse=True)
    top = ot[:sug_th]
    ot_lst = [record["output"] for record in top]

    for record in top:
        json_path = "./json_file/" + str(target_name).replace(",", "_") + "&" + str(record["case_id"]) + ".json"
        # Context manager so the handle is released even if serialisation fails.
        with open(json_path, "w", encoding='utf8') as file:
            json.dump({key: record[key] for key in record if key != "output"}, file, indent=4, ensure_ascii=False)
    return ot_lst, top
  def suggesting(the_pool,target_name,case_dict):
490
  global ANSI_COLORS,_th,c_th,sug_th,corpus_dict,corpus_pd_f,vec_lst,id_lst,sen_lst,corpus_clust_label,_cluster_core_dict,_embedder
491
  global bilstm_len,cnn_len,emb_dim,inset_th,clust_th
 
650
  color_lst=[Back.BLUE,Back.GREEN,Back.MAGENTA,Back.YELLOW,Back.RED,Back.CYAN]#[Fore.RED,Fore.GREEN,Fore.YELLOW,Fore.BLUE,Fore.MAGENTA,Fore.CYAN]
651
 
652
 
653
+ #log_f=json.load(open("./src/plaintiff_logistic_features.json","r"))["BiLSTM_CNN"]
654
+
 
655
 
656
 
657
 
658
  #pd_path,dis_path,s_path,v_path,c_path,t_path,cr_path,br_path=["TAIDE-LX-8B.jsonl","llama3_taide_8b_re_3_o_c.json","sentence.json","vector.json","hdb_cluster.json","hdb_ternary_array.json","hdb_cnn_result.json","hdb_sa_result.json"]
659
 
 
 
 
660
 
 
 
 
661
 
662
def _load_json(path):
    """Parse the JSON file at ``path`` and close the handle afterwards."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# Resource files per suggestion mode. The two modes run the same loading
# steps and differ only in which files they read.
_SRC_FILES = {
    "plaintiff": {
        "logistic": "./src/plaintiff_logistic_features.json",
        "corpus_raw": "./src/corpus3835_raw.json",
        "sentences": "./src/plaintiff_corpus3835_sen.json",
        "vectors": "./src/plaintiff_corpus3835_vec.json",
        "clusters": "./src/plaintiff_corpus3835_cluster.json",
        "ternary": "./src/plaintiff_ter.json",
        "new_vectors": "./src/plaintiff_2022~2023_vec.json",
        "new_sentences": "./src/plaintiff_2022~2023_raw.json",
        "new_raw": "./src/2022~2023_raw.json",
        "new_clusters": "./src/plaintiff_2022~2023_clust.json",
    },
    "dispute": {
        "logistic": "./src/dispute_logistic_features.json",
        "corpus_raw": "./src/corpus3835_raw_dis.json",
        "sentences": "./src/dispute_corpus3835_sen.json",
        "vectors": "./src/dispute_corpus3835_vec.json",
        "clusters": "./src/dispute_corpus3835_cluster.json",
        "ternary": "./src/dispute_ter.json",
        "new_vectors": "./src/dispute_2022~2023_vec.json",
        "new_sentences": "./src/dispute_2022~2023_raw.json",
        "new_raw": "./src/new22_23_3k3_corpus_raw.json",
        "new_clusters": "./src/dispute_22~23_clust.json",
    },
}

if sug_type in _SRC_FILES:
    _src = _SRC_FILES[sug_type]

    # Training features for the logistic ensemble: last column is the label.
    log_f = _load_json(_src["logistic"])["BiLSTM_CNN"]
    x_r = np.array(log_f)[:, :-1]
    y_r = np.array(log_f)[:, -1]

    pd_f = corpus_pd_f = _load_json(_src["corpus_raw"])["claim"]
    s_f = _load_json(_src["sentences"])
    v_f = _load_json(_src["vectors"])

    o_c_f = _load_json(_src["clusters"])["clusters"]
    c_f = clust_2_dict(o_c_f)
    t_f = _load_json(_src["ternary"])

    if pool_type == "corpus3835":
        corpus_clust_label = clust_label(o_c_f)

        vec_lst = v_f["vector"]
        id_lst = v_f["id"]
        sen_lst = s_f["sentence"]

        # Group sentences by case id (the part of the sentence id before "@").
        corpus_dict = {}
        for _idx, _sid in enumerate(id_lst):
            corpus_dict.setdefault(_sid.split("@")[0], []).append(sen_lst[_idx])
        # corpus_pd_f already holds this pool's raw claims (loaded above),
        # so the file is not read a second time.
    else:
        vec_f = _load_json(_src["new_vectors"])
        vec_lst = [_e for _case in vec_f for _e in vec_f[_case]]

        corpus_dict = _load_json(_src["new_sentences"])
        corpus_pd_f = _load_json(_src["new_raw"])["claim"]
        corpus_clust_f = _load_json(_src["new_clusters"])

        # Flatten per-case sentence lists; ids are "<case>@<position>".
        sen_lst = [_e for _case in corpus_dict for _e in corpus_dict[_case]]
        id_lst = [_case + "@" + str(_pos) for _case in corpus_dict for _pos in range(len(corpus_dict[_case]))]
        corpus_clust_label = {
            _sid: corpus_clust_f[_sid[:_sid.find("@")]][int(_sid[_sid.find("@") + 1:])]
            for _sid in id_lst
        }

# Lookup tables for target cases; both request handlers read these globals.
new_point_f = lst_2_dict(jl("./src/gpt-4-turbo-0409-0.3-new22_23.jsonl"))
new_pd_f = _load_json("./src/new22_23_3k3_corpus_raw.json")["claim"]
744
  ###
745
 
746
+
 
747
 
748
 
749
 
 
773
 
774
  import gradio as gr
775
 
776
def _split_sentences(text):
    """Split ``text`` on the full stop "。" when it is a string; pass anything else through.

    Empty fragments (e.g. the one after a trailing "。") are dropped.
    """
    if not isinstance(text, str):
        return text
    return [part for part in text.split("。") if part.strip()]


def case_sug_dis(file_name,plaintiff,defendant,p_point,d_point,dispute_list):
    """Gradio handler: suggest similar cases for a target based on its dispute points.

    When ``file_name`` is a key of ``new_pd_f`` the stored claims and points of
    that case replace the other arguments. Otherwise the user-supplied texts
    are used and the target is named ``"user_input"``.

    Args:
        file_name: Key of a known case, or any other text for free-form input.
        plaintiff: Plaintiff claim text.
        defendant: Defendant claim text.
        p_point: Plaintiff points; a string is split into sentences on "。".
        d_point: Defendant points; a string is split into sentences on "。".
        dispute_list: Dispute points; a string is split into sentences on "。".

    Returns:
        A list of exactly ``sug_th`` file paths: one rendered HTML file per
        suggestion, padded with ``./out_of_range.html``.
    """
    global new_pd_f, new_point_f, corpus_dict, sug_th

    if file_name not in new_pd_f:
        print("file not found")
        file_name = "user_input"
    else:
        plaintiff = new_pd_f[file_name][0]
        defendant = new_pd_f[file_name][1]
        p_point = new_point_f[file_name][0]
        d_point = new_point_f[file_name][1]
        dispute_list = new_point_f[file_name][2]

    # Fix: the original called "。".split(text), which splits the separator
    # by the text and so never broke the input into sentences.
    p_point = _split_sentences(p_point)
    d_point = _split_sentences(d_point)
    dispute_list = _split_sentences(dispute_list)

    _pool = list(corpus_dict)
    _case_dict = {"plaintiff": plaintiff, "defendant": defendant, "p_point": p_point, "d_point": d_point, "dispute": dispute_list}
    ot, ot_dict = suggesting_dis(_pool, file_name, _case_dict)

    print("-----")
    print(len(ot_dict))

    out_path = "./out_of_range.html"
    output_list = []
    for i in range(sug_th):
        if i < len(ot_dict):
            _path = "./html_file/test" + str(i) + ".html"
            ansi_to_html_dis(ot_dict[i], _path)
            output_list.append(_path)
        else:
            # The interface declares a fixed number of outputs, so pad the list.
            output_list.append(out_path)
    return output_list
818
  def case_sug(file_name,plaintiff,p_point):
819
  global new_pd_f,new_point_f,corpus_dict
820
 
 
853
  else:
854
  output_list.append(out_path)
855
  return output_list
856
# Start the Gradio demo for the configured suggestion mode. The two modes
# differ in handler, number of text inputs and server port.
# NOTE(review): gr.outputs.File is the legacy output namespace -- confirm it
# exists in the deployed Gradio version.
if sug_type in ("plaintiff", "dispute"):
    _plaintiff_mode = sug_type == "plaintiff"
    demo = gr.Interface(
        fn=case_sug if _plaintiff_mode else case_sug_dis,
        inputs=["text"] * (3 if _plaintiff_mode else 6),
        outputs=[gr.outputs.File() for _ in range(sug_th)],
    )
    demo.launch(
        share=True,
        server_port=4096 if _plaintiff_mode else 2048,
        show_error=True,
    )