|
import streamlit as st |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from io import BytesIO |
|
from reportlab.lib.pagesizes import letter |
|
from reportlab.pdfgen import canvas |
|
from reportlab.lib.utils import ImageReader |
|
|
|
|
|
|
|
def _draw_wrapped(c, text, x, y, max_width, font_name="Helvetica", font_size=12, leading=20):
    """Draw *text* at (x, y), wrapping it to *max_width*; return the y below the last line."""
    # Local import: only this helper needs it, and the package is already a
    # dependency of this file (ImageReader comes from the same module).
    from reportlab.lib.utils import simpleSplit

    for piece in simpleSplit(text, font_name, font_size, max_width) or [""]:
        c.drawString(x, y, piece)
        y -= leading
    return y


def _report_plots(filtered_data):
    """Return ``(title, xlabel, ylabel, rotate_ticks, draw)`` specs for the report charts."""
    # Each aggregate is computed once and shared by the drawing callbacks.
    citations_per_year = filtered_data.groupby('Year')['Citation Count'].sum()
    publications_per_year = filtered_data.groupby('Year').size()
    keyword_counts = filtered_data['Keyword'].value_counts()
    top_keywords = keyword_counts.head(10)
    top_authors = (
        filtered_data.groupby('Authors')['Citation Count']
        .sum()
        .sort_values(ascending=False)
        .head(10)
    )

    return [
        (
            "Distribution des Citations par Publication",
            "Nombre de Citations",
            "Nombre de Publications",
            False,
            lambda ax: ax.hist(filtered_data['Citation Count'], bins=20, color='skyblue', edgecolor='black'),
        ),
        (
            "Citations par Année",
            "Année",
            "Nombre de Citations",
            False,
            lambda ax: ax.plot(citations_per_year.index, citations_per_year.values, marker='o', color='skyblue'),
        ),
        (
            "Nombre de Publications par Mot-Clé",
            None,
            None,
            False,
            lambda ax: ax.pie(
                keyword_counts,
                labels=keyword_counts.index,
                autopct='%1.1f%%',
                colors=plt.cm.Paired(range(len(keyword_counts))),
            ),
        ),
        (
            "Nombre de Publications par Année",
            "Année",
            "Nombre de Publications",
            False,
            lambda ax: ax.plot(publications_per_year.index, publications_per_year.values, marker='o', color='skyblue'),
        ),
        (
            "Auteurs les Plus Cités",
            "Auteur",
            "Nombre de Citations",
            True,
            lambda ax: ax.bar(
                [str(name) for name in top_authors.index],
                top_authors.values,
                color='skyblue',
                edgecolor='black',
            ),
        ),
        (
            "Sujets les Plus Publiés",
            "Mot-Clé",
            "Nombre de Publications",
            True,
            lambda ax: ax.bar(
                [str(name) for name in top_keywords.index],
                top_keywords.values,
                color='skyblue',
                edgecolor='black',
            ),
        ),
    ]


def generate_pdf(data, filtered_data, years, keywords, author):
    """Build the PDF report for the current selection and return it as a buffer.

    The report contains the applied filters, summary statistics and six
    charts computed from ``filtered_data``.

    Args:
        data: Unfiltered DataFrame. Not used by the report; kept so existing
            callers keep working.
        filtered_data: DataFrame with the columns 'Citation Count', 'Title',
            'Year', 'Keyword' and 'Authors'.
        years: Iterable of the selected years (shown in the filter summary).
        keywords: Iterable of the selected keywords (shown in the summary).
        author: Author search string; listed in the summary when truthy.

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds the PDF bytes.
    """
    buffer = BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)
    width, height = letter
    left, indent, top_margin, bottom_margin = 50, 70, 50, 40

    # Report title.
    c.setFont("Helvetica-Bold", 18)
    c.drawString(100, height - 40, "Rapport d'Analyse des Publications Scientifiques")

    # Filter summary. Values are stringified so non-string entries (e.g. NaN
    # coming from a column's unique values) cannot break the join.
    c.setFont("Helvetica", 12)
    filter_lines = [
        "Filtres appliqués:",
        f"- Années: {', '.join(map(str, years))}",
        f"- Mots-clés: {', '.join(map(str, keywords))}",
    ]
    if author:
        filter_lines.append(f"- Auteur: {author}")

    y = height - 80
    for line in filter_lines:
        y = _draw_wrapped(c, line, left, y, width - 2 * left)

    y -= 20
    c.drawString(left, y, "Statistiques Générales:")
    y -= 20

    # With no rows, idxmax() raises and the charts have nothing to show, so
    # emit a short notice instead of failing.
    if filtered_data.empty:
        _draw_wrapped(
            c,
            "Aucune publication ne correspond aux filtres sélectionnés.",
            indent,
            y,
            width - indent - left,
        )
        c.save()
        buffer.seek(0)
        return buffer

    citations = filtered_data['Citation Count']
    top_cited_publication = filtered_data.loc[citations.idxmax()]
    stats = [
        f"Nombre total de publications: {len(filtered_data)}",
        f"Nombre total de citations: {citations.sum()}",
        f"Citations moyennes par publication: {citations.mean():.2f}",
        f"Publication avec le plus de citations: {top_cited_publication['Title']} ({top_cited_publication['Citation Count']} citations)",
    ]
    for stat in stats:
        # Publication titles are often wider than the page; wrap them.
        y = _draw_wrapped(c, stat, indent, y, width - indent - left)

    # Charts are stacked under a running cursor and continue on a new page
    # when the next one would not fit. The previous fixed offsets
    # (height - 320 - 150 * (i + 1)) went negative from the fourth chart on.
    image_width, image_height, gap = 440, 330, 20  # 4:3, like the default figure
    for title, xlabel, ylabel, rotate_ticks, draw in _report_plots(filtered_data):
        fig, ax = plt.subplots()
        try:
            draw(ax)
            ax.set_title(title)
            if xlabel:
                ax.set_xlabel(xlabel)
            if ylabel:
                ax.set_ylabel(ylabel)
            if rotate_ticks:
                ax.tick_params(axis='x', rotation=90)
            fig.tight_layout()

            img_buffer = BytesIO()
            fig.savefig(img_buffer, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)
        img_buffer.seek(0)

        if y - image_height < bottom_margin:
            c.showPage()
            y = height - top_margin
        y -= image_height
        c.drawImage(
            ImageReader(img_buffer),
            (width - image_width) / 2,
            y,
            width=image_width,
            height=image_height,
        )
        y -= gap

    c.save()
    buffer.seek(0)
    return buffer
|
|
|
|
|
# Load the dataset once; the page body and the PDF report both work on it.
data = pd.read_csv('scopus_data_all_cleaned.csv')

st.title("Analyse des Publications Scientifiques avec l'API Scopus")
st.markdown("""
Ce tableau de bord vous permet d'analyser les publications scientifiques récupérées depuis l'API Scopus.
Utilisez les filtres pour explorer les données et visualiser différentes statistiques.
""")

# Page selector.
menu = ["Statistiques Générales", "Visualisations"]
choice = st.sidebar.selectbox("Menu", menu)

# Sidebar filters; by default every year and keyword is selected.
years = st.sidebar.multiselect('Sélectionnez les années', options=data['Year'].unique(), default=data['Year'].unique())
keywords = st.sidebar.multiselect('Sélectionnez les mots-clés', options=data['Keyword'].unique(), default=data['Keyword'].unique())
author = st.sidebar.text_input('Rechercher par auteur')

# Apply the filters.
filtered_data = data[(data['Year'].isin(years)) & (data['Keyword'].isin(keywords))]
if author:
    # regex=False: the field is a plain-text search, and characters such as
    # "(" or "[" would otherwise be parsed as a regular expression and raise.
    filtered_data = filtered_data[
        filtered_data['Authors'].str.contains(author, case=False, na=False, regex=False)
    ]

# The report is built from the selection made above, so the filters printed
# in the PDF are the ones actually applied to its data. This replaces the
# earlier hard-coded years/keywords/author and the second CSV read.
if st.sidebar.button("Générer le Rapport PDF"):
    if filtered_data.empty:
        # An empty selection has no "most cited" row to report on.
        st.sidebar.warning("Aucune publication ne correspond aux filtres sélectionnés.")
    else:
        buffer = generate_pdf(data, filtered_data, years, keywords, author)

        st.sidebar.success("Rapport PDF généré avec succès !")
        st.sidebar.download_button(
            label="Télécharger le PDF",
            data=buffer,
            file_name="rapport_publications_scientifiques.pdf",
            mime="application/pdf",
        )
|
|
|
def _show_figure(fig):
    """Render *fig* in the app, then close it.

    pyplot keeps a reference to every figure it creates; without the close,
    each rerun of the script would leave its figures allocated.
    """
    st.pyplot(fig)
    plt.close(fig)


if filtered_data.empty:
    # Nothing matches the filters: idxmax() below would raise, and the charts
    # would have nothing to draw.
    st.warning("Aucune publication ne correspond aux filtres sélectionnés.")

elif choice == "Statistiques Générales":
    st.subheader("Données Filtrées")
    st.write(filtered_data)

    # Summary statistics for the current selection.
    total_publications = len(filtered_data)
    total_citations = filtered_data['Citation Count'].sum()
    avg_citations_per_publication = filtered_data['Citation Count'].mean()
    top_cited_publication = filtered_data.loc[filtered_data['Citation Count'].idxmax()]

    st.subheader("Statistiques")
    st.write(f"**Nombre total de publications :** {total_publications}")
    st.write(f"**Nombre total de citations :** {total_citations}")
    st.write(f"**Citations moyennes par publication :** {avg_citations_per_publication:.2f}")
    st.write("**Publication avec le plus de citations :**")
    st.write(top_cited_publication)

    # The PDF is only built on request; the download button appears once it
    # has been generated.
    if st.button("Télécharger le rapport en PDF"):
        buffer = generate_pdf(data, filtered_data, years, keywords, author)
        st.download_button(
            label="Télécharger le PDF",
            data=buffer,
            file_name="rapport_publications_scientifiques.pdf",
            mime="application/pdf",
        )

else:
    st.subheader("Visualisations")

    # Histogram of citation counts.
    fig, ax = plt.subplots()
    ax.hist(filtered_data['Citation Count'], bins=20, color='skyblue', edgecolor='black')
    ax.set_title("Distribution des Citations par Publication")
    ax.set_xlabel("Nombre de Citations")
    ax.set_ylabel("Nombre de Publications")
    _show_figure(fig)

    # Total citations per year.
    citations_per_year = filtered_data.groupby('Year')['Citation Count'].sum().reset_index()
    fig, ax = plt.subplots()
    ax.plot(citations_per_year['Year'], citations_per_year['Citation Count'], marker='o', color='skyblue')
    ax.set_title("Citations par Année")
    ax.set_xlabel("Année")
    ax.set_ylabel("Nombre de Citations")
    _show_figure(fig)

    # Publications per author; only shown when an author search is active.
    if author:
        publications_per_author = filtered_data['Authors'].value_counts().reset_index()
        publications_per_author.columns = ['Auteur', 'Nombre de Publications']
        fig, ax = plt.subplots()
        ax.bar(publications_per_author['Auteur'], publications_per_author['Nombre de Publications'], color='skyblue', edgecolor='black')
        ax.set_title("Répartition des Publications par Auteur")
        ax.set_xlabel("Auteur")
        ax.set_ylabel("Nombre de Publications")
        ax.tick_params(axis='x', rotation=90)
        _show_figure(fig)

    # Citation count against publication year.
    fig, ax = plt.subplots()
    ax.scatter(filtered_data['Year'], filtered_data['Citation Count'], color='skyblue', edgecolor='black')
    ax.set_title("Corrélation entre le Nombre de Citations et les Années de Publication")
    ax.set_xlabel("Année")
    ax.set_ylabel("Nombre de Citations")
    _show_figure(fig)

    # Share of publications per keyword.
    publications_per_keyword = filtered_data['Keyword'].value_counts().reset_index()
    publications_per_keyword.columns = ['Keyword', 'Nombre de Publications']
    fig, ax = plt.subplots()
    ax.pie(
        publications_per_keyword['Nombre de Publications'],
        labels=publications_per_keyword['Keyword'],
        autopct='%1.1f%%',
        colors=plt.cm.Paired(range(len(publications_per_keyword))),
    )
    ax.set_title("Nombre de Publications par Mot-Clé")
    _show_figure(fig)

    # Number of publications per year.
    publications_per_year = filtered_data.groupby('Year').size().reset_index(name='Nombre de Publications')
    fig, ax = plt.subplots()
    ax.plot(publications_per_year['Year'], publications_per_year['Nombre de Publications'], marker='o', color='skyblue')
    ax.set_title("Nombre de Publications par Année")
    ax.set_xlabel("Année")
    ax.set_ylabel("Nombre de Publications")
    _show_figure(fig)

    # Ten authors with the highest citation totals.
    top_authors = (
        filtered_data.groupby('Authors')['Citation Count']
        .sum()
        .reset_index()
        .sort_values(by='Citation Count', ascending=False)
        .head(10)
    )
    fig, ax = plt.subplots()
    ax.bar(top_authors['Authors'], top_authors['Citation Count'], color='skyblue', edgecolor='black')
    ax.set_title("Auteurs les Plus Cités")
    ax.set_xlabel("Auteur")
    ax.set_ylabel("Nombre de Citations")
    ax.tick_params(axis='x', rotation=90)
    _show_figure(fig)

    # Ten most frequent keywords.
    top_keywords = filtered_data['Keyword'].value_counts().reset_index().head(10)
    top_keywords.columns = ['Keyword', 'Nombre de Publications']
    fig, ax = plt.subplots()
    ax.bar(top_keywords['Keyword'], top_keywords['Nombre de Publications'], color='skyblue', edgecolor='black')
    ax.set_title("Sujets les Plus Publiés")
    ax.set_xlabel("Mot-Clé")
    ax.set_ylabel("Nombre de Publications")
    ax.tick_params(axis='x', rotation=90)
    _show_figure(fig)
|
|