Home > News
News
26.09.2025
While working on the references and scripts these days, I noticed that some articles were wrongly included because the Scopus search string matched across author addresses: the keywords emergency, department, and Switzerland appeared in different authors’ affiliations rather than together in a single one. To address this, I wrote another script (Python, see below) that processes the XML export from EndNote (which I currently use as a backup and for retrieving updates on references) and screens for the keywords within the same affiliation. After reprocessing, I updated the whole database and removed around 600 references. As of today, the database includes 2,174 references.
import xml.etree.ElementTree as ET
import re
def get_text_recursive(elem):
    """Return the concatenated text of *elem* and all of its descendants.

    The text fragments are joined without a separator and the result is
    stripped of leading/trailing whitespace. ``None`` yields an empty string.
    """
    if elem is not None:
        fragments = elem.itertext()
        return "".join(fragments).strip()
    return ""
def clean_ris_field(text):
    """Flatten *text* onto a single line for use as a RIS field value.

    Every line feed and carriage return becomes one space; surrounding
    whitespace is then stripped.
    """
    line_breaks_to_space = {ord("\n"): " ", ord("\r"): " "}
    flattened = text.translate(line_breaks_to_space)
    return flattened.strip()
def affiliation_matches_flexible(affil, keywords):
    """Tell whether every keyword occurs in *affil*.

    The affiliation is lower-cased, stripped of punctuation and has runs of
    whitespace collapsed to one space before the (case-insensitive)
    substring test. An empty keyword list matches everything.
    """
    normalized = affil.lower()
    normalized = re.sub(r"[^\w\s]", "", normalized)
    normalized = re.sub(r"\s+", " ", normalized)
    for keyword in keywords:
        if keyword.lower() not in normalized:
            return False
    return True
# Keywords for filtering: all of them must occur within ONE affiliation segment
keywords = ["emergency", "department", "switzerland"]

# Mapping EndNote ref-type numbers → RIS codes.
# Numbers that are missing from the record or from this table fall back to JOUR.
# NOTE(review): the numbers are kept as in the original script; verify them
# against the ref-type values that actually occur in the XML export.
ref_type_mapping = {
    "17": "JOUR",  # Journal Article
    "6": "BOOK",  # Book
    "7": "CHAP",  # Book Section
    "10": "CONF",  # Conference Paper
    "2": "THES",  # Thesis
    "34": "ELEC",  # Web Page
}

# Single-valued fields as (ElementTree path, RIS tag), in output order.
# They are split in two groups because KW/SN/UR are written in between.
_FIELDS_BEFORE_KEYWORDS = (
    (".//titles/title", "TI"),  # Title
    (".//periodical/full-title", "JO"),  # Journal
    ("volume", "VL"),  # Volume
    ("number", "IS"),  # Issue / Number
    ("pages", "SP"),  # Pages
    (".//dates/year", "PY"),  # Year
    ("edition", "ET"),  # Edition
    ("electronic-resource-num", "DO"),  # DOI
    ("abstract", "AB"),  # Abstract
    ("notes", "N1"),  # Notes
)
_FIELDS_AFTER_URLS = (
    ("accession-num", "AN"),  # Accession Number
    ("rec-number", "ID"),  # Record Number
    ("custom2", "L2"),  # PMC / Related IDs
)


def _split_affiliations(auth_address_elem):
    """Return the non-empty affiliation segments of an auth-address element.

    Segments are separated by semicolons or by line breaks. Both "\\r" and
    "\\n" count as line breaks, so an affiliation list broken with either
    character is screened segment by segment.
    """
    if auth_address_elem is None:
        return []
    raw = get_text_recursive(auth_address_elem)
    for line_break in ("\r", "\n"):
        raw = raw.replace(line_break, ";")
    return [seg.strip() for seg in raw.split(";") if seg.strip()]


def _append_field(ris_lines, tag, value):
    """Append one RIS line for *tag* unless the cleaned value is empty."""
    value = clean_ris_field(value)
    if value:
        # NOTE(review): the tag/value separator is reproduced exactly as in the
        # original script; the RIS convention is two spaces before the hyphen.
        # Confirm what the importer and the follow-up sed command expect
        # before changing it.
        ris_lines.append(f"{tag} - {value}")


def _build_ris_record(record, authors, affil_segments):
    """Return the RIS text (TY … ER) for one EndNote <record> element."""
    ris_lines = []

    # Reference type: a missing element yields "" and therefore the JOUR default
    ref_type_num = get_text_recursive(record.find("ref-type"))
    ris_lines.append(f"TY - {ref_type_mapping.get(ref_type_num, 'JOUR')}")

    for author_elem in authors:
        _append_field(ris_lines, "AU", get_text_recursive(author_elem))
    for affil in affil_segments:
        _append_field(ris_lines, "AD", affil)

    for path, tag in _FIELDS_BEFORE_KEYWORDS:
        _append_field(ris_lines, tag, get_text_recursive(record.find(path)))

    for kw_elem in record.findall(".//keywords/keyword"):
        _append_field(ris_lines, "KW", get_text_recursive(kw_elem))
    # ISSN / ISBN: one SN line per line of the isbn field
    for sn in get_text_recursive(record.find("isbn")).splitlines():
        _append_field(ris_lines, "SN", sn)
    for url_elem in record.findall(".//urls//url"):
        _append_field(ris_lines, "UR", get_text_recursive(url_elem))

    for path, tag in _FIELDS_AFTER_URLS:
        _append_field(ris_lines, tag, get_text_recursive(record.find(path)))

    # End of record
    ris_lines.append("ER - ")
    return "\n".join(ris_lines)


# Load XML
tree = ET.parse("EMresearch_last.xml")
root = tree.getroot()

ris_records = []
for record in root.findall(".//record"):
    authors = record.findall(".//contributors/authors/author")
    if not authors:
        continue
    affil_segments = _split_affiliations(record.find(".//auth-address"))
    # Keep the record only if one single affiliation holds all keywords
    if not any(affiliation_matches_flexible(a, keywords) for a in affil_segments):
        continue
    ris_records.append(_build_ris_record(record, authors, affil_segments))

# Save RIS file
with open("filtered_articles.ris", "w", encoding="utf-8") as f:
    f.write("\n".join(ris_records))
    if ris_records:
        # Terminate the final "ER" line like every other line in the file
        f.write("\n")
print(f"RIS file saved with {len(ris_records)} records.")
Next steps: update both scripts to screen directly from the Scopus export, and explore alternatives to EndNote for updating references online (with the aim of keeping the workflow fully open-source). Zotero seems to be developing a feature for updating references by DOI (see: https://forums.zotero.org/discussion/101896/updating-references-based-on-their-doi), although progress is apparently slow. This project might also be useful: https://github.com/northword/zotero-format-metadata.
Finally, besides the change from ET to DA (since Zotero does not recognize ET), I also replace AD with N1 (adding a second note beginning with "AFFILIATIONS") in bash, because Zotero otherwise does not recognize AD (or C1, or N2, etc.).
# Convert endnote.ris into zotero.ris. The expressions run in this order on
# every line: (1) delete existing DA lines, (2) rename ET lines to DA,
# (3) turn AD lines into N1 notes prefixed with "AFFILIATIONS: ".
# The delete comes first so that the DA lines created in step 2 are kept.
sed -e '/^DA - /d' -e 's/^ET - /DA - /' -e 's/^AD - /N1 - AFFILIATIONS: /' endnote.ris > zotero.ris
25.09.2025
Senior authors update: The list is now generated automatically from an XML export of the library (EMresearch_last). Because researchers’ affiliations can change over the course of their careers, potentially shifting their domain or specialty, the script (see below, python) captures only authors who appear at least four times with affiliations containing "emergency", "department", and "switzerland".
A remaining unresolved issue, possibly not fully solvable, is handling multiple affiliations. The XML lists affiliations separated by semicolons, without explicitly associating them with specific authors. By taking the last affiliation, we ensure a match for the last author, but any other affiliations in that segment that also belong to the last author cannot be reliably assigned.
import xml.etree.ElementTree as ET
from collections import Counter, OrderedDict
import csv
def get_text_recursive(elem):
    """Collect the text of an element and all its descendants.

    Returns the joined text with outer whitespace removed, or an empty
    string when *elem* is ``None``.
    """
    return "" if elem is None else "".join(elem.itertext()).strip()
def affiliation_matches_flexible(affil, keywords):
    """Report whether each keyword is a case-insensitive substring of *affil*.

    An empty keyword list matches every affiliation.
    """
    haystack = affil.lower()
    missing = [k for k in keywords if k.lower() not in haystack]
    return not missing
# Keywords for filtering: all must occur in the last author's affiliation
keywords = ["emergency", "department", "switzerland"]

# Minimum number of matching records a last author needs to be listed
MIN_MATCHING_RECORDS = 4

# Load XML
tree = ET.parse("EMresearch_last.xml")
root = tree.getroot()

# Step 1: Collect last authors with their matching-affiliation records
matching_records = []
for record in root.findall(".//record"):
    authors = record.findall(".//contributors/authors/author")
    if not authors:
        continue

    # get_text_recursive returns "" for a missing element
    rec_number = get_text_recursive(record.find("rec-number"))
    last_author = get_text_recursive(authors[-1])

    auth_address_elem = record.find(".//auth-address")
    if auth_address_elem is not None:
        all_affils_text = get_text_recursive(auth_address_elem)
        # Split on line breaks ("\r" as well as "\n") or semicolons
        for line_break in ("\r", "\n"):
            all_affils_text = all_affils_text.replace(line_break, ";")
        affil_segments = [seg.strip() for seg in all_affils_text.split(";") if seg.strip()]
    else:
        affil_segments = []

    # Map last author to corresponding affiliation segment: with at least one
    # segment per author, take the segment at the last author's position;
    # otherwise fall back to the final segment ("N/A" when there is none).
    if len(affil_segments) >= len(authors):
        last_author_affil = affil_segments[len(authors) - 1]
    elif affil_segments:
        last_author_affil = affil_segments[-1]
    else:
        last_author_affil = "N/A"

    # Keep only records whose affiliation matches keywords
    if affiliation_matches_flexible(last_author_affil, keywords):
        matching_records.append({
            "last_author": last_author,
            "affiliation": last_author_affil,
            "rec_number": rec_number,
        })

# Step 2: Count occurrences of last authors in matching records only
author_counts = Counter(rec["last_author"] for rec in matching_records)

# Step 3: Keep only authors with enough matching records
filtered_authors = {
    author for author, count in author_counts.items()
    if count >= MIN_MATCHING_RECORDS
}

# Step 4: Keep unique authors with first matching affiliation
# (records are visited in file order, so the first hit per author wins)
unique_authors = OrderedDict()
for rec in matching_records:
    author = rec["last_author"]
    if author in filtered_authors and author not in unique_authors:
        unique_authors[author] = {
            "affiliation": rec["affiliation"],
            "rec_number": rec["rec_number"],
        }

# Step 5: Write CSV
with open("senior_authors.csv", "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ["Last Author", "Affiliation", "Record Number"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for author, info in unique_authors.items():
        writer.writerow({
            "Last Author": author,
            "Affiliation": info["affiliation"],
            "Record Number": info["rec_number"],
        })
print(f"CSV saved as senior_authors.csv with {len(unique_authors)} entries.")
17.04.2025
References were updated (as usual, weekly), this time with all dates corrected using the RIS field "DA" (derived from "ET") instead of "PY" for better importing into Zotero and, consequently, into MySQL with the ZotSpip plugin. I am working on a bash script to automatically select senior researchers (i.e. last author with affiliation to a Swiss emergency department and at least 4 articles published) from the RIS file exported from Scopus. I am making progress, but the script still needs improvement (full names are not working, and some known authors are missing). If you have any suggestions, please contact me via email. Here is the script so far:
#!/bin/bash
# List last authors that appear at least 4 times with a last affiliation line
# containing "switzerland", "emergency" and "department".
# Usage: pass the RIS file as the first argument.
# Requires gawk: IGNORECASE and asorti() are gawk extensions.
file="$1"
awk '
BEGIN {
# One record per reference (text up to "ER -"), one field per line;
# IGNORECASE makes the regex matches below case-insensitive.
RS = "ER -"; FS = "\n"; IGNORECASE = 1
# Pattern for a line that starts a new tagged field
tag_re = "^[A-Z0-9]{2} -"
}
{
# Per-record state
last_au = ""
n1_block = ""
in_n1 = 0
ad_block = ""
ad_last = ""
for (i = 1; i <= NF; i++) {
line = $i
# Every AU line overwrites last_au, so it ends up holding the last AU line
if (line ~ /^AU - /) {
# NOTE(review): substr(line, 7) assumes the tag prefix is 6 characters wide
# (as with two spaces before the hyphen); the patterns shown here are
# 5 characters wide - confirm against the real file.
last_au = substr(line, 7)
}
# AD field: start a block on an AD line, append untagged continuation lines,
# and when another tagged line follows keep the last line of the block
if (line ~ /^AD - /) {
ad_block = substr(line, 7)
} else if (ad_block != "" && line !~ tag_re) {
ad_block = ad_block "\n" line
} else if (line ~ tag_re && ad_block != "") {
split(ad_block, ad_lines, "\n")
ad_last = ad_lines[length(ad_lines)]
ad_block = ""
}
# N1 field: collect the N1 line plus its untagged continuation lines;
# a later N1 line replaces the block collected so far
if (line ~ /^N1 - /) {
in_n1 = 1
n1_block = substr(line, 7)
} else if (in_n1 && line !~ tag_re) {
n1_block = n1_block "\n" line
} else if (line ~ tag_re) {
in_n1 = 0
}
}
# Final AD block if at end
if (ad_block != "") {
split(ad_block, ad_lines, "\n")
ad_last = ad_lines[length(ad_lines)]
}
# The last affiliation line must contain all three keywords
lc_ad = tolower(ad_last)
if (lc_ad ~ /switzerland/ && lc_ad ~ /emergency/ && lc_ad ~ /department/) {
# Take the first N1 line that matches last_au as the full name.
# last_au is used as a regular expression here, so regex metacharacters
# in the name are interpreted, and an empty last_au matches any line.
full_name = ""
n = split(n1_block, n1_lines, "\n")
for (j = 1; j <= n; j++) {
if (n1_lines[j] ~ last_au) {
full_name = n1_lines[j]
break
}
}
# Count the qualifying last author
count[last_au]++
# These two are overwritten on every qualifying record, so the values
# from the most recent qualifying record are the ones kept
fullname[last_au] = full_name
lastaffil[last_au] = ad_last
}
}
END {
# Sort authors alphabetically and print their details
for (a in count) {
if (count[a] >= 4) {
# Print the author and their affiliation in alphabetical order
authors_sorted[a] = fullname[a]
}
}
# Sort and print authors alphabetically
# asorti sorts the indices (the AU names); the stored full names are not
# printed below, only the AU name and the affiliation
n = asorti(authors_sorted, sorted_authors)
for (i = 1; i <= n; i++) {
author = sorted_authors[i]
print author
print lastaffil[author]
print "---"
}
}
' "$file"
30.01.2025
Updated the references using both Scopus and PubMed :-) Unfortunately, however, this does not rely on open-source/free software at the moment :-( I first imported the library from Zotero into EndNote (X9), then imported the PubMed search results (see search string below), initially around 5,400 items, and removed the duplicates (EndNote function), which left 150 items (after some manual fixing of a few malformed lines). I then exported the list to Excel (csv), where the affiliations were stored in a single cell but across multiple lines. To search for specific affiliations (keywords listed below), I used VBA:
' Return "Match" when at least one line of the cell contains all three
' keywords ("emergency", "department", "switzerland", case-insensitive),
' otherwise "No Match".
'
' cell: a cell whose value holds one affiliation per line (vbLf-separated).
'
' The result must be assigned to the function's own name; the earlier draft
' assigned it to "CheckKeywordsInLine", so the function always returned an
' empty string.
Function CheckKeywordsCHED(cell As Range) As String
    Dim lines As Variant, line As Variant

    ' Result if no line qualifies
    CheckKeywordsCHED = "No Match"

    ' Split cell content into individual lines
    lines = Split(cell.Value, vbLf)

    ' Loop through each line
    For Each line In lines
        ' Check if all the keywords appear in a single line
        If InStr(1, line, "emergency", vbTextCompare) > 0 And _
           InStr(1, line, "department", vbTextCompare) > 0 And _
           InStr(1, line, "switzerland", vbTextCompare) > 0 Then
            CheckKeywordsCHED = "Match"
            Exit Function ' Stop checking once a match is found
        End If
    Next line
End Function
I selected the "matched" references in EndNote and re-imported them into Zotero. After deduplication, 28 additional articles were identified. I’m not sure if this approach could be useful in the future, but it demonstrates an alternative search strategy. In any case, the Traumox2 trial was correctly identified ;-)
18.01.2025
Updated the references. I am currently working on an R script to search PubMed using the easyPubMed library. The script includes XML parsing to filter affiliations and expand the database. However, I am still encountering challenges due to the heterogeneity of the data in PubMed. Progress is being made, and I am getting closer to a solution.
For your reference, the latest draft follows. If you would like to assist or have any suggestions to address this issue, please feel free to reach out to me at info@emresearch.ch.
library(easyPubMed)
library(XML)
# Search PubMed for articles with the query
query <- "(Emergency[Affiliation] AND Department[Affiliation] AND Switzerland[Affiliation])"
pubmed_ids <- get_pubmed_ids(query)

# Fetch article metadata
xml_data <- fetch_pubmed_data(pubmed_ids)

# Parse the XML data to extract affiliations (one entry per <Affiliation> node)
parsed_data <- xmlParse(xml_data)
affiliations <- xpathSApply(parsed_data, "//Affiliation", xmlValue)

# Keep affiliations that contain all three keywords from the query within
# the same affiliation string, and exclude the same terms as in the Scopus
# query.
#
# The exclusion terms are written as plain stems. grepl() takes regular
# expressions, not wildcards: in a regex "radiol*" means "radio" followed by
# zero or more "l", so it also excluded e.g. "Radio-Oncology"; likewise
# "psych*" matched "psyc" and "anesth*" matched "anest". A stem without "*"
# already matches every word that contains it.
required_terms <- c("Emergency", "Department", "Switzerland")
excluded_stems <- c("intensive", "critical", "anesth", "anaesth", "psych", "radiol")

has_all_required <- Reduce(
  `&`,
  lapply(required_terms, grepl, x = affiliations, ignore.case = TRUE)
)
has_excluded <- grepl(
  paste(excluded_stems, collapse = "|"),
  affiliations,
  ignore.case = TRUE
)

filtered_affiliations <- affiliations[has_all_required & !has_excluded]
20.12.2024
Updated the references and the senior researchers’ list. The default citation style is now the Chicago Manual of Style for better readability (removing full links and access tags). The full list of author names is included, in line with the focus of this collection. Notably, some studies, such as TRAUMOX2, are published but not yet indexed in Scopus, though they are available in PubMed. I will monitor this and consider integrating a PubMed search if needed.
13.12.2024
Updated the references and standardized the publication date format to enable filtering by date and year, ensuring that the newest articles appear first. Senior researchers’ list was last updated on December 5, 2024. Updates are now scheduled weekly.
