Skip to content

Download

download¤

download_metadata(unique_ids, target=None, sleep_time=1) ¤

Download paper data

Parameters:

Name Type Description Default
unique_ids list

list of unique ids to find paper metadata

required
target Optional[Union[str, Path]]

path to save data

None
sleep_time int

time to sleep between requests, defaults to 1

1
Source code in kirsche/download.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def download_metadata(
    unique_ids: list, target: Optional[Union[str, Path]] = None, sleep_time: int = 1
):
    """Download paper data

    Each id is looked up with ``get_paper_info``. A failed lookup is logged
    and skipped; once more than 10 lookups have failed, the remaining ids are
    abandoned. The function pauses for ``sleep_time`` seconds after every
    attempted request, whether it succeeded or failed.

    :param unique_ids: list of unique ids to find paper metadata
    :param target: path to save data; nothing is saved when falsy
    :param sleep_time: time to sleep between requests, defaults to 1
    :return: list of the paper info objects that were retrieved successfully
    """

    # Number of failed lookups tolerated before giving up on the rest.
    max_failures = 10

    paper_info = []
    failures = 0
    for doi in unique_ids:
        logger.debug(f"Getting info for {doi}")
        if failures > max_failures:
            logger.error("Failed too many times downloading data... breaking out")
            break

        try:
            paper_info.append(get_paper_info(doi))
        except Exception as e:
            # Best effort: one bad id must not abort the whole batch.
            logger.error(f"{doi} failed: {e}")
            failures += 1

        # A failed request is still a request, so pause after failures too;
        # otherwise consecutive failures are fired back-to-back.
        time.sleep(sleep_time)

    if target:
        logger.debug(f"Saving to {target}")
        save_batch_json(paper_info, target)
        logger.debug(f"Saved to {target}")

    return paper_info

list_dois(paper_ids=None, bib_file=None) ¤

list_dois loads a list of DOIs from multiple possible sources

Parameters:

Name Type Description Default
paper_ids Optional[Union[list, str]]

list of DOIs

None
bib_file Optional[Union[str, Path]]

path to bib file

None

Returns:

Type Description
list

list of DOIs loaded

Source code in kirsche/download.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def list_dois(
    paper_ids: Optional[Union[list, str]] = None,
    bib_file: Optional[Union[str, Path]] = None,
) -> list:
    """
    Resolve the DOIs to work with from one of the supported sources.

    ``paper_ids`` takes precedence over ``bib_file``. A single string is
    wrapped in a one-element list; any other truthy value is returned as is.
    When neither source is given an error is logged and an empty list is
    returned.

    :param paper_ids: list of DOIs
    :param bib_file: path to bib file
    :return: list of DOIs loaded
    """
    if not paper_ids and not bib_file:
        # No source at all: report it and fall back to an empty result.
        logger.error(f"Specify one of the DOI sources...")
        dois = []
    elif not paper_ids:
        dois = get_unique_ids_from_bib(bib_file)
        logger.debug(f"Retrieved {len(dois)} from {bib_file}")
    else:
        logger.debug(f"Using paper_ids directly...")
        dois = [paper_ids] if isinstance(paper_ids, str) else paper_ids

    doi_count = len(dois)
    logger.debug(f"{doi_count} DOIs: {dois}")

    return dois

list_unique_ids(bib_file) ¤

list_unique_ids loads a list of unique ids from a bib file

Parameters:

Name Type Description Default
bib_file Union[str, Path]

path to bib file

required

Returns:

Type Description
list

list of unique ids loaded

Source code in kirsche/download.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def list_unique_ids(bib_file: Union[str, Path]) -> list:
    """
    Load the unique ids obtained from a bib file via ``get_unique_ids_from_bib``.

    :param bib_file: path to bib file
    :return: list of unique ids loaded
    """
    ids_from_bib = get_unique_ids_from_bib(bib_file)
    id_count = len(ids_from_bib)

    # Two debug records: first the count per source, then the ids themselves.
    logger.debug(f"Retrieved {id_count} from {bib_file}")
    logger.debug(f"{id_count} unique ids: {ids_from_bib}")

    return ids_from_bib

main(paper_id, bib_file, target, sleep_time) ¤

Download paper data from service providers (e.g., SemanticScholar)

Source code in kirsche/download.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
@click.command()
@click.option("--paper_id", "-p", help="Paper ID", multiple=True)
@click.option("--bib_file", "-b", help="Bib file path")
@click.option("--target", "-t", help="Target data file path")
@click.option("--sleep_time", "-s", default=1, help="Sleep time between requests")
def main(paper_id, bib_file, target, sleep_time):
    """Download paper data from service providers (e.g., SemanticScholar)"""
    # NOTE: the docstring above is what click prints for --help, so it is kept
    # to a single line; details live in these comments instead.
    #
    # Ids come from the bib file when one is given, otherwise from the
    # (repeatable) --paper_id option.

    if bib_file:
        paper_id = list_unique_ids(bib_file)
    elif isinstance(paper_id, str):
        # Only reachable when the function is called directly with a single
        # id; click itself never passes a bare string for a multiple option.
        paper_id = [paper_id]
    else:
        # click passes a tuple for multiple=True; download_metadata is
        # declared to take a list, so normalise here.
        paper_id = list(paper_id or ())

    paper_info = download_metadata(paper_id, target, sleep_time)

    return paper_info