Skip to content

Connect

download¤

append_connections(papers, connection_field_name='local__referenced_to') ¤

Find connections between papers based on the DOIs of their citations.

Parameters:

Name Type Description Default
papers list

list of paper metadata

required
connection_field_name Optional[str]

name of the field to save the connection info

'local__referenced_to'

Returns:

Type Description
list

list of paper metadata with connection info

Source code in kirsche/connect.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def append_connections(
    papers: list, connection_field_name: Optional[str] = "local__referenced_to"
) -> list:
    """Find connections between papers based on citation DOI.

    For every paper, collect the DOIs of the other entries in ``papers`` that
    appear among its ``references`` and store them under
    ``connection_field_name``. DOIs are compared case-insensitively.

    The paper dictionaries are modified in place: each paper's ``doi`` is
    replaced by its lower-cased form (or ``""`` when missing/empty) and the
    connection field is added. A paper whose ``references`` key is missing or
    empty simply gets an empty connection list.

    :param papers: list of paper metadata
    :param connection_field_name: name of the field to save the connection
        info; ``None`` falls back to ``"local__referenced_to"``
    :return: list of paper metadata with connection info (the same dict
        objects as in ``papers``, in the same order)
    """

    if connection_field_name is None:
        connection_field_name = "local__referenced_to"

    logger.debug(f"Appending connections to {len(papers)} papers...")

    # Normalise every DOI once up front instead of re-lowering each candidate
    # on every pass of the loop below. Missing or None DOIs become "".
    for paper in papers:
        paper["doi"] = (paper.get("doi") or "").lower()

    enhanced_papers = []

    for paper in papers:
        # A missing or None "references" entry means "cites nothing".
        references = paper.get("references") or []

        # A set gives constant-time membership tests. References without a
        # DOI are dropped, so the empty string can never match a paper that
        # itself has no DOI.
        cited_dois = {ref["doi"].lower() for ref in references if ref.get("doi")}

        # Walk `papers` (not `cited_dois`) so the result keeps the order of
        # the input list.
        paper[connection_field_name] = [
            candidate["doi"] for candidate in papers if candidate["doi"] in cited_dois
        ]
        enhanced_papers.append(paper)

    logger.debug(
        f"enhanced {len([p for p in enhanced_papers if p.get(connection_field_name)])}"
    )

    return enhanced_papers

append_connections_for_file(data_file, target=None, save_keys=None, connection_field_name='local__referenced_to') ¤

Connect the papers stored in a JSON file based on the DOIs of their citations.

Parameters:

Name Type Description Default
data_file Union[str, Path]

path to json file that contains the downloaded paper metadata

required
target Optional[Union[str, Path]]

path to json file to save the enhanced paper metadata

None
save_keys Optional[list]

list of keys to save from the original paper metadata

None
connection_field_name Optional[str]

name of the field to save the connection info

'local__referenced_to'

Returns:

Type Description

list of paper metadata with connection info

Source code in kirsche/connect.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def append_connections_for_file(
    data_file: Union[str, Path],
    target: Optional[Union[str, Path]] = None,
    save_keys: Optional[list] = None,
    connection_field_name: Optional[str] = "local__referenced_to",
):
    """Load papers from a JSON file, connect them by citation DOI and save them.

    The file is read with ``load_json``, connections are computed with
    ``append_connections``, and the result is handed to
    ``save_connected_papers`` together with ``target`` and ``save_keys``.

    :param data_file: path to json file that contains the downloaded paper metadata
    :param target: path to json file to save the enhanced paper metadata
    :param save_keys: list of keys to save from the original paper metadata
    :param connection_field_name: name of the field to save the connection
        info; ``None`` falls back to ``"local__referenced_to"``
    :return: whatever ``save_connected_papers`` returns -- presumably the list
        of paper metadata with connection info, reduced to ``save_keys``
    """

    field_name = (
        "local__referenced_to"
        if connection_field_name is None
        else connection_field_name
    )

    connected_papers = append_connections(
        load_json(data_file), connection_field_name=field_name
    )

    # Saving (and, per the original note, filtering out unnecessary keys) is
    # delegated entirely to save_connected_papers.
    return save_connected_papers(
        connected_papers,
        target=target,
        save_keys=save_keys,
        connection_field_name=field_name,
    )