|
1
|
|
|
from urllib.parse import urlparse |
|
2
|
|
|
|
|
3
|
|
|
import requests |
|
4
|
|
|
from bs4 import BeautifulSoup |
|
5
|
|
|
from loguru import logger |
|
6
|
|
|
|
|
7
|
|
|
import graphinate |
|
8
|
|
|
|
|
9
|
|
|
DEFAULT_MAX_DEPTH = 0 |
|
10
|
|
|
|
|
11
|
|
|
|
|
12
|
|
|
def page_links_graph_model(max_depth: int = DEFAULT_MAX_DEPTH):
    """
    Create a graph model based on page links.

    Args:
        max_depth (int, optional): The maximum depth to crawl for page links.
            Defaults to DEFAULT_MAX_DEPTH.

    Returns:
        GraphModel: A graph model representing the page links.
    """

    def _links(url: str, depth=0, **kwargs):
        """Yield {'source', 'target'} edge dicts for every crawlable link on *url*,
        recursing into child pages while ``depth < max_depth``."""
        # Log before fetching (the original logged "Analyzing" only after the
        # request had already completed) and use f-strings — without the `f`
        # prefix the literal text '{url}' was being logged.
        logger.debug(f'Analyzing Page: {url}')
        # A timeout prevents the crawl from hanging indefinitely on an
        # unresponsive server.
        reqs = requests.get(url, timeout=30)
        soup = BeautifulSoup(reqs.text, 'lxml')
        logger.debug(f'Done Analyzing Page: {url}')

        for link in soup.find_all('a', href=True):
            child_url = link.get('href')

            if child_url.startswith('javascript:'):  # Skip JavaScript links
                continue

            if child_url.startswith('//'):  # Handle protocol-relative URLs
                child_url = f"https:{child_url}"

            if not bool(urlparse(child_url).netloc):  # Skip relative URLs
                # child_url = urljoin(url, child_url)
                continue

            if not child_url.startswith('http'):  # Skip non-HTTP URLs
                continue

            # Emit the edge, then recurse into the child page while still
            # under the depth budget.
            yield {'source': url, 'target': child_url}
            if depth < max_depth:
                yield from _links(child_url, depth=depth + 1, **kwargs)

    graph_model = graphinate.model(name='Web')

    @graph_model.edge()
    def link(url, **kwargs):
        yield from _links(url, **kwargs)

    return graph_model
|
55
|
|
|
|
|
56
|
|
|
|
|
57
|
|
|
if __name__ == '__main__':
    # Build a model that follows links one level deep from the root URL.
    web_model = page_links_graph_model(1)

    crawl_params = {
        # 'url': 'https://github.com/erivlis/graphinate'
        'url': 'https://erivlis.github.io/graphinate/'
    }

    # Materialize the model as a directed-graph GraphQL schema and serve it.
    gql_builder = graphinate.builders.GraphQLBuilder(web_model, graph_type=graphinate.GraphType.DiGraph)
    gql_schema = gql_builder.build(default_node_attributes={'type': 'url'}, **crawl_params)
    graphinate.graphql.server(gql_schema)