1 | # Licensed to the StackStorm, Inc ('StackStorm') under one or more |
||
2 | # contributor license agreements. See the NOTICE file distributed with |
||
3 | # this work for additional information regarding copyright ownership. |
||
4 | # The ASF licenses this file to You under the Apache License, Version 2.0 |
||
5 | # (the "License"); you may not use this file except in compliance with |
||
6 | # the License. You may obtain a copy of the License at |
||
7 | # |
||
8 | # http://www.apache.org/licenses/LICENSE-2.0 |
||
9 | # |
||
10 | # Unless required by applicable law or agreed to in writing, software |
||
11 | # distributed under the License is distributed on an "AS IS" BASIS, |
||
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||
13 | # See the License for the specific language governing permissions and |
||
14 | # limitations under the License. |
||
15 | |||
16 | from __future__ import absolute_import |
||
17 | from jsonpath_rw import parse |
||
18 | import re |
||
19 | |||
20 | # A simple expression is defined as a series of letters [a-zA-Z], numbers [0-9], |
||
21 | # dashes '-', and underscores '_' separated by one period '.'. |
||
22 | # A simple expression must not start with a period. |
||
23 | # A simple expression must not end with a period. |
||
24 | # A simple expression must not have more than one period in succession, ie. in |
||
25 | # between each period must be one or more of the valid non-period characters. |
||
26 | # |
||
27 | # Examples of valid "simple expressions": |
||
28 | # abc |
||
29 | # abc.def.ghi |
||
30 | # |
||
31 | # Examples of non-simple expressions: |
||
32 | # .aaa |
||
33 | # a..b |
||
34 | # abc. |
||
35 | # a(* |
||
# NOTE: this must be a raw string. In a normal string literal '\-' and '\.'
# are invalid escape sequences (they trigger a DeprecationWarning /
# SyntaxWarning); the raw prefix passes the backslashes through to the regex
# engine untouched. The resulting pattern text is unchanged.
SIMPLE_EXPRESSION_REGEX = r"^([a-zA-Z0-9\-_]+\.)*([a-zA-Z0-9\-_]+)$"
# Compiled once at import time so that lookups do not re-parse the pattern.
SIMPLE_EXPRESSION_REGEX_CMPL = re.compile(SIMPLE_EXPRESSION_REGEX)
||
38 | |||
39 | |||
40 | def _get_value_simple(doc, key): |
||
41 | """ |
||
42 | Extracts a value from a nested set of dictionaries 'doc' based on |
||
43 | a 'key' string. |
||
44 | The key string is expected to be of the format 'x.y.z' |
||
45 | where each component in the string is a key in a dictionary separated |
||
46 | by '.' to denote the next key is in a nested dictionary. |
||
47 | |||
48 | Returns the extracted value from the key specified (if found) |
||
49 | Returns None if the key can not be found |
||
50 | """ |
||
51 | split_key = key.split('.') |
||
52 | if not split_key: |
||
53 | return None |
||
54 | |||
55 | value = doc |
||
56 | for k in split_key: |
||
57 | if isinstance(value, dict) and k in value: |
||
58 | value = value[k] |
||
59 | else: |
||
60 | return None |
||
61 | return value |
||
62 | |||
63 | |||
def _get_value_complex(doc, key):
    """
    Looks up 'key' in the nested dictionaries of 'doc', treating 'key' as a
    jsonpath_rw expression:
    http://jsonpath-rw.readthedocs.io/en/stable/

    Returns the value of the first match produced by the expression
    Returns None if the expression produces no matches
    """
    found = parse(key).find(doc)
    if len(found) < 1:
        return None
    # Only the first match is reported, any further matches are ignored.
    return found[0].value
||
78 | |||
79 | |||
def get_value(doc, key):
    """
    Extracts the value addressed by 'key' from the dictionary 'doc'.

    Keys matching SIMPLE_EXPRESSION_REGEX are resolved with
    _get_value_simple, every other key is resolved with _get_value_complex.

    Raises ValueError if 'key' is None or empty.
    Raises ValueError if 'doc' is not a dict.
    """
    if not key:
        raise ValueError("key is None or empty: '{}'".format(key))

    if not isinstance(doc, dict):
        message = "doc is not an instance of dict: type={} value='{}'"
        raise ValueError(message.format(type(doc), doc))

    # jsonpath_rw can be very slow when processing expressions, so plain
    # dotted keys take a "fast path" that never touches jsonpath_rw. Anything
    # more elaborate falls back to jsonpath_rw, which keeps the flexibility
    # while speeding up the common case.
    is_simple = SIMPLE_EXPRESSION_REGEX_CMPL.match(key) is not None
    lookup = _get_value_simple if is_simple else _get_value_complex
    return lookup(doc, key)
||
97 | |||
98 | |||
def get_kvps(doc, keys):
    """
    Builds a filtered copy of the dictionary 'doc' that contains only the
    requested 'keys' ('keys' can be a single string or a list of strings).

    Each key is looked up with get_value; keys whose value is None are
    skipped. A key containing '.' is stored as nested dictionaries, one
    level per '.'-separated component.

    Return an empty dict if no keys are found.
    """
    wanted = keys if isinstance(keys, list) else [keys]

    filtered = {}
    for key in wanted:
        value = get_value(doc, key)
        if value is None:
            continue

        # Walk (creating as needed) one nested dict per leading component,
        # then store the value under the final component.
        parts = key.split('.')
        target = filtered
        for attr in parts[:-1]:
            if attr not in target:
                target[attr] = {}
            target = target[attr]
        target[parts[-1]] = value

    return filtered
||
125 |
Escape sequences in Python are generally interpreted according to rules similar to standard C. Only if strings are prefixed with r or R are backslashes left uninterpreted (raw strings), which is the form normally used for regular expressions. The escape sequence that was used indicates that you might have intended to write a regular expression.
Learn more about the available escape sequences in the Python documentation.