Test Failed
Push — main ( 973aa1...436074 )
by Christian
02:37
created

text.*Decoder.parseNext   F

Complexity

Conditions 83

Size

Total Lines 262
Code Lines 186

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 83
eloc 186
nop 1
dl 0
loc 262
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

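Commonly applied refactorings include Extract Method and, when many parameters or temporary variables are involved, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.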

Complexity

Complex classes like text.*Decoder.parseNext often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
// Copyright 2018 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
package text
6
7
import (
8
	"bytes"
9
	"fmt"
10
	"io"
11
	"regexp"
12
	"strconv"
13
	"unicode/utf8"
14
15
	"google.golang.org/protobuf/internal/errors"
16
)
17
18
// Decoder is a token-based textproto decoder.
19
type Decoder struct {
20
	// lastCall is last method called, either readCall or peekCall.
21
	// Initial value is readCall.
22
	lastCall call
23
24
	// lastToken contains the last read token.
25
	lastToken Token
26
27
	// lastErr contains the last read error.
28
	lastErr error
29
30
	// openStack is a stack containing the byte characters for MessageOpen and
31
	// ListOpen kinds. The top of stack represents the message or the list that
32
	// the current token is nested in. An empty stack means the current token is
33
	// at the top level message. The characters '{' and '<' both represent the
34
	// MessageOpen kind.
35
	openStack []byte
36
37
	// orig is used in reporting line and column.
38
	orig []byte
39
	// in contains the unconsumed input.
40
	in []byte
41
}
42
43
// NewDecoder returns a Decoder to read the given []byte.
44
func NewDecoder(b []byte) *Decoder {
45
	return &Decoder{orig: b, in: b}
46
}
47
48
// ErrUnexpectedEOF means that EOF was encountered in the middle of the input.
49
var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)
50
51
// call specifies which Decoder method was invoked.
type call uint8

const (
	// readCall records that Read was the last method invoked. It is the zero
	// value of call.
	readCall call = iota
	// peekCall records that Peek was the last method invoked.
	peekCall
)
58
59
// Peek looks ahead and returns the next token and error without advancing a read.
60
func (d *Decoder) Peek() (Token, error) {
61
	defer func() { d.lastCall = peekCall }()
62
	if d.lastCall == readCall {
63
		d.lastToken, d.lastErr = d.Read()
64
	}
65
	return d.lastToken, d.lastErr
66
}
67
68
// Read returns the next token.
69
// It will return an error if there is no valid token.
70
func (d *Decoder) Read() (Token, error) {
71
	defer func() { d.lastCall = readCall }()
72
	if d.lastCall == peekCall {
73
		return d.lastToken, d.lastErr
74
	}
75
76
	tok, err := d.parseNext(d.lastToken.kind)
77
	if err != nil {
78
		return Token{}, err
79
	}
80
81
	switch tok.kind {
82
	case comma, semicolon:
83
		tok, err = d.parseNext(tok.kind)
84
		if err != nil {
85
			return Token{}, err
86
		}
87
	}
88
	d.lastToken = tok
89
	return tok, nil
90
}
91
92
// Format strings for syntax errors raised by parseNext. Both take the
// offending input byte as their single argument.
const (
	mismatchedFmt = "mismatched close character %q"
	unexpectedFmt = "unexpected character %q"
)
96
97
// parseNext parses the next Token based on given last kind.
98
func (d *Decoder) parseNext(lastKind Kind) (Token, error) {
99
	// Trim leading spaces.
100
	d.consume(0)
101
	isEOF := false
102
	if len(d.in) == 0 {
103
		isEOF = true
104
	}
105
106
	switch lastKind {
107
	case EOF:
108
		return d.consumeToken(EOF, 0, 0), nil
109
110
	case bof:
111
		// Start of top level message. Next token can be EOF or Name.
112
		if isEOF {
113
			return d.consumeToken(EOF, 0, 0), nil
114
		}
115
		return d.parseFieldName()
116
117
	case Name:
118
		// Next token can be MessageOpen, ListOpen or Scalar.
119
		if isEOF {
120
			return Token{}, ErrUnexpectedEOF
121
		}
122
		switch ch := d.in[0]; ch {
123
		case '{', '<':
124
			d.pushOpenStack(ch)
125
			return d.consumeToken(MessageOpen, 1, 0), nil
126
		case '[':
127
			d.pushOpenStack(ch)
128
			return d.consumeToken(ListOpen, 1, 0), nil
129
		default:
130
			return d.parseScalar()
131
		}
132
133
	case Scalar:
134
		openKind, closeCh := d.currentOpenKind()
135
		switch openKind {
136
		case bof:
137
			// Top level message.
138
			// 	Next token can be EOF, comma, semicolon or Name.
139
			if isEOF {
140
				return d.consumeToken(EOF, 0, 0), nil
141
			}
142
			switch d.in[0] {
143
			case ',':
144
				return d.consumeToken(comma, 1, 0), nil
145
			case ';':
146
				return d.consumeToken(semicolon, 1, 0), nil
147
			default:
148
				return d.parseFieldName()
149
			}
150
151
		case MessageOpen:
152
			// Next token can be MessageClose, comma, semicolon or Name.
153
			if isEOF {
154
				return Token{}, ErrUnexpectedEOF
155
			}
156
			switch ch := d.in[0]; ch {
157
			case closeCh:
158
				d.popOpenStack()
159
				return d.consumeToken(MessageClose, 1, 0), nil
160
			case otherCloseChar[closeCh]:
161
				return Token{}, d.newSyntaxError(mismatchedFmt, ch)
162
			case ',':
163
				return d.consumeToken(comma, 1, 0), nil
164
			case ';':
165
				return d.consumeToken(semicolon, 1, 0), nil
166
			default:
167
				return d.parseFieldName()
168
			}
169
170
		case ListOpen:
171
			// Next token can be ListClose or comma.
172
			if isEOF {
173
				return Token{}, ErrUnexpectedEOF
174
			}
175
			switch ch := d.in[0]; ch {
176
			case ']':
177
				d.popOpenStack()
178
				return d.consumeToken(ListClose, 1, 0), nil
179
			case ',':
180
				return d.consumeToken(comma, 1, 0), nil
181
			default:
182
				return Token{}, d.newSyntaxError(unexpectedFmt, ch)
183
			}
184
		}
185
186
	case MessageOpen:
187
		// Next token can be MessageClose or Name.
188
		if isEOF {
189
			return Token{}, ErrUnexpectedEOF
190
		}
191
		_, closeCh := d.currentOpenKind()
192
		switch ch := d.in[0]; ch {
193
		case closeCh:
194
			d.popOpenStack()
195
			return d.consumeToken(MessageClose, 1, 0), nil
196
		case otherCloseChar[closeCh]:
197
			return Token{}, d.newSyntaxError(mismatchedFmt, ch)
198
		default:
199
			return d.parseFieldName()
200
		}
201
202
	case MessageClose:
203
		openKind, closeCh := d.currentOpenKind()
204
		switch openKind {
205
		case bof:
206
			// Top level message.
207
			// Next token can be EOF, comma, semicolon or Name.
208
			if isEOF {
209
				return d.consumeToken(EOF, 0, 0), nil
210
			}
211
			switch ch := d.in[0]; ch {
212
			case ',':
213
				return d.consumeToken(comma, 1, 0), nil
214
			case ';':
215
				return d.consumeToken(semicolon, 1, 0), nil
216
			default:
217
				return d.parseFieldName()
218
			}
219
220
		case MessageOpen:
221
			// Next token can be MessageClose, comma, semicolon or Name.
222
			if isEOF {
223
				return Token{}, ErrUnexpectedEOF
224
			}
225
			switch ch := d.in[0]; ch {
226
			case closeCh:
227
				d.popOpenStack()
228
				return d.consumeToken(MessageClose, 1, 0), nil
229
			case otherCloseChar[closeCh]:
230
				return Token{}, d.newSyntaxError(mismatchedFmt, ch)
231
			case ',':
232
				return d.consumeToken(comma, 1, 0), nil
233
			case ';':
234
				return d.consumeToken(semicolon, 1, 0), nil
235
			default:
236
				return d.parseFieldName()
237
			}
238
239
		case ListOpen:
240
			// Next token can be ListClose or comma
241
			if isEOF {
242
				return Token{}, ErrUnexpectedEOF
243
			}
244
			switch ch := d.in[0]; ch {
245
			case closeCh:
246
				d.popOpenStack()
247
				return d.consumeToken(ListClose, 1, 0), nil
248
			case ',':
249
				return d.consumeToken(comma, 1, 0), nil
250
			default:
251
				return Token{}, d.newSyntaxError(unexpectedFmt, ch)
252
			}
253
		}
254
255
	case ListOpen:
256
		// Next token can be ListClose, MessageStart or Scalar.
257
		if isEOF {
258
			return Token{}, ErrUnexpectedEOF
259
		}
260
		switch ch := d.in[0]; ch {
261
		case ']':
262
			d.popOpenStack()
263
			return d.consumeToken(ListClose, 1, 0), nil
264
		case '{', '<':
265
			d.pushOpenStack(ch)
266
			return d.consumeToken(MessageOpen, 1, 0), nil
267
		default:
268
			return d.parseScalar()
269
		}
270
271
	case ListClose:
272
		openKind, closeCh := d.currentOpenKind()
273
		switch openKind {
274
		case bof:
275
			// Top level message.
276
			// Next token can be EOF, comma, semicolon or Name.
277
			if isEOF {
278
				return d.consumeToken(EOF, 0, 0), nil
279
			}
280
			switch ch := d.in[0]; ch {
281
			case ',':
282
				return d.consumeToken(comma, 1, 0), nil
283
			case ';':
284
				return d.consumeToken(semicolon, 1, 0), nil
285
			default:
286
				return d.parseFieldName()
287
			}
288
289
		case MessageOpen:
290
			// Next token can be MessageClose, comma, semicolon or Name.
291
			if isEOF {
292
				return Token{}, ErrUnexpectedEOF
293
			}
294
			switch ch := d.in[0]; ch {
295
			case closeCh:
296
				d.popOpenStack()
297
				return d.consumeToken(MessageClose, 1, 0), nil
298
			case otherCloseChar[closeCh]:
299
				return Token{}, d.newSyntaxError(mismatchedFmt, ch)
300
			case ',':
301
				return d.consumeToken(comma, 1, 0), nil
302
			case ';':
303
				return d.consumeToken(semicolon, 1, 0), nil
304
			default:
305
				return d.parseFieldName()
306
			}
307
308
		default:
309
			// It is not possible to have this case. Let it panic below.
310
		}
311
312
	case comma, semicolon:
313
		openKind, closeCh := d.currentOpenKind()
314
		switch openKind {
315
		case bof:
316
			// Top level message. Next token can be EOF or Name.
317
			if isEOF {
318
				return d.consumeToken(EOF, 0, 0), nil
319
			}
320
			return d.parseFieldName()
321
322
		case MessageOpen:
323
			// Next token can be MessageClose or Name.
324
			if isEOF {
325
				return Token{}, ErrUnexpectedEOF
326
			}
327
			switch ch := d.in[0]; ch {
328
			case closeCh:
329
				d.popOpenStack()
330
				return d.consumeToken(MessageClose, 1, 0), nil
331
			case otherCloseChar[closeCh]:
332
				return Token{}, d.newSyntaxError(mismatchedFmt, ch)
333
			default:
334
				return d.parseFieldName()
335
			}
336
337
		case ListOpen:
338
			if lastKind == semicolon {
339
				// It is not be possible to have this case as logic here
340
				// should not have produced a semicolon Token when inside a
341
				// list. Let it panic below.
342
				break
343
			}
344
			// Next token can be MessageOpen or Scalar.
345
			if isEOF {
346
				return Token{}, ErrUnexpectedEOF
347
			}
348
			switch ch := d.in[0]; ch {
349
			case '{', '<':
350
				d.pushOpenStack(ch)
351
				return d.consumeToken(MessageOpen, 1, 0), nil
352
			default:
353
				return d.parseScalar()
354
			}
355
		}
356
	}
357
358
	line, column := d.Position(len(d.orig) - len(d.in))
359
	panic(fmt.Sprintf("Decoder.parseNext: bug at handling line %d:%d with lastKind=%v", line, column, lastKind))
360
}
361
362
// otherCloseChar maps each message closing character to the alternative
// message closing character. It is used to detect a message that was opened
// with one bracket style and closed with the other.
var otherCloseChar = map[byte]byte{
	'}': '>',
	'>': '}',
}
366
367
// currentOpenKind indicates whether current position is inside a message, list
368
// or top-level message by returning MessageOpen, ListOpen or bof respectively.
369
// If the returned kind is either a MessageOpen or ListOpen, it also returns the
370
// corresponding closing character.
371
func (d *Decoder) currentOpenKind() (Kind, byte) {
372
	if len(d.openStack) == 0 {
373
		return bof, 0
374
	}
375
	openCh := d.openStack[len(d.openStack)-1]
376
	switch openCh {
377
	case '{':
378
		return MessageOpen, '}'
379
	case '<':
380
		return MessageOpen, '>'
381
	case '[':
382
		return ListOpen, ']'
383
	}
384
	panic(fmt.Sprintf("Decoder: openStack contains invalid byte %c", openCh))
385
}
386
387
// pushOpenStack records ch, the opening character of a message or list that
// was just entered, on top of openStack.
func (d *Decoder) pushOpenStack(ch byte) {
	d.openStack = append(d.openStack, ch)
}
390
391
// popOpenStack removes the top entry of openStack. The stack must not be
// empty; slicing an empty stack here panics.
func (d *Decoder) popOpenStack() {
	d.openStack = d.openStack[:len(d.openStack)-1]
}
394
395
// parseFieldName parses field name and separator.
396
func (d *Decoder) parseFieldName() (tok Token, err error) {
397
	defer func() {
398
		if err == nil && d.tryConsumeChar(':') {
399
			tok.attrs |= hasSeparator
400
		}
401
	}()
402
403
	// Extension or Any type URL.
404
	if d.in[0] == '[' {
405
		return d.parseTypeName()
406
	}
407
408
	// Identifier.
409
	if size := parseIdent(d.in, false); size > 0 {
410
		return d.consumeToken(Name, size, uint8(IdentName)), nil
411
	}
412
413
	// Field number. Identify if input is a valid number that is not negative
414
	// and is decimal integer within 32-bit range.
415
	if num := parseNumber(d.in); num.size > 0 {
416
		if !num.neg && num.kind == numDec {
417
			if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil {
418
				return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil
419
			}
420
		}
421
		return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size])
422
	}
423
424
	return Token{}, d.newSyntaxError("invalid field name: %s", errRegexp.Find(d.in))
425
}
426
427
// parseTypeName parses Any type URL or extension field name. The name is
428
// enclosed in [ and ] characters. The C++ parser does not handle many legal URL
429
// strings. This implementation is more liberal and allows for the pattern
430
// ^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`). Whitespaces and comments are allowed
431
// in between [ ], '.', '/' and the sub names.
432
func (d *Decoder) parseTypeName() (Token, error) {
433
	startPos := len(d.orig) - len(d.in)
434
	// Use alias s to advance first in order to use d.in for error handling.
435
	// Caller already checks for [ as first character.
436
	s := consume(d.in[1:], 0)
437
	if len(s) == 0 {
438
		return Token{}, ErrUnexpectedEOF
439
	}
440
441
	var name []byte
442
	for len(s) > 0 && isTypeNameChar(s[0]) {
443
		name = append(name, s[0])
444
		s = s[1:]
445
	}
446
	s = consume(s, 0)
447
448
	var closed bool
449
	for len(s) > 0 && !closed {
450
		switch {
451
		case s[0] == ']':
452
			s = s[1:]
453
			closed = true
454
455
		case s[0] == '/', s[0] == '.':
456
			if len(name) > 0 && (name[len(name)-1] == '/' || name[len(name)-1] == '.') {
457
				return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s",
458
					d.orig[startPos:len(d.orig)-len(s)+1])
459
			}
460
			name = append(name, s[0])
461
			s = s[1:]
462
			s = consume(s, 0)
463
			for len(s) > 0 && isTypeNameChar(s[0]) {
464
				name = append(name, s[0])
465
				s = s[1:]
466
			}
467
			s = consume(s, 0)
468
469
		default:
470
			return Token{}, d.newSyntaxError(
471
				"invalid type URL/extension field name: %s", d.orig[startPos:len(d.orig)-len(s)+1])
472
		}
473
	}
474
475
	if !closed {
476
		return Token{}, ErrUnexpectedEOF
477
	}
478
479
	// First character cannot be '.'. Last character cannot be '.' or '/'.
480
	size := len(name)
481
	if size == 0 || name[0] == '.' || name[size-1] == '.' || name[size-1] == '/' {
482
		return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s",
483
			d.orig[startPos:len(d.orig)-len(s)])
484
	}
485
486
	d.in = s
487
	endPos := len(d.orig) - len(d.in)
488
	d.consume(0)
489
490
	return Token{
491
		kind:  Name,
492
		attrs: uint8(TypeName),
493
		pos:   startPos,
494
		raw:   d.orig[startPos:endPos],
495
		str:   string(name),
496
	}, nil
497
}
498
499
// isTypeNameChar reports whether b may appear in a sub name of a type URL or
// extension field name: '-', '_', an ASCII digit or an ASCII letter.
func isTypeNameChar(b byte) bool {
	switch {
	case b == '-', b == '_':
		return true
	case '0' <= b && b <= '9':
		return true
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	}
	return false
}
505
506
// isWhiteSpace reports whether b is a space, newline, carriage return or
// horizontal tab.
func isWhiteSpace(b byte) bool {
	return b == ' ' || b == '\n' || b == '\r' || b == '\t'
}
514
515
// parseIdent parses an unquoted proto identifier and returns size.
516
// If allowNeg is true, it allows '-' to be the first character in the
517
// identifier. This is used when parsing literal values like -infinity, etc.
518
// Regular expression matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*`
519
func parseIdent(input []byte, allowNeg bool) int {
520
	var size int
521
522
	s := input
523
	if len(s) == 0 {
524
		return 0
525
	}
526
527
	if allowNeg && s[0] == '-' {
528
		s = s[1:]
529
		size++
530
		if len(s) == 0 {
531
			return 0
532
		}
533
	}
534
535
	switch {
536
	case s[0] == '_',
537
		'a' <= s[0] && s[0] <= 'z',
538
		'A' <= s[0] && s[0] <= 'Z':
539
		s = s[1:]
540
		size++
541
	default:
542
		return 0
543
	}
544
545
	for len(s) > 0 && (s[0] == '_' ||
546
		'a' <= s[0] && s[0] <= 'z' ||
547
		'A' <= s[0] && s[0] <= 'Z' ||
548
		'0' <= s[0] && s[0] <= '9') {
549
		s = s[1:]
550
		size++
551
	}
552
553
	if len(s) > 0 && !isDelim(s[0]) {
554
		return 0
555
	}
556
557
	return size
558
}
559
560
// parseScalar parses for a string, literal or number value.
561
func (d *Decoder) parseScalar() (Token, error) {
562
	if d.in[0] == '"' || d.in[0] == '\'' {
563
		return d.parseStringValue()
564
	}
565
566
	if tok, ok := d.parseLiteralValue(); ok {
567
		return tok, nil
568
	}
569
570
	if tok, ok := d.parseNumberValue(); ok {
571
		return tok, nil
572
	}
573
574
	return Token{}, d.newSyntaxError("invalid scalar value: %s", errRegexp.Find(d.in))
575
}
576
577
// parseLiteralValue parses a literal value. A literal value is used for
578
// bools, special floats and enums. This function simply identifies that the
579
// field value is a literal.
580
func (d *Decoder) parseLiteralValue() (Token, bool) {
581
	size := parseIdent(d.in, true)
582
	if size == 0 {
583
		return Token{}, false
584
	}
585
	return d.consumeToken(Scalar, size, literalValue), true
586
}
587
588
// consumeToken constructs a Token for given Kind from d.in and consumes given
589
// size-length from it.
590
func (d *Decoder) consumeToken(kind Kind, size int, attrs uint8) Token {
591
	// Important to compute raw and pos before consuming.
592
	tok := Token{
593
		kind:  kind,
594
		attrs: attrs,
595
		pos:   len(d.orig) - len(d.in),
596
		raw:   d.in[:size],
597
	}
598
	d.consume(size)
599
	return tok
600
}
601
602
// newSyntaxError returns a syntax error with line and column information for
603
// current position.
604
func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
605
	e := errors.New(f, x...)
606
	line, column := d.Position(len(d.orig) - len(d.in))
607
	return errors.New("syntax error (line %d:%d): %v", line, column, e)
608
}
609
610
// Position returns line and column number of given index of the original input.
611
// It will panic if index is out of range.
612
func (d *Decoder) Position(idx int) (line int, column int) {
613
	b := d.orig[:idx]
614
	line = bytes.Count(b, []byte("\n")) + 1
615
	if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
616
		b = b[i+1:]
617
	}
618
	column = utf8.RuneCount(b) + 1 // ignore multi-rune characters
619
	return line, column
620
}
621
622
// tryConsumeChar consumes the next input byte, plus any whitespace or comments
// after it, if that byte equals c. It reports whether it did so; otherwise the
// input is left untouched.
func (d *Decoder) tryConsumeChar(c byte) bool {
	if len(d.in) > 0 && d.in[0] == c {
		d.consume(1)
		return true
	}
	return false
}
629
630
// consume consumes n bytes of input and any subsequent whitespace or comments.
631
func (d *Decoder) consume(n int) {
632
	d.in = consume(d.in, n)
633
	return
634
}
635
636
// consume consumes n bytes of input and any subsequent whitespace or comments.
//
// It returns the remainder of b after dropping the first n bytes and then any
// run of spaces, tabs, carriage returns, newlines and '#' comments. A comment
// extends through the next newline, or to the end of input if there is none,
// in which case nil is returned. It panics if n exceeds the slice bounds.
func consume(b []byte, n int) []byte {
	b = b[n:]
	for len(b) > 0 {
		switch b[0] {
		case ' ', '\n', '\r', '\t':
			b = b[1:]
		case '#':
			i := bytes.IndexByte(b, '\n')
			if i < 0 {
				// Unterminated comment swallows the rest of the input.
				return nil
			}
			b = b[i+1:]
		default:
			return b
		}
	}
	return b
}
655
656
// errRegexp matches, at the start of the input, any sequence that looks like
// a non-delimiter, or failing that a single character other than newline. It
// is used only to extract the offending text for error reporting.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9\/]+|.)`)
658
659
// isDelim returns true if given byte is a delimiter character, that is, any
// byte other than '-', '+', '.', '_', an ASCII letter or an ASCII digit.
func isDelim(c byte) bool {
	switch {
	case c == '-', c == '+', c == '.', c == '_':
		return false
	case 'a' <= c && c <= 'z':
		return false
	case 'A' <= c && c <= 'Z':
		return false
	case '0' <= c && c <= '9':
		return false
	}
	return true
}
666