Code Duplication    Length = 64-70 lines in 2 locations

fuel/converters/ilsvrc2010.py 1 location

@@ 461-530 (lines=70) @@
458
    return synsets, cost_matrix, raw_valid_groundtruth
459
460
461
def read_metadata_mat_file(meta_mat):
    """Read ILSVRC2010 metadata from the distributed MAT file.

    Parameters
    ----------
    meta_mat : str or file-like object
        The filename or file-handle for `meta.mat` from the
        ILSVRC2010 development kit.

    Returns
    -------
    synsets : ndarray, 1-dimensional, compound dtype
        A table containing ILSVRC2010 metadata for the "synonym sets"
        or "synsets" that comprise the classes and superclasses,
        including the following fields:
         * `ILSVRC2010_ID`: the integer ID used in the original
           competition data.
         * `WNID`: A string identifier that uniquely identifies
           a synset in ImageNet and WordNet.
         * `wordnet_height`: The length of the longest path to
           a leaf node in the FULL ImageNet/WordNet hierarchy
           (leaf nodes in the FULL ImageNet/WordNet hierarchy
           have `wordnet_height` 0).
         * `gloss`: A string representation of an English
           textual description of the concept represented by
           this synset.
         * `num_children`: The number of children in the hierarchy
           for this synset.
         * `words`: A string representation, comma separated,
           of different synonym words or phrases for the concept
           represented by this synset.
         * `children`: A vector of `ILSVRC2010_ID`s of children
           of this synset, padded with -1. Note that these refer
           to `ILSVRC2010_ID`s from the original data and *not*
           the zero-based index in the table. Stored as `int16`,
           the same width as the `ILSVRC2010_ID` field.
         * `num_train_images`: The number of training images for
           this synset.
    cost_matrix : ndarray, 2-dimensional, uint8
        A 1000x1000 matrix containing the precomputed pairwise
        cost (based on distance in the hierarchy) for all
        low-level synsets (i.e. the thousand possible output
        classes with training data associated).

    """
    mat = loadmat(meta_mat, squeeze_me=True)
    synsets = mat['synsets']
    cost_matrix = mat['cost_matrix']
    # Width of the padded `children` vectors: the largest child count.
    max_children = int(max(synsets['num_children']))
    new_dtype = numpy.dtype([
        ('ILSVRC2010_ID', numpy.int16),
        ('WNID', ('S', max(map(len, synsets['WNID'])))),
        ('wordnet_height', numpy.int8),
        ('gloss', ('S', max(map(len, synsets['gloss'])))),
        ('num_children', numpy.int8),
        ('words', ('S', max(map(len, synsets['words'])))),
        # Children hold ILSVRC2010_ID values, so they need the same
        # int16 width as that field; int8 would wrap IDs above 127.
        # The explicit shape tuple keeps this a sub-array field even
        # when `max_children` is 1.
        ('children', (numpy.int16, (max_children,))),
        ('num_train_images', numpy.uint16)
    ])
    new_synsets = numpy.empty(synsets.shape, dtype=new_dtype)
    for attr in ['ILSVRC2010_ID', 'WNID', 'wordnet_height', 'gloss',
                 'num_children', 'words', 'num_train_images']:
        new_synsets[attr] = synsets[attr]
    # Start from an all -1 table and overwrite the leading entries of
    # each row with that synset's actual children.
    padded_children = numpy.full(synsets.shape + (max_children,), -1,
                                 dtype=numpy.int16)
    for row, raw_children in zip(padded_children, synsets['children']):
        # `squeeze_me=True` collapses a single child to a scalar, so
        # restore the 1-d shape before measuring its length.
        child_ids = numpy.atleast_1d(raw_children)
        row[:len(child_ids)] = child_ids
    new_synsets['children'] = padded_children
    return new_synsets, cost_matrix
531
532
533
def extract_patch_images(f, which_set):

fuel/converters/ilsvrc2012.py 1 location

@@ 231-294 (lines=64) @@
228
    return synsets, raw_valid_groundtruth
229
230
231
def read_metadata_mat_file(meta_mat):
    """Read ILSVRC2012 metadata from the distributed MAT file.

    Parameters
    ----------
    meta_mat : str or file-like object
        The filename or file-handle for `meta.mat` from the
        ILSVRC2012 development kit.

    Returns
    -------
    synsets : ndarray, 1-dimensional, compound dtype
        A table containing ILSVRC2012 metadata for the "synonym sets"
        or "synsets" that comprise the classes and superclasses,
        including the following fields:
         * `ILSVRC2012_ID`: the integer ID used in the original
           competition data.
         * `WNID`: A string identifier that uniquely identifies
           a synset in ImageNet and WordNet.
         * `wordnet_height`: The length of the longest path to
           a leaf node in the FULL ImageNet/WordNet hierarchy
           (leaf nodes in the FULL ImageNet/WordNet hierarchy
           have `wordnet_height` 0).
         * `gloss`: A string representation of an English
           textual description of the concept represented by
           this synset.
         * `num_children`: The number of children in the hierarchy
           for this synset.
         * `words`: A string representation, comma separated,
           of different synonym words or phrases for the concept
           represented by this synset.
         * `children`: A vector of `ILSVRC2012_ID`s of children
           of this synset, padded with -1. Note that these refer
           to `ILSVRC2012_ID`s from the original data and *not*
           the zero-based index in the table. Stored as `int16`,
           the same width as the `ILSVRC2012_ID` field.
         * `num_train_images`: The number of training images for
           this synset.

    """
    mat = loadmat(meta_mat, squeeze_me=True)
    synsets = mat['synsets']
    # Width of the padded `children` vectors: the largest child count.
    max_children = int(max(synsets['num_children']))
    new_dtype = numpy.dtype([
        ('ILSVRC2012_ID', numpy.int16),
        ('WNID', ('S', max(map(len, synsets['WNID'])))),
        ('wordnet_height', numpy.int8),
        ('gloss', ('S', max(map(len, synsets['gloss'])))),
        ('num_children', numpy.int8),
        ('words', ('S', max(map(len, synsets['words'])))),
        # Children hold ILSVRC2012_ID values, so they need the same
        # int16 width as that field; int8 would wrap IDs above 127.
        # The explicit shape tuple keeps this a sub-array field even
        # when `max_children` is 1.
        ('children', (numpy.int16, (max_children,))),
        ('num_train_images', numpy.uint16)
    ])
    new_synsets = numpy.empty(synsets.shape, dtype=new_dtype)
    for attr in ['ILSVRC2012_ID', 'WNID', 'wordnet_height', 'gloss',
                 'num_children', 'words', 'num_train_images']:
        new_synsets[attr] = synsets[attr]
    # Start from an all -1 table and overwrite the leading entries of
    # each row with that synset's actual children.
    padded_children = numpy.full(synsets.shape + (max_children,), -1,
                                 dtype=numpy.int16)
    for row, raw_children in zip(padded_children, synsets['children']):
        # `squeeze_me=True` collapses a single child to a scalar, so
        # restore the 1-d shape before measuring its length.
        child_ids = numpy.atleast_1d(raw_children)
        row[:len(child_ids)] = child_ids
    new_synsets['children'] = padded_children
    return new_synsets
295