faiss - Code Metrics - milvus-io/milvus - Measure and Improve Code Quality continuously with Scrutinizer

faiss F
last analyzed 2021-04-15 09:02 UTC

↳ Parent: Project
Complexity

Total Complexity
Size/Duplication

Total Lines	813
Duplicated Lines	9.72 %
Importance

Changes
Metric	Value
eloc	570
dl	79
loc	813
rs	2.56
c	0
b	0
f	0
wmc	73
33 Functions

Rating	Name	Duplication	Size	Complexity
A	vector_to_array()	0	9	2
A	add_ref_in_function()	0	9	1
A	handle_ParameterSpace()	0	8	1
A	deserialize_index_binary()	0	4	1
A	rand()	0	4	1
A	copy_array_to_vector()	0	12	2
A	replace_method()	0	12	4
A	index_cpu_to_all_gpus()	0	3	1
A	handle_Quantizer()	0	24	1
A	normalize_L2()	0	2	1
A	handle_VectorTransform()	0	28	1
A	serialize_index_binary()	0	5	1
B	handle_Index()	11	118	4
A	add_ref_in_constructor()	0	19	2
A	index_cpu_to_gpu_multiple_py()	0	14	3
A	randn()	0	4	1
A	replacement_map_search_multiple()	0	5	1
A	kmin()	15	15	1
A	randint()	0	7	2
A	index_cpu_to_gpus_list()	0	10	5
A	kmax()	15	15	1
A	handle_Clustering()	0	20	3
A	handle_MatrixStats()	0	8	1
A	eval_intersection()	0	10	2
A	handle_AutoTuneCriterion()	0	15	3
A	deserialize_index()	0	4	1
A	vector_float_to_array()	0	2	1
A	replacement_map_add()	0	4	1
A	pairwise_distances()	0	19	2
A	add_ref_in_method()	0	10	2
A	serialize_index()	0	5	1
A	instruction_set()	0	12	5
B	handle_IndexBinary()	11	62	2
6 Methods

Rating	Name	Duplication	Size	Complexity
A	Kmeans.__init__()	0	17	3
A	ResultHeap.__init__()	12	12	1
A	Kmeans.assign()	0	6	1
B	Kmeans.train()	0	22	5
A	ResultHeap.finalize()	2	2	1
A	ResultHeap.add_result()	7	7	1
How to fix Duplicated Code Complexity

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

#@nolint

# not linting this file because it imports * from swigfaiss, which
# causes a ton of useless warnings.

import numpy as np
import sys
import inspect
import pdb
import platform
import subprocess
import logging


logger = logging.getLogger(__name__)


def instruction_set():
    """Report which build of the faiss extension suits the host CPU.

    Returns:
        "AVX2" when the CPU advertises AVX2 support, "default" otherwise.
        "default" is also returned on platforms other than macOS and Linux
        and when the macOS ``sysctl`` probe cannot be run.

    Raises:
        ImportError: on Linux if ``numpy.distutils.cpuinfo`` cannot be
            imported.
    """
    system = platform.system()
    if system == "Darwin":
        try:
            output = subprocess.check_output(
                ["/usr/sbin/sysctl", "hw.optional.avx2_0"])
        except (OSError, subprocess.CalledProcessError):
            # sysctl is missing or does not know this key on this machine
            return "default"
        # check_output returns bytes shaped like b"hw.optional.avx2_0: 1\n".
        # Indexing bytes yields an int and the last byte is the newline, so
        # decode and parse the value after the colon instead.
        if isinstance(output, bytes):
            output = output.decode("ascii", "replace")
        value = output.strip().rsplit(":", 1)[-1].strip()
        return "AVX2" if value == "1" else "default"
    if system == "Linux":
        import numpy.distutils.cpuinfo
        if "avx2" in numpy.distutils.cpuinfo.cpu.info[0].get('flags', ""):
            return "AVX2"
    return "default"


# Pick the extension module that matches the host CPU: the AVX2 build when
# instruction_set() reports "AVX2", the generic build otherwise.
try:
    instr_set = instruction_set()
    if instr_set == "AVX2":
        logger.info("Loading faiss with AVX2 support.")
        from .swigfaiss_avx2 import *
    else:
        logger.info("Loading faiss.")
        from .swigfaiss import *

except ImportError:
    # Reached when any import attempted above fails (including the one done
    # inside instruction_set()); fall back to the generic build.
    # we import * so that the symbol X can be accessed as faiss.X
    logger.info("Loading faiss.")
    from .swigfaiss import *


# Dotted version string built from the three FAISS_VERSION_* constants
# (presumably provided by the swigfaiss star import above).
__version__ = "%d.%d.%d" % (
    FAISS_VERSION_MAJOR, FAISS_VERSION_MINOR, FAISS_VERSION_PATCH)


##################################################################
# The functions below add or replace some methods for classes
# this is to be able to pass in numpy arrays directly
# The C++ version of the classnames will be suffixed with _c
##################################################################


def replace_method(the_class, name, replacement, ignore_missing=False):
    """Install `replacement` as method `name` of `the_class`.

    The previous method is kept on the class under `name + '_c'`.  Nothing
    is done when the current method is already called
    'replacement_' + name, i.e. a parent class was patched earlier.

    If `the_class` has no attribute `name`, the AttributeError propagates
    unless `ignore_missing` is true, in which case the call is a no-op.
    """
    try:
        current = getattr(the_class, name)
    except AttributeError:
        if not ignore_missing:
            raise
        return
    already_patched = current.__name__ == 'replacement_' + name
    if not already_patched:
        setattr(the_class, name + '_c', current)
        setattr(the_class, name, replacement)


def handle_Clustering():
    """Make Clustering.train and Clustering.train_encoded take numpy arrays."""

    def replacement_train(self, x, index, weights=None):
        # x holds one training vector per row; weights, if given, one per row
        nvec, dim = x.shape
        assert dim == self.d
        if weights is None:
            self.train_c(nvec, swig_ptr(x), index)
            return
        assert weights.shape == (nvec, )
        self.train_c(nvec, swig_ptr(x), index, swig_ptr(weights))

    def replacement_train_encoded(self, x, codec, index, weights=None):
        # x holds one code per row, as wide as codec.sa_code_size()
        nvec, code_width = x.shape
        assert code_width == codec.sa_code_size()
        assert codec.d == index.d
        if weights is None:
            self.train_encoded_c(nvec, swig_ptr(x), codec, index)
            return
        assert weights.shape == (nvec, )
        self.train_encoded_c(nvec, swig_ptr(x), codec, index, swig_ptr(weights))

    replace_method(Clustering, 'train', replacement_train)
    replace_method(Clustering, 'train_encoded', replacement_train_encoded)


handle_Clustering()


def handle_Quantizer(the_class):
    """Make train, compute_codes and decode of `the_class` take and return
    numpy arrays."""

    def replacement_train(self, x):
        nvec, dim = x.shape
        assert dim == self.d
        self.train_c(nvec, swig_ptr(x))

    def replacement_compute_codes(self, x):
        # returns one row of self.code_size bytes per input vector
        nvec, dim = x.shape
        assert dim == self.d
        out = np.empty((nvec, self.code_size), dtype='uint8')
        self.compute_codes_c(swig_ptr(x), swig_ptr(out), nvec)
        return out

    def replacement_decode(self, codes):
        # returns one row of self.d float32 values per input code
        ncodes, width = codes.shape
        assert width == self.code_size
        out = np.empty((ncodes, self.d), dtype='float32')
        self.decode_c(swig_ptr(codes), swig_ptr(out), ncodes)
        return out

    for method_name, wrapper in (
            ('train', replacement_train),
            ('compute_codes', replacement_compute_codes),
            ('decode', replacement_decode)):
        replace_method(the_class, method_name, wrapper)


handle_Quantizer(ProductQuantizer)
handle_Quantizer(ScalarQuantizer)



def handle_Index(the_class):
    """Give `the_class` numpy-friendly versions of the Index methods.

    Most wrappers compare the array width with `self.d`, allocate the
    output arrays themselves and forward raw pointers to the original
    method, which replace_method keeps on the class under the same name
    suffixed with `_c`.
    """

    def replacement_add(self, x):
        assert x.flags.contiguous
        nvec, dim = x.shape
        assert dim == self.d
        self.add_c(nvec, swig_ptr(x))

    def replacement_add_with_ids(self, x, ids):
        nvec, dim = x.shape
        assert dim == self.d
        assert ids.shape == (nvec, ), 'not same nb of vectors as ids'
        self.add_with_ids_c(nvec, swig_ptr(x), swig_ptr(ids))

    def replacement_assign(self, x, k):
        # returns an (n, k) int64 array of labels
        nvec, dim = x.shape
        assert dim == self.d
        out_labels = np.empty((nvec, k), dtype=np.int64)
        self.assign_c(nvec, swig_ptr(x), swig_ptr(out_labels), k)
        return out_labels

    def replacement_train(self, x):
        assert x.flags.contiguous
        nvec, dim = x.shape
        assert dim == self.d
        self.train_c(nvec, swig_ptr(x))

    def replacement_search(self, x, k):
        # returns (distances, labels), both of shape (n, k)
        nq, dim = x.shape
        assert dim == self.d
        out_dis = np.empty((nq, k), dtype=np.float32)
        out_ids = np.empty((nq, k), dtype=np.int64)
        self.search_c(nq, swig_ptr(x), k, swig_ptr(out_dis), swig_ptr(out_ids))
        return out_dis, out_ids

    def replacement_search_and_reconstruct(self, x, k):
        # like search, plus an (n, k, d) array of reconstructed vectors
        nq, dim = x.shape
        assert dim == self.d
        out_dis = np.empty((nq, k), dtype=np.float32)
        out_ids = np.empty((nq, k), dtype=np.int64)
        out_vecs = np.empty((nq, k, dim), dtype=np.float32)
        self.search_and_reconstruct_c(
            nq, swig_ptr(x), k,
            swig_ptr(out_dis), swig_ptr(out_ids), swig_ptr(out_vecs))
        return out_dis, out_ids, out_vecs

    def replacement_remove_ids(self, x):
        # x is either a ready-made IDSelector or a 1-D array of ids
        if isinstance(x, IDSelector):
            return self.remove_ids_c(x)
        assert x.ndim == 1
        index_ivf = try_extract_index_ivf(self)
        if index_ivf and index_ivf.direct_map.type == DirectMap.Hashtable:
            selector = IDSelectorArray(x.size, swig_ptr(x))
        else:
            selector = IDSelectorBatch(x.size, swig_ptr(x))
        return self.remove_ids_c(selector)

    def replacement_reconstruct(self, key):
        vec = np.empty(self.d, dtype=np.float32)
        self.reconstruct_c(key, swig_ptr(vec))
        return vec

    def replacement_reconstruct_n(self, n0, ni):
        vecs = np.empty((ni, self.d), dtype=np.float32)
        self.reconstruct_n_c(n0, ni, swig_ptr(vecs))
        return vecs

    def replacement_update_vectors(self, keys, x):
        nkeys = keys.size
        assert keys.shape == (nkeys, )
        assert x.shape == (nkeys, self.d)
        self.update_vectors_c(nkeys, swig_ptr(keys), swig_ptr(x))

    def replacement_range_search(self, x, thresh):
        nq, dim = x.shape
        assert dim == self.d
        result = RangeSearchResult(nq)
        self.range_search_c(nq, swig_ptr(x), thresh, result)
        # copy the buffers referenced by the result object into new arrays
        lims = rev_swig_ptr(result.lims, nq + 1).copy()
        total = int(lims[-1])
        distances = rev_swig_ptr(result.distances, total).copy()
        labels = rev_swig_ptr(result.labels, total).copy()
        return lims, distances, labels

    def replacement_sa_encode(self, x):
        nvec, dim = x.shape
        assert dim == self.d
        out_codes = np.empty((nvec, self.sa_code_size()), dtype='uint8')
        self.sa_encode_c(nvec, swig_ptr(x), swig_ptr(out_codes))
        return out_codes

    def replacement_sa_decode(self, codes):
        ncodes, width = codes.shape
        assert width == self.sa_code_size()
        out_vecs = np.empty((ncodes, self.d), dtype='float32')
        self.sa_decode_c(ncodes, swig_ptr(codes), swig_ptr(out_vecs))
        return out_vecs

    # (method name, wrapper, tolerate a class lacking the method), applied
    # in this order
    patches = (
        ('add', replacement_add, False),
        ('add_with_ids', replacement_add_with_ids, False),
        ('assign', replacement_assign, False),
        ('train', replacement_train, False),
        ('search', replacement_search, False),
        ('remove_ids', replacement_remove_ids, False),
        ('reconstruct', replacement_reconstruct, False),
        ('reconstruct_n', replacement_reconstruct_n, False),
        ('range_search', replacement_range_search, False),
        ('update_vectors', replacement_update_vectors, True),
        ('search_and_reconstruct', replacement_search_and_reconstruct, True),
        ('sa_encode', replacement_sa_encode, False),
        ('sa_decode', replacement_sa_decode, False),
    )
    for method_name, wrapper, optional in patches:
        replace_method(the_class, method_name, wrapper,
                       ignore_missing=optional)


def handle_IndexBinary(the_class):
    """Give `the_class` numpy-friendly versions of the IndexBinary methods.

    Input arrays hold packed bytes, so their width times 8 must equal
    `self.d`.  Original methods remain available with a `_c` suffix.
    """

    def replacement_add(self, x):
        assert x.flags.contiguous
        nvec, nbytes = x.shape
        assert nbytes * 8 == self.d
        self.add_c(nvec, swig_ptr(x))

    def replacement_add_with_ids(self, x, ids):
        nvec, nbytes = x.shape
        assert nbytes * 8 == self.d
        assert ids.shape == (nvec, ), 'not same nb of vectors as ids'
        self.add_with_ids_c(nvec, swig_ptr(x), swig_ptr(ids))

    def replacement_train(self, x):
        assert x.flags.contiguous
        nvec, nbytes = x.shape
        assert nbytes * 8 == self.d
        self.train_c(nvec, swig_ptr(x))

    def replacement_reconstruct(self, key):
        packed = np.empty(self.d // 8, dtype=np.uint8)
        self.reconstruct_c(key, swig_ptr(packed))
        return packed

    def replacement_search(self, x, k):
        # returns (distances, labels), both of shape (n, k)
        nq, nbytes = x.shape
        assert nbytes * 8 == self.d
        out_dis = np.empty((nq, k), dtype=np.int32)
        out_ids = np.empty((nq, k), dtype=np.int64)
        self.search_c(nq, swig_ptr(x), k, swig_ptr(out_dis), swig_ptr(out_ids))
        return out_dis, out_ids

    def replacement_range_search(self, x, thresh):
        nq, nbytes = x.shape
        assert nbytes * 8 == self.d
        result = RangeSearchResult(nq)
        self.range_search_c(nq, swig_ptr(x), thresh, result)
        # copy the buffers referenced by the result object into new arrays
        lims = rev_swig_ptr(result.lims, nq + 1).copy()
        total = int(lims[-1])
        distances = rev_swig_ptr(result.distances, total).copy()
        labels = rev_swig_ptr(result.labels, total).copy()
        return lims, distances, labels

    def replacement_remove_ids(self, x):
        # x is either a ready-made IDSelector or a 1-D array of ids
        if isinstance(x, IDSelector):
            return self.remove_ids_c(x)
        assert x.ndim == 1
        selector = IDSelectorBatch(x.size, swig_ptr(x))
        return self.remove_ids_c(selector)

    for method_name, wrapper in (
            ('add', replacement_add),
            ('add_with_ids', replacement_add_with_ids),
            ('train', replacement_train),
            ('search', replacement_search),
            ('range_search', replacement_range_search),
            ('reconstruct', replacement_reconstruct),
            ('remove_ids', replacement_remove_ids)):
        replace_method(the_class, method_name, wrapper)


def handle_VectorTransform(the_class):
    """Give `the_class` numpy-friendly train, apply_py and
    reverse_transform methods."""

    def apply_method(self, x):
        # maps (n, d_in) input to a new (n, d_out) float32 array
        assert x.flags.contiguous
        nvec, dim = x.shape
        assert dim == self.d_in
        out = np.empty((nvec, self.d_out), dtype=np.float32)
        self.apply_noalloc(nvec, swig_ptr(x), swig_ptr(out))
        return out

    def replacement_reverse_transform(self, x):
        # maps (n, d_out) input back to a new (n, d_in) float32 array
        nvec, dim = x.shape
        assert dim == self.d_out
        out = np.empty((nvec, self.d_in), dtype=np.float32)
        self.reverse_transform_c(nvec, swig_ptr(x), swig_ptr(out))
        return out

    def replacement_vt_train(self, x):
        assert x.flags.contiguous
        nvec, dim = x.shape
        assert dim == self.d_in
        self.train_c(nvec, swig_ptr(x))

    replace_method(the_class, 'train', replacement_vt_train)
    # apply is reserved in Python, so the wrapper is exposed as apply_py
    the_class.apply_py = apply_method
    replace_method(
        the_class, 'reverse_transform', replacement_reverse_transform)


def handle_AutoTuneCriterion(the_class):
    """Give `the_class` numpy-friendly set_groundtruth and evaluate methods."""

    def replacement_set_groundtruth(self, D, I):
        """Record the ground truth; D may be None, I is an (nq, gt_nnn) array."""
        # Test against None explicitly: the truth value of a numpy array
        # with more than one element is ambiguous and raises ValueError.
        has_distances = D is not None
        if has_distances:
            assert I.shape == D.shape
        self.nq, self.gt_nnn = I.shape
        self.set_groundtruth_c(
            self.gt_nnn, swig_ptr(D) if has_distances else None, swig_ptr(I))

    def replacement_evaluate(self, D, I):
        """Evaluate results D, I, both of shape (self.nq, self.nnn)."""
        assert I.shape == D.shape
        assert I.shape == (self.nq, self.nnn)
        return self.evaluate_c(swig_ptr(D), swig_ptr(I))

    replace_method(the_class, 'set_groundtruth', replacement_set_groundtruth)
    replace_method(the_class, 'evaluate', replacement_evaluate)


def handle_ParameterSpace(the_class):
    """Make `the_class`.explore take a numpy query array and return the
    OperatingPoints object it fills."""

    def replacement_explore(self, index, xq, crit):
        # xq must hold crit.nq queries of dimension index.d
        assert xq.shape == (crit.nq, index.d)
        operating_points = OperatingPoints()
        self.explore_c(index, crit.nq, swig_ptr(xq), crit, operating_points)
        return operating_points

    replace_method(the_class, 'explore', replacement_explore)


def handle_MatrixStats(the_class):
    """Let `the_class` be constructed directly from a 2-D numpy array."""
    wrapped_init = the_class.__init__

    def replacement_init(self, m):
        assert len(m.shape) == 2
        nrows, ncols = m.shape[0], m.shape[1]
        wrapped_init(self, nrows, ncols, swig_ptr(m))

    the_class.__init__ = replacement_init


handle_MatrixStats(MatrixStats)



this_module = sys.modules[__name__]

# Walk every class visible in this module and apply the handler of each
# base class it derives from.
for symbol in dir(this_module):
    obj = getattr(this_module, symbol)
    if not inspect.isclass(obj):
        continue
    the_class = obj
    if issubclass(the_class, Index):
        handle_Index(the_class)
    if issubclass(the_class, IndexBinary):
        handle_IndexBinary(the_class)
    if issubclass(the_class, VectorTransform):
        handle_VectorTransform(the_class)
    if issubclass(the_class, AutoTuneCriterion):
        handle_AutoTuneCriterion(the_class)
    if issubclass(the_class, ParameterSpace):
        handle_ParameterSpace(the_class)


###########################################
# Add Python references to objects
# we do this at the Python class wrapper level.
###########################################

def add_ref_in_constructor(the_class, parameter_no):
    """Make instances of `the_class` keep their constructor arguments alive.

    The wrapped __init__ stores the selected positional arguments in
    `self.referenced_objects`, so that they do not get deallocated before
    self.

    parameter_no is either
      - an index into the positional arguments, or
      - a dict (or dict subclass) mapping the number of positional
        arguments to the list of argument indices to keep.
    """
    original_init = the_class.__init__

    def replacement_init(self, *args):
        original_init(self, *args)
        self.referenced_objects = [args[parameter_no]]

    def replacement_init_multiple(self, *args):
        original_init(self, *args)
        # which arguments to keep depends on how many were passed
        pset = parameter_no[len(args)]
        self.referenced_objects = [args[no] for no in pset]

    # isinstance rather than an exact type comparison, so that dict
    # subclasses select the per-arity variant as well
    if isinstance(parameter_no, dict):
        the_class.__init__ = replacement_init_multiple
    else:
        the_class.__init__ = replacement_init

def add_ref_in_method(the_class, method_name, parameter_no):
    """Wrap `the_class.method_name` so that the positional argument at
    index `parameter_no` is also recorded in self.referenced_objects
    before the original method runs."""
    original_method = getattr(the_class, method_name)

    def replacement_method(self, *args):
        kept = args[parameter_no]
        if hasattr(self, 'referenced_objects'):
            self.referenced_objects.append(kept)
        else:
            # first reference recorded on this instance
            self.referenced_objects = [kept]
        return original_method(self, *args)

    setattr(the_class, method_name, replacement_method)

def add_ref_in_function(function_name, parameter_no):
    """Wrap the module-level function `function_name` so that the object it
    returns keeps a reference to the positional argument at index
    `parameter_no` (stored in result.referenced_objects).

    Assumes the function returns an object that accepts new attributes.
    """
    wrapped = getattr(this_module, function_name)

    def replacement_function(*args):
        result = wrapped(*args)
        result.referenced_objects = [args[parameter_no]]
        return result

    setattr(this_module, function_name, replacement_function)

# Register which constructor / method arguments each wrapper class must keep
# a Python reference to (stored in referenced_objects on the instance).
add_ref_in_constructor(IndexIVFFlat, 0)

add_ref_in_constructor(IndexIVFFlatDedup, 0)

# keep arguments 0 and 1 when built with two arguments, argument 0 with one
add_ref_in_constructor(IndexPreTransform, {2: [0, 1], 1: [0]})

add_ref_in_method(IndexPreTransform, 'prepend_transform', 0)
add_ref_in_constructor(IndexIVFPQ, 0)

add_ref_in_constructor(IndexIVFPQR, 0)

add_ref_in_constructor(Index2Layer, 0)

add_ref_in_constructor(Level1Quantizer, 0)

add_ref_in_constructor(IndexIVFScalarQuantizer, 0)

add_ref_in_constructor(IndexIDMap, 0)

add_ref_in_constructor(IndexIDMap2, 0)

add_ref_in_constructor(IndexHNSW, 0)

# objects handed to add_shard are kept as well
add_ref_in_method(IndexShards, 'add_shard', 0)

add_ref_in_method(IndexBinaryShards, 'add_shard', 0)

add_ref_in_constructor(IndexRefineFlat, 0)

add_ref_in_constructor(IndexBinaryIVF, 0)

add_ref_in_constructor(IndexBinaryFromFloat, 0)

add_ref_in_constructor(IndexBinaryIDMap, 0)

add_ref_in_constructor(IndexBinaryIDMap2, 0)


# objects handed to addIndex are kept as well
add_ref_in_method(IndexReplicas, 'addIndex', 0)

add_ref_in_method(IndexBinaryReplicas, 'addIndex', 0)


add_ref_in_constructor(BufferedIOWriter, 0)

add_ref_in_constructor(BufferedIOReader, 0)


# seems really marginal...
# remove_ref_from_method(IndexReplicas, 'removeIndex', 0)

# Only when the loaded extension exposes GpuIndexFlat (presumably a GPU
# build — TODO confirm) are the GPU wrappers registered.
if hasattr(this_module, 'GpuIndexFlat'):
    # handle all the GPUResources refs
    # the object returned by index_cpu_to_gpu keeps its argument 0 alive
    add_ref_in_function('index_cpu_to_gpu', 0)
    add_ref_in_constructor(GpuIndexFlat, 0)

    add_ref_in_constructor(GpuIndexFlatIP, 0)

    add_ref_in_constructor(GpuIndexFlatL2, 0)

    add_ref_in_constructor(GpuIndexIVFFlat, 0)

    add_ref_in_constructor(GpuIndexIVFScalarQuantizer, 0)

    add_ref_in_constructor(GpuIndexIVFPQ, 0)

    add_ref_in_constructor(GpuIndexBinaryFlat, 0)




###########################################
# GPU functions
###########################################


def index_cpu_to_gpu_multiple_py(resources, index, co=None, gpus=None):
    """Build the C++ vectors of GPU resources and device ids, then call
    index_cpu_to_gpu_multiple with them.

    When `gpus` is None the resources are assigned to devices
    0 .. len(resources) - 1.  The returned index keeps a reference to
    `resources` in its referenced_objects attribute.
    """
    device_ids = range(len(resources)) if gpus is None else gpus
    vres = GpuResourcesVector()
    vdev = IntVector()
    for device, resource in zip(device_ids, resources):
        vdev.push_back(device)
        vres.push_back(resource)
    gpu_index = index_cpu_to_gpu_multiple(vres, vdev, index, co)
    gpu_index.referenced_objects = resources
    return gpu_index


def index_cpu_to_all_gpus(index, co=None, ngpu=-1):
    """Shortcut for index_cpu_to_gpus_list without an explicit GPU list;
    `ngpu` is forwarded unchanged."""
    return index_cpu_to_gpus_list(index, co=co, gpus=None, ngpu=ngpu)


def index_cpu_to_gpus_list(index, co=None, gpus=None, ngpu=-1):
    """Copy `index` to several GPUs, creating one StandardGpuResources
    per GPU.

    `gpus` must be a list of GPU ids or None.  When it is None, `ngpu`
    selects the first ngpu GPUs, and ngpu == -1 selects all the GPUs
    reported by get_num_gpus().
    """
    if gpus is None:
        gpus = range(get_num_gpus()) if ngpu == -1 else range(ngpu)
    resources = [StandardGpuResources() for _ in gpus]
    return index_cpu_to_gpu_multiple_py(resources, index, co, gpus)


###########################################
# numpy array / std::vector conversions
###########################################

# mapping from vector names in swigfaiss.swig and the numpy dtype names
vector_name_map = dict([
    ('Float', 'float32'),
    ('Byte', 'uint8'),
    ('Char', 'int8'),
    ('Uint64', 'uint64'),
    ('Long', 'int64'),
    ('Int', 'int32'),
    ('Double', 'float64'),
])

def vector_to_array(v):
    """Copy the contents of a C++ vector wrapper into a new numpy array.

    The element dtype is looked up in vector_name_map from the wrapper's
    class name, which must end in 'Vector'.
    """
    classname = v.__class__.__name__
    assert classname.endswith('Vector')
    element_type = np.dtype(vector_name_map[classname[:-len('Vector')]])
    out = np.empty(v.size(), dtype=element_type)
    if v.size() > 0:
        memcpy(swig_ptr(out), v.data(), out.nbytes)
    return out


def vector_float_to_array(v):
    """Same as vector_to_array; the call is simply forwarded."""
    converted = vector_to_array(v)
    return converted


def copy_array_to_vector(a, v):
    """Resize the C++ vector wrapper `v` to the length of the 1-D numpy
    array `a` and copy the array contents into it.

    The array dtype must match the dtype that vector_name_map associates
    with the wrapper's class name.
    """
    (length,) = a.shape
    classname = v.__class__.__name__
    assert classname.endswith('Vector')
    expected = np.dtype(vector_name_map[classname[:-len('Vector')]])
    assert expected == a.dtype, (
        'cannot copy a %s array to a %s (should be %s)' % (
            a.dtype, classname, expected))
    v.resize(length)
    if length > 0:
        memcpy(v.data(), swig_ptr(a), a.nbytes)



###########################################
# Wrapper for a few functions
###########################################

def kmin(array, k):
    """Return the k smallest values of each line of a float32 array,
    together with their indices, as a pair (values, indices)."""
    nrows, ncols = array.shape
    values = np.zeros((nrows, k), dtype='float32')
    indices = np.zeros((nrows, k), dtype='int64')
    # one heap of size k per line, writing straight into the output arrays
    heaps = float_maxheap_array_t()
    heaps.nh = nrows
    heaps.k = k
    heaps.val = swig_ptr(values)
    heaps.ids = swig_ptr(indices)
    heaps.heapify()
    heaps.addn(ncols, swig_ptr(array))
    heaps.reorder()
    return values, indices


def kmax(array, k):
    """Return the k largest values of each line of a float32 array,
    together with their indices, as a pair (values, indices)."""
    nrows, ncols = array.shape
    values = np.zeros((nrows, k), dtype='float32')
    indices = np.zeros((nrows, k), dtype='int64')
    # one heap of size k per line, writing straight into the output arrays
    heaps = float_minheap_array_t()
    heaps.nh = nrows
    heaps.k = k
    heaps.val = swig_ptr(values)
    heaps.ids = swig_ptr(indices)
    heaps.heapify()
    heaps.addn(ncols, swig_ptr(array))
    heaps.reorder()
    return values, indices


def pairwise_distances(xq, xb, mt=METRIC_L2, metric_arg=0):
    """Compute the full (nq, nb) float32 matrix of distances between the
    rows of xq and the rows of xb.

    METRIC_L2 is handled by pairwise_L2sqr; any other metric type is
    passed, with metric_arg, to pairwise_extra_distances.
    """
    nq, dim = xq.shape
    nb, dim_b = xb.shape
    assert dim == dim_b
    out = np.empty((nq, nb), dtype='float32')
    if mt != METRIC_L2:
        pairwise_extra_distances(
            dim, nq, swig_ptr(xq), nb, swig_ptr(xb),
            mt, metric_arg, swig_ptr(out))
        return out
    pairwise_L2sqr(dim, nq, swig_ptr(xq), nb, swig_ptr(xb), swig_ptr(out))
    return out




def rand(n, seed=12345):
    """Return a float32 array of shape n filled by float_rand with `seed`."""
    out = np.empty(n, dtype='float32')
    float_rand(swig_ptr(out), out.size, seed)
    return out


def randint(n, seed=12345, vmax=None):
    """Return an int64 array of shape n filled by int64_rand, or by
    int64_rand_max when `vmax` is given."""
    out = np.empty(n, dtype='int64')
    if vmax is not None:
        int64_rand_max(swig_ptr(out), out.size, vmax, seed)
    else:
        int64_rand(swig_ptr(out), out.size, seed)
    return out


# second name for the same function
lrand = randint

def randn(n, seed=12345):
    """Return a float32 array of shape n filled by float_randn with `seed`."""
    out = np.empty(n, dtype='float32')
    float_randn(swig_ptr(out), out.size, seed)
    return out


def eval_intersection(I1, I2):
    """Sum, over the lines of two result tables with the same number of
    lines, of the intersection size between matching lines."""
    nrows = I1.shape[0]
    assert I2.shape[0] == nrows
    k1, k2 = I1.shape[1], I2.shape[1]
    return sum(
        ranklist_intersection_size(
            k1, swig_ptr(I1[row]), k2, swig_ptr(I2[row]))
        for row in range(nrows))


def normalize_L2(x):
    """Pass the 2-D array x to fvec_renorm_L2 (presumably normalizing each
    row in place — TODO confirm); nothing is returned."""
    nrows, dim = x.shape[0], x.shape[1]
    fvec_renorm_L2(dim, nrows, swig_ptr(x))


# MapLong2Long interface

def replacement_map_add(self, keys, vals):
    """Add the pairs (keys[i], vals[i]) to the map.

    keys and vals must be 1-D arrays of the same length.
    """
    n, = keys.shape
    # vals is read through a raw pointer for n entries, so its length has to
    # be checked here (the previous check compared keys.shape with itself)
    assert vals.shape == (n,)
    self.add_c(n, swig_ptr(keys), swig_ptr(vals))


def replacement_map_search_multiple(self, keys):
    """Look up a 1-D array of keys; returns an int64 array holding one
    value per key."""
    (nkeys,) = keys.shape
    found = np.empty(nkeys, dtype='int64')
    self.search_multiple_c(nkeys, swig_ptr(keys), swig_ptr(found))
    return found

# Install the numpy-friendly wrappers on MapLong2Long; the original methods
# stay available as add_c and search_multiple_c.
replace_method(MapLong2Long, 'add', replacement_map_add)

replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple)


###########################################
# Kmeans object
###########################################


class Kmeans:
    """Shallow wrapper around the Clustering object. The important method
    is train()."""

    def __init__(self, d, k, **kwargs):
        """d: input dimension, k: nb of centroids.

        The keyword `gpu` selects GPU training: False (default) trains
        without GPUs, True uses all GPUs, an integer uses that many GPUs.
        Every other keyword is set on the ClusteringParameters object,
        including niter=25, verbose=False, spherical = False; an unknown
        field name raises AttributeError.
        """
        self.d = d
        self.k = k
        self.gpu = False
        self.cp = ClusteringParameters()

        # loop names chosen so that the `k` argument is not shadowed
        for name, value in kwargs.items():
            if name == 'gpu':
                self.gpu = value
            else:
                # if this raises an exception, it means that it is a non-existent field
                getattr(self.cp, name)
                setattr(self.cp, name, value)
        self.centroids = None

    @staticmethod
    def _ngpu_from_flag(gpu):
        """Translate a truthy `gpu` setting into the ngpu argument of
        index_cpu_to_all_gpus: -1 (all GPUs) for a boolean, the value
        itself otherwise."""
        # `gpu == True` would also match the integer 1 and silently turn
        # "one GPU" into "all GPUs", hence the explicit type test.
        if isinstance(gpu, (bool, np.bool_)):
            return -1
        return gpu

    def train(self, x, weights=None):
        """Run the clustering on the (n, d) array x, optionally with one
        weight per vector.

        Stores the centroids as a (k, d) array in self.centroids and the
        per-iteration objective in self.obj; returns the last objective
        value, or 0.0 when no iteration statistics were recorded.
        """
        n, d = x.shape
        assert d == self.d
        clus = Clustering(d, self.k, self.cp)

        if self.cp.spherical:
            self.index = IndexFlatIP(d)
        else:
            self.index = IndexFlatL2(d)

        if self.gpu:
            self.index = index_cpu_to_all_gpus(
                self.index, ngpu=self._ngpu_from_flag(self.gpu))
        clus.train(x, self.index, weights)
        centroids = vector_float_to_array(clus.centroids)
        self.centroids = centroids.reshape(self.k, d)
        stats = clus.iteration_stats
        self.obj = np.array([
            stats.at(i).obj for i in range(stats.size())
        ])
        return self.obj[-1] if self.obj.size > 0 else 0.0

    def assign(self, x):
        """Return (distances, labels) of the nearest centroid for each row
        of x, as flat arrays. train() must have been called first."""
        assert self.centroids is not None, "should train before assigning"
        # rebuild the index content from the stored centroids
        self.index.reset()
        self.index.add(self.centroids)
        D, I = self.index.search(x, 1)
        return D.ravel(), I.ravel()

# IndexProxy was renamed to IndexReplicas, remap the old name for any old code
# people may have
IndexProxy = IndexReplicas
# presumably likewise an older name kept for compatibility — TODO confirm
ConcatenatedInvertedLists = HStackInvertedLists


###########################################
# serialization of indexes to byte arrays
###########################################

def serialize_index(index):
    """Write `index` with write_index into an in-memory writer and return
    the written bytes as a numpy array."""
    sink = VectorIOWriter()
    write_index(index, sink)
    return vector_to_array(sink.data)

def deserialize_index(data):
    """Rebuild an index with read_index from a numpy array such as the one
    returned by serialize_index."""
    source = VectorIOReader()
    copy_array_to_vector(data, source.data)
    return read_index(source)


def serialize_index_binary(index):
    """Write `index` with write_index_binary into an in-memory writer and
    return the written bytes as a numpy array."""
    sink = VectorIOWriter()
    write_index_binary(index, sink)
    return vector_to_array(sink.data)

def deserialize_index_binary(data):
    """Rebuild a binary index with read_index_binary from a numpy array
    such as the one returned by serialize_index_binary."""
    source = VectorIOReader()
    copy_array_to_vector(data, source.data)
    return read_index_binary(source)



###########################################
# ResultHeap
###########################################

class ResultHeap:
    """Accumulate query results from a sliced dataset. The final result will
    be in self.D, self.I."""

    def __init__(self, nq, k):
        """nq: number of query vectors, k: number of results per query."""
        self.I = np.zeros((nq, k), dtype='int64')
        self.D = np.zeros((nq, k), dtype='float32')
        self.nq, self.k = nq, k
        # the heap structure works directly on the memory of self.D / self.I
        heaps = float_maxheap_array_t()
        heaps.k = k
        heaps.nh = nq
        heaps.val = swig_ptr(self.D)
        heaps.ids = swig_ptr(self.I)
        heaps.heapify()
        self.heaps = heaps

    def add_result(self, D, I):
        """Merge one (nq, k) batch of distances D and labels I into the
        heaps. D, I do not need to be in a particular order (heap or
        sorted)."""
        assert D.shape == (self.nq, self.k)
        assert I.shape == (self.nq, self.k)
        # swig_ptr is a module-level name here; this module does not define
        # a `faiss` name, so `faiss.swig_ptr` raised NameError
        self.heaps.addn_with_ids(
            self.k, swig_ptr(D),
            swig_ptr(I), self.k)

    def finalize(self):
        """Reorder the heaps; call once all results have been added."""
        self.heaps.reorder()

1			# Copyright (c) Facebook, Inc. and its affiliates.
2			#
3			# This source code is licensed under the MIT license found in the
4			# LICENSE file in the root directory of this source tree.
5
6			#@nolint
7
8			# not linting this file because it imports * form swigfaiss, which
9			# causes a ton of useless warnings.
10
11			import numpy as np
12			import sys
13			import inspect
14			import pdb
15			import platform
16			import subprocess
17			import logging
18
19
20			logger = logging.getLogger(__name__)
21
22
23			def instruction_set():
24			if platform.system() == "Darwin":
25			if subprocess.check_output(["/usr/sbin/sysctl", "hw.optional.avx2_0"])[-1] == '1':
26			return "AVX2"
27			else:
28			return "default"
29			elif platform.system() == "Linux":
30			import numpy.distutils.cpuinfo
31			if "avx2" in numpy.distutils.cpuinfo.cpu.info[0].get('flags', ""):
32			return "AVX2"
33			else:
34			return "default"
35
36
37			try:
38			instr_set = instruction_set()
39			if instr_set == "AVX2":
40			logger.info("Loading faiss with AVX2 support.")
milvus-io / milvus

faiss F last analyzed 2021-04-15 09:02 UTC

Complexity

Size/Duplication

Importance

33 Functions

6 Methods

How to fix Duplicated Code Complexity

Duplicated Code

Complexity

Duplication Side-by-Side

Filter issues like

faiss F
last analyzed 2021-04-15 09:02 UTC