Python ssdeep.compare() Examples
The following are 16 code examples of ssdeep.compare().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module ssdeep, or try the search function.
Example #1
Source File: game.py From DueUtil with GNU General Public License v3.0 | 6 votes |
def get_spam_level(player, message_content):
    """
    Gets a spam level for a message using a fuzzy hash.

    The message is hashed with ssdeep and compared against every non-None
    entry of ``player.last_message_hashes``; the best match is the spam
    level (0 when there is nothing to compare against). The new hash is
    then appended to the player's history, and a level above
    SPAM_TOLERANCE increments ``player.spam_detections``.

    > 50% means it's probably spam
    """
    current_hash = ssdeep.hash(message_content)
    # The generator is fully consumed by max() before the history is
    # extended, so the message is never compared against itself.
    spam_level = max(
        (ssdeep.compare(current_hash, earlier_hash)
         for earlier_hash in player.last_message_hashes
         if earlier_hash is not None),
        default=0,
    )
    player.last_message_hashes.append(current_hash)
    if spam_level > SPAM_TOLERANCE:
        player.spam_detections += 1
    return spam_level
Example #2
Source File: apifuzz.py From codex-backend with MIT License | 6 votes |
def searchFuzzy(fuzz, limit, thresh):
    """
    Compare ``fuzz`` against stored fuzzy hashes and return the close ones.

    At most ``limit`` metadata documents are fetched; each document's
    ``fuzzy_hash`` is compared with ``fuzz`` via ssdeep.

    :param fuzz: ssdeep hash to search for
    :param limit: maximum number of documents to fetch
    :param thresh: minimum ssdeep score (inclusive) for a match
    :return: dict mapping ``file_id`` to its ssdeep score
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    try:
        db = client[envget('db_metadata_name')]
        # NOTE(review): this uses the literal collection name
        # "db_metadata_collection" rather than envget(...) - confirm that
        # this is intended.
        coll_meta = db["db_metadata_collection"]
        cursor = coll_meta.find(
            {}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit)
        # Materialise the results before comparing, as the original did.
        documents = list(cursor)
    finally:
        # The client was previously never closed, leaking a connection
        # on every call.
        client.close()

    ret = {}
    for doc in documents:
        res = -1
        try:
            res = ssdeep.compare(doc["fuzzy_hash"], fuzz)
        except InternalError:
            # res is still -1 here; the line identifies the bad record.
            print(str(res) + "------" +
                  str(doc["fuzzy_hash"]) + "-----" + str(doc["file_id"]))
            continue
        if res >= thresh:
            ret[doc["file_id"]] = res
    return ret
Example #3
Source File: SearchModule.py From codex-backend with MIT License | 6 votes |
def fuzz_search_fast(id, p, fuzz):
    """
    Compare ``fuzz`` against field ``p`` of every metadata document.

    Only documents whose hash block size (the integer before the first
    ':') is within ``lap`` of the block size of ``fuzz`` are compared.
    Positive scores are collected in ``dic`` keyed by ``file_id``.

    :param id: identifier used only in the error log message
    :param p: name of the document field holding the fuzzy hash
    :param fuzz: ssdeep hash to compare against
    """
    block = int(fuzz.split(':')[0])
    lap = 500
    coll_meta = db[envget("db_metadata_collection")]
    documents = list(coll_meta.find({}, {"file_id": 1, p: 1}))
    dic = {}
    for doc in documents:
        try:
            f_comp = doc[p]
            block_comp = int(f_comp.split(':')[0])
            if block - lap <= block_comp <= block + lap:
                res = ssdeep.compare(f_comp, fuzz)
                if res > 0:
                    dic[doc["file_id"]] = res
        except Exception:
            # "except Exception, e" is a SyntaxError on Python 3 and the
            # bound name was unused; this form is valid on both 2 and 3.
            logging.exception(
                "fuzz_search_fast(id=" + str(id) + ",p=" + str(p) +
                ",fuzz=" + str(fuzz))
            continue
    # NOTE(review): the visible code never returns or otherwise uses
    # `dic` - confirm whether the rest of the function is missing here.
Example #4
Source File: ssdeep_analytics.py From multiscanner with Mozilla Public License 2.0 | 6 votes |
def main():
    """
    Command-line entry point for the ssdeep analytics.

    Exactly one of -c/--compare or -g/--group must be given; -v/--verbose
    is passed to SSDeepAnalytic as its ``debug`` flag.
    """
    description = (
        "Script to interact with Multiscanner's Elasticsearch datastore "
        "to run analytics based on ssdeep hash."
    )
    parser = argparse.ArgumentParser(description=description)
    mode = parser.add_mutually_exclusive_group(required=True)
    parser.add_argument(
        '-v', '--verbose',
        dest='verbose',
        action='store_true',
        help='Increase output to stdout')
    mode.add_argument(
        '-c', '--compare',
        dest='compare',
        action='store_true',
        help='Run ssdeep.compare using a few optimizations based on ssdeep'
             ' hash structure.')
    mode.add_argument(
        '-g', '--group',
        dest='group',
        action='store_true',
        help='Returns group of samples based on ssdeep hash.')
    args = parser.parse_args()

    ssdeep_analytic = SSDeepAnalytic(debug=args.verbose)

    if args.compare:
        ssdeep_analytic.ssdeep_compare()
    elif args.group:
        pprint(ssdeep_analytic.ssdeep_group())
    else:
        # Neither mode selected: nothing ran, so nothing to report.
        return
    print('[*] Success')
Example #5
Source File: gitgot.py From GitGot with GNU Lesser General Public License v3.0 | 5 votes |
def should_parse(repo, state, is_gist=False):
    """
    Decide whether a search hit should be processed.

    A hit is rejected when its owner is in ``state.bad_users``, when (for
    non-gists) its repository or file name is in ``state.bad_repos`` /
    ``state.bad_files``, when its content is more than
    SIMILARITY_THRESHOLD similar to any signature in
    ``state.bad_signatures``, or when GitHub reports the object as
    unknown. Each rejection prints the reason.

    :return: True when the hit passes every check, otherwise False
    """
    def reject(reason):
        # Print the reason in the failure colour and signal rejection.
        print(bcolors.FAIL + reason + bcolors.ENDC)
        return False

    if is_gist:
        owner_login = repo.owner.login
    else:
        owner_login = repo.repository.owner.login

    if owner_login in state.bad_users:
        return reject("Failed check: Ignore User")

    if not is_gist:
        if repo.repository.name in state.bad_repos:
            return reject("Failed check: Ignore Repo")
        if repo.name in state.bad_files:
            return reject("Failed check: Ignore File")

    # Fuzzy Hash Comparison
    try:
        if not is_gist:
            # Temporary fix for PyGithub until fixed upstream (PyGithub#1178)
            quoted_path = urllib.parse.quote(repo._path.value)
            repo._url.value = repo._url.value.replace(
                repo._path.value, quoted_path)

        candidate_sig = ssdeep.hash(repo.decoded_content)
        for known_bad in state.bad_signatures:
            similarity = ssdeep.compare(candidate_sig, known_bad)
            if similarity > SIMILARITY_THRESHOLD:
                return reject(
                    "Failed check: Ignore Fuzzy Signature on Contents "
                    "({}% Similarity)".format(similarity))
    except github.UnknownObjectException:
        return reject("API Error: File no longer exists on github.com")

    return True
Example #6
Source File: vectorization.py From IntroductionToMachineLearningForSecurityPros with GNU General Public License v3.0 | 5 votes |
def vectorize(feature_set, c2_data):
    """
    Build a dense feature vector of ssdeep similarities.

    :param feature_set: sequence of ``(offset, code, ssdeep_hash)`` tuples
    :param c2_data: mapping from offset to a dict with ``"code"`` and
        ``"content_ssdeep"`` entries
    :return: 1-D float array with one slot per feature; a slot holds the
        ssdeep score divided by 100 when the offset is present in
        ``c2_data`` and its code matches, otherwise 0.0
    """
    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the same dtype on every version.
    vector = np.zeros((len(feature_set),), dtype=np.float64)
    for index, (offset, code, ssdeep_hash) in enumerate(feature_set):
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            vector[index] = float(d) / 100.0
    return vector
Example #7
Source File: vectorization.py From IntroductionToMachineLearningForSecurityPros with GNU General Public License v3.0 | 5 votes |
def vectorize_with_sparse_features(sparse_feature_set, feature_count, c2_data):
    """
    Build a sparse 1 x ``feature_count`` vector of ssdeep similarities.

    :param sparse_feature_set: iterable of
        ``(index, (offset, code, ssdeep_hash))`` pairs
    :param feature_count: number of columns in the returned matrix
    :param c2_data: mapping from offset to a dict with ``"code"`` and
        ``"content_ssdeep"`` entries
    :return: ``lil_matrix`` of shape ``(1, feature_count)``; column
        ``index`` holds the ssdeep score divided by 100 when the offset
        is present in ``c2_data`` and its code matches
    """
    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the same dtype on every version.
    vector = lil_matrix((1, feature_count), dtype=np.float64)
    for index, (offset, code, ssdeep_hash) in sparse_feature_set:
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            vector[0, index] = float(d) / 100.0
    return vector
Example #8
Source File: cfire.py From Security-Research with BSD 3-Clause "New" or "Revised" License | 5 votes |
def ssdeepcompare(target, IP):
    """
    Print the ssdeep similarity between a site and a candidate origin IP.

    Fetches ``http://<target>/`` and ``https://<IP>`` (with the Host
    header set to ``target``), hashes both response bodies with ssdeep,
    and prints both hashes and their comparison score.

    :param target: host name of the site being investigated
    :param IP: candidate address to fetch with ``target`` as Host header
    """
    try:
        ss_target = requests.get('http://{}/'.format(target))
        ssdeep_target_fuzz = ssdeep.hash(ss_target.text)
        # Single-argument print(...) with %-formatting produces the same
        # output on Python 2 and 3; the former print statements were a
        # SyntaxError on Python 3.
        print("%s %s" % (target, ssdeep_target_fuzz))
        # NOTE(review): verify=False disables TLS certificate checks;
        # presumably deliberate because the IP will not match the cert.
        content = requests.get('https://{}'.format(IP), verify=False,
                               timeout=5, headers={'Host': target})
        ssdeep_fuzz = ssdeep.hash(content.text)
        print("%s %s" % (IP, ssdeep_fuzz))
        score = ssdeep.compare(ssdeep_target_fuzz, ssdeep_fuzz)
        print("ssdeep score for %s is %s" % (IP, score))
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        # timeout=5 above can raise a read timeout, which is not a
        # ConnectionError and previously escaped this handler.
        print("cant connect to %s" % (IP,))
Example #9
Source File: hash.py From FACT_core with GNU General Public License v3.0 | 5 votes |
def get_ssdeep_comparison(first, second):
    """Return the result of ssdeep.compare for the two given hashes."""
    similarity = ssdeep.compare(first, second)
    return similarity
Example #10
Source File: file_coverage.py From FACT_core with GNU General Public License v3.0 | 5 votes |
def _find_similar_file_for(self, file_uid: str, parent_uid: str, comparison_fo: 'FileObject'):
    """
    Yield files in ``comparison_fo`` whose ssdeep hash resembles ``file_uid``'s.

    :param file_uid: uid of the file to look for matches for
    :param parent_uid: uid passed along with ``file_uid`` to build its id
    :param comparison_fo: object whose ``files_included`` are the candidates
    :return: generator of ``((id1, id2), similarity)`` for every candidate
        whose similarity exceeds ``self.ssdeep_ignore_threshold``
    """
    hash_one = self.database.get_ssdeep_hash(file_uid)
    if not hash_one:
        return
    id1 = self._get_similar_file_id(file_uid, parent_uid)
    for potential_match in comparison_fo.files_included:
        id2 = self._get_similar_file_id(potential_match, comparison_fo.uid)
        hash_two = self.database.get_ssdeep_hash(potential_match)
        if not hash_two:
            # The original compared first and checked hash_two afterwards,
            # so a candidate without a hash reached ssdeep.compare (which
            # rejects non-string input per the python-ssdeep docs)
            # instead of being skipped.
            continue
        ssdeep_similarity = ssdeep.compare(hash_one, hash_two)
        if ssdeep_similarity > self.ssdeep_ignore_threshold:
            yield (id1, id2), ssdeep_similarity
Example #11
Source File: vectorization.py From IDPanel with MIT License | 5 votes |
def vectorize(feature_set, c2_data):
    """
    Build a dense feature vector of ssdeep similarities.

    :param feature_set: sequence of ``(offset, code, ssdeep_hash)`` tuples
    :param c2_data: mapping from offset to a dict with ``"code"`` and
        ``"content_ssdeep"`` entries
    :return: 1-D float array with one slot per feature; a slot holds the
        ssdeep score divided by 100 when the offset is present in
        ``c2_data`` and its code matches, otherwise 0.0
    """
    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the same dtype on every version.
    vector = np.zeros((len(feature_set),), dtype=np.float64)
    for index, (offset, code, ssdeep_hash) in enumerate(feature_set):
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            vector[index] = float(d) / 100.0
    return vector
Example #12
Source File: vectorization.py From IDPanel with MIT License | 5 votes |
def vectorize_with_sparse_features(sparse_feature_set, feature_count, c2_data):
    """
    Build a sparse 1 x ``feature_count`` vector of ssdeep similarities.

    :param sparse_feature_set: iterable of
        ``(index, (offset, code, ssdeep_hash))`` pairs
    :param feature_count: number of columns in the returned matrix
    :param c2_data: mapping from offset to a dict with ``"code"`` and
        ``"content_ssdeep"`` entries
    :return: ``lil_matrix`` of shape ``(1, feature_count)``; column
        ``index`` holds the ssdeep score divided by 100 when the offset
        is present in ``c2_data`` and its code matches
    """
    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the same dtype on every version.
    vector = lil_matrix((1, feature_count), dtype=np.float64)
    for index, (offset, code, ssdeep_hash) in sparse_feature_set:
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            vector[0, index] = float(d) / 100.0
    return vector
Example #13
Source File: ssdeep_querying.py From ssdeep-elastic with MIT License | 4 votes |
def get_matching_items_by_ssdeep(ssdeep_value, threshold_grade):
    """
    Find items whose ssdeep hash matches ``ssdeep_value``, using
    ElasticSearch to pre-filter the candidates.

    Candidates must have a chunksize equal to, double, or half that of
    ``ssdeep_value`` and must match on ``chunk`` or ``double_chunk``;
    each candidate is then graded with ssdeep.compare.

    :param ssdeep_value: The ssdeep hash value of the item
    :param threshold_grade: The grade being used as a threshold, only
        items that pass this grade will be returned
    :return: A List of matching items (in this case, a list of sha256
        hash values)
    """
    chunksize_text, chunk, double_chunk = ssdeep_value.split(':')
    chunksize = int(chunksize_text)

    es = elasticsearch.Elasticsearch(['localhost:9200'])

    chunksize_clause = {
        'terms': {
            'chunksize': [chunksize, chunksize * 2, int(chunksize / 2)]
        }
    }
    chunk_clause = {'match': {'chunk': {'query': chunk}}}
    double_chunk_clause = {
        'match': {'double_chunk': {'query': double_chunk}}
    }
    query = {
        'query': {
            'bool': {
                'must': [
                    chunksize_clause,
                    {
                        'bool': {
                            'should': [chunk_clause, double_chunk_clause],
                            'minimum_should_match': 1
                        }
                    }
                ]
            }
        }
    }

    results = es.search('ssdeep-index', body=query)

    return [
        hit['_source']['sha256']
        for hit in results['hits']['hits']
        if ssdeep.compare(hit['_source']['ssdeep'], ssdeep_value)
        >= threshold_grade
    ]
Example #14
Source File: parse_ssdeep.py From android-malware-analysis with GNU General Public License v3.0 | 4 votes |
def main():
    """
    Build similarity bit-vectors for benign and malicious apps.

    Reads ``benign_apk/benign_apk_ssdeep.csv`` and
    ``malicious_apk/malicious_apk_ssdeep.csv`` (relative to the current
    directory), compares each app's hash against 1000 randomly chosen
    hashes of the same category, buckets the mean score, and writes the
    result to ``app_hash_vectors.json``.
    """
    all_hashes = {'malicious': [], 'benign': []}
    # mapping from android app names to 1 or 0 for malware or goodware
    app_malicious_map = {}
    similarity_buckets = ['similarity_limit_0', 'similarity_limit_0.2',
                          'similarity_limit_0.4', 'similarity_limit_0.6',
                          'similarity_limit_0.8', 'similarity_limit_1.0']

    for i, directory in enumerate(['benign_apk', 'malicious_apk']):
        category = 'malicious' if i else 'benign'
        # Open by joined path instead of os.chdir() so a failed open
        # cannot leave the process in the wrong working directory.
        csv_path = os.path.join(
            directory, directory.split('_')[0] + '_apk_ssdeep.csv')
        with open(csv_path) as hashes:
            for j, line in enumerate(hashes):
                if j == 0:
                    continue  # first line is skipped (presumably a header)
                fields = line.split(',')
                b64hash = fields[0]
                app_name = fields[-1].split('/')[-1][:-2]
                app_malicious_map[app_name] = [1, 0] if i else [0, 1]
                all_hashes[category].append((app_name, b64hash))

    # mapping from each app to its similarity score and classification
    all_apps = {}
    num_zero = {}
    num_each = {}
    for category in all_hashes:
        num_zero[category] = 0
        num_each[category] = 0
        for app_name, this_score in all_hashes[category]:
            similarity_scores = []
            for _ in range(1000):
                other_score = random.choice(all_hashes[category])[1]
                similarity_scores.append(
                    ssdeep.compare(this_score, other_score))
            score = numpy.mean(similarity_scores)
            num_each[category] += 1
            if score == 0:
                num_zero[category] += 1

            # NOTE(review): python-ssdeep documents compare() as returning
            # 0-100 while the bucket limits stop at 1.0 - confirm whether
            # the score was meant to be normalised first.
            bit_vector = []
            last_limit = -0.01
            for limit in similarity_buckets:
                float_limit = float(limit.split('_')[-1])
                if last_limit < score <= float_limit:
                    bit_vector.append(1)
                else:
                    bit_vector.append(0)
                last_limit = float_limit
            if not any(bit_vector):  # score > 1
                bit_vector[-1] = 1
            all_apps[app_name] = {
                'vector': bit_vector,
                'malicious': app_malicious_map[app_name],
            }

    with open('app_hash_vectors.json', 'w') as outfile:
        json.dump({'features': similarity_buckets, 'apps': all_apps}, outfile)

    # The benign total was previously printed as num_zero['benign'],
    # which always reported "N of N".
    print('{} of {} malicious apps and {} of {} benign apps had zero '
          'similarity found'.format(num_zero['malicious'],
                                    num_each['malicious'],
                                    num_zero['benign'],
                                    num_each['benign']))
    print('Wrote data on ' + str(len(all_apps)) + ' apps to a file.')
Example #15
Source File: ssdeep_python.py From Learning-Python-for-Forensics-Second-Edition with MIT License | 4 votes |
def main(known_file, comparison, output_type):
    """
    The main function handles the main operations of the script

    :param known_file: path to known file
    :param comparison: path to look for similar files
    :param output_type: type of output to provide
    :return: None
    """

    # Check output formats
    if output_type not in OUTPUT_OPTS:
        logger.error(
            "Unsupported output format '{}' selected. Please "
            "use one of {}".format(
                output_type, ", ".join(OUTPUT_OPTS)))
        sys.exit(2)
    elif output_type == 'csv':
        # Special handling for CSV headers
        print('"similarity","known_file","known_hash",'
              '"comp_file","comp_hash"')

    # Check provided file paths
    known_file = os.path.abspath(known_file)
    comparison = os.path.abspath(comparison)

    # Generate ssdeep signature for known file
    if not os.path.exists(known_file):
        # Report the path that is actually missing; the original
        # formatted `comparison` into this message.
        logger.error("Error - path {} not found".format(
            known_file))
        sys.exit(1)

    known_hash = ssdeep.hash_from_file(known_file)

    # Generate and test ssdeep signature for comparison file(s)
    if os.path.isdir(comparison):
        # Process files in folders
        for root, _, files in os.walk(comparison):
            for f in files:
                file_entry = os.path.join(root, f)
                comp_hash = ssdeep.hash_from_file(file_entry)
                comp_val = ssdeep.compare(known_hash, comp_hash)
                output(known_file, known_hash,
                       file_entry, comp_hash,
                       comp_val, output_type)

    elif os.path.isfile(comparison):
        # Process a single file
        comp_hash = ssdeep.hash_from_file(comparison)
        comp_val = ssdeep.compare(known_hash, comp_hash)
        # `file_entry` is only bound in the directory branch; using it
        # here raised NameError. The compared file is `comparison`.
        output(known_file, known_hash, comparison, comp_hash,
               comp_val, output_type)
    else:
        logger.error("Error - path {} not found".format(
            comparison))
        sys.exit(1)
Example #16
Source File: malfunction.py From Malfunction with GNU Lesser General Public License v2.1 | 4 votes |
def process_sigs(cursor, sig_list, bin_list):
    """ Process the function signatures

    Go through every function and compare it to functions in every binary
    Get the highest score per function and add it to a score_list

    cursor - the database cursor
    sig_list - the list of function signatures for analysis
    bin_list - the list of binaries in the current database to compare to

    Returns a list with one entry per binary; each entry is a list with
    the highest ssdeep score found for each signature in sig_list."""

    score_list = []

    # Total number of comparisons, used to size the progress display.
    maxval = 0
    for row in bin_list:
        cursor.execute("SELECT count(hash) FROM functions WHERE binaryid=?",
                       (row[0],))
        maxval += int(cursor.fetchone()[0])
    maxval = maxval * len(sig_list)

    if progressbar:
        widgets = [" ", progressbar.Bar(marker="#"), " ",
                   progressbar.Percentage(), " ", progressbar.ETA()]
        pbar = progressbar.ProgressBar(widgets=widgets,
                                       maxval=maxval).start()
    else:
        pbar = None

    i = 0
    for row in bin_list:
        # The stored hashes depend only on the binary, so fetch them once
        # per binary instead of re-running the query for every signature.
        cursor.execute("SELECT hash FROM functions WHERE binaryid=?",
                       (row[0],))
        known_hashes = [h[0] for h in cursor.fetchall()]

        function_score_list = []
        for sig in sig_list:
            highest_score = 0
            # h means hash; hash is a builtin in Python, so the name is
            # avoided to keep from shadowing it
            for h in known_hashes:
                strength = ssdeep.compare(sig, h)

                if strength > highest_score:
                    highest_score = strength
                i += 1
                if pbar:
                    pbar.update(i)
                elif i % 10000 == 0 or i == maxval:
                    print("%d / %d Done" % (i, maxval))
            function_score_list.append(highest_score)
        score_list.append(function_score_list)
    if pbar:
        pbar.finish()
    return score_list