You Know, for Search
# memory
# grep MemTotal /proc/meminfo
# processor
# cat /proc/cpuinfo | grep "model name"
# hard disk
# fdisk -l
# run in a single node
# docker download
docker pull docker.elastic.co/elasticsearch/elasticsearch:7.10.1
# docker run
docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.10.1
# check
# open http://ip:9200/
# firewall
firewall-cmd --list-all
Data Importer + Data Browser + Query Explorer + Search Preview
Bring data from your JSON or CSV files to Elasticsearch, and set data mappings with a guided process. Browse your imported data, edit it, and add new fields. Create rich filtered and query views. Export data in JSON and CSV formats. Create search UI and test search relevancy of your dataset with zero lines of code. Export the code to get a functional React web app.
Installation [chrome extension / docker / live-server]
docker pull appbaseio/dejavu
docker run -p 1358:1358 -d appbaseio/dejavu
open http://localhost:1358/
# pip install elasticsearch
from datetime import datetime
from elasticsearch import Elasticsearch

# Client pointed at the default local node (http://localhost:9200).
es = Elasticsearch()

# Index a single document under id 1.
document = {
    'author': 'kimchy',
    'text': 'Elasticsearch: cool. bonsai cool.',
    'timestamp': datetime.now(),
}
response = es.index(index="test-index", id=1, body=document)
print(response['result'])

# Fetch the same document back by id.
response = es.get(index="test-index", id=1)
print(response['_source'])

# Refresh so the new document is visible to search, then match everything.
es.indices.refresh(index="test-index")
response = es.search(index="test-index", body={"query": {"match_all": {}}})
print("Got %d Hits:" % response['hits']['total']['value'])
for record in response['hits']['hits']:
    print("%(timestamp)s %(author)s: %(text)s" % record["_source"])
Fetching more than 10,000 results (the default max_result_window):
1. narrow the result set with a conditional (filtered) query
2. split the request into multiple smaller queries
3. raise the limit in the server configuration
- 3.1 curl -XPUT http://127.0.0.1:9200/_settings -d '{ "index" : { "max_result_window" : 100000000}}'
- 3.2 config/elasticsearch.yml 最后加上index.max_result_window: 100000000,注意在最前面加上空格
print(Elasticsearch(url).search(index=idx,body=bdy,size=sz)['hits']['hits'])
print(Elasticsearch(url).count(index=index_date, q='xxx_id:xxxx')['count'])
举例统计某个skywalking服务的信息:
#!/usr/bin/env python3
# Collect statistics about SkyWalking services stored in Elasticsearch.
import sys, base64
from elasticsearch import Elasticsearch
from proto.language_agent_v2 import trace_pb2

# Target ES cluster and a reference URL for listing its indices (redacted).
url_target = "http://xxx:9200"
url_reference = "http://xxx/_cat/indices?v"
# Inventory indices holding service / endpoint metadata (redacted names).
idx_service_inventory = "xxx"
idx_interface_inventory = "xxx"
# Daily-index prefixes; a full index name is prefix + YYYYMMDD day below.
idx_service_prefix = "xxx-"
idx_relation_prefix = "xxx-"
# Days (YYYYMMDD) to scan when building per-day index names.
idx_days = [20210101,20210102,20210103,20210104,20210105,20210106,20210107]
# Default match-all query body.
bdy_matchall = {"query": {"match_all": {}}}
# Response keys used throughout: scroll cursor id and document payload.
bdy_scroll = '_scroll_id'
bdy_core = '_source'
# Visual separator appended to printed output.
bdy = '\n------------------------------'
def gen_date(idx_prefix, days=None):
    """Return daily index names built as ``idx_prefix + str(day)``.

    Generalized (backward-compatible): ``days`` defaults to the
    module-level ``idx_days`` list; pass an explicit iterable of
    YYYYMMDD ints to build names for other dates.
    """
    return [idx_prefix + str(day) for day in (idx_days if days is None else days)]
class SkywalkingES:
    """Small helper around an Elasticsearch client for inspecting SkyWalking indices."""

    def __init__(self, url=url_target):
        # Connection target defaults to the module-level cluster URL.
        self.es = Elasticsearch(url)

    def _search(self, idx, bdy=bdy_matchall, sz=10000):
        """Run a plain (non-scroll) search against `idx` and return the raw hit list."""
        return self.es.search(index=idx, body=bdy, size=sz)['hits']['hits']

    def _scroll(self, idx, bdy=bdy_matchall, wait='5m', out='5s'):
        """Scroll through every hit of `idx`, printing each 1000-hit batch.

        Fixes over the original:
        - the loop tested ``hits['total'] > 0``, which never changes between
          scroll pages (and is a dict on ES 7.x, where the comparison raises),
          so the loop could not terminate normally — loop on the returned
          batch being non-empty instead;
        - the first search page was discarded before any printing — it is
          printed now;
        - ``wait`` / ``out`` were accepted but ignored — they are honoured
          now, with the same defaults as the old hard-coded values.
        """
        page = self.es.search(index=idx, body=bdy, scroll=wait, timeout=out, size=1000)
        scroll_id = page.get(bdy_scroll)
        while page['hits']['hits']:
            print(page['hits'], bdy)
            page = self.es.scroll(scroll_id=scroll_id, scroll='2m')
            # Keep the previous cursor if the server did not return a new one.
            scroll_id = page.get(bdy_scroll, scroll_id)
        print('That was all.')

    def _count(self, query_condition, idx):
        """Count documents in `idx` matching a Lucene query string; -1 on error (best-effort)."""
        try:
            return self.es.count(index=idx, q=query_condition)['count']
        except Exception as e:
            # Deliberate best-effort: report the failure and return a sentinel
            # so callers that loop over many indices keep going.
            print(e)
            return -1

    def _service_count(self, idx):
        """Print 'name,sequence,count' CSV lines for every known service in `idx`."""
        print('starting %s ...' % idx)
        for service in self._search(idx_service_inventory):
            # Commas inside names would corrupt the CSV output, so replace them.
            service_name = service[bdy_core]['name'].replace(',', '#')
            service_sequence = service[bdy_core]['sequence']
            service_count = self._count('service_id:' + str(service_sequence), idx)
            # if service_count == 0: continue
            print("%s,%s,%s" % (service_name, service_sequence, service_count))

    def get_service_list(self):
        """Return every document from the service inventory index."""
        return self._search(idx_service_inventory)

    def get_service_count(self):
        """Print per-service counts for every configured day (side effects only)."""
        return [self._service_count(idx) for idx in gen_date(idx_service_prefix)]

    def get_interface_list(self):
        """Return every document from the endpoint/interface inventory index."""
        return self._search(idx_interface_inventory)

    def get_service_by_name(self, nm):
        """Look up a service's sequence id by name, then fetch its documents per day.

        Robustness fix: the original raised IndexError when no service
        matched `nm`; report the miss and return [] instead.
        """
        matches = self._search(idx=idx_service_inventory, bdy={"query": {"match": {"name": nm}}})
        if not matches:
            print('no service named %s' % nm)
            return []
        sequence = matches[0][bdy_core]['sequence']
        res = []
        for i in gen_date(idx_service_prefix):
            res.extend(self._search(idx=i, bdy={"query": {"match": {"service_id": sequence}}}))
        print(res)
        return res

    # TODO
    # tsp-apm-es-cluster-prd_segment-20210106
    def get_data_binary(self):
        """Decode and print the protobuf `data_binary` payload of each hit in the first day's index."""
        test = gen_date(idx_service_prefix)[0]
        data = self._search(test)
        segment_object = trace_pb2.SegmentObject()
        for service in data:
            core = service[bdy_core]
            trace_id = core['trace_id']
            # print(trace_id)
            segment_object.ParseFromString(base64.b64decode(core['data_binary']))
            print(segment_object, bdy)
        print(len(data))
# def relation():
# # service
# es = SkywalkingES(); service_list = es.search(idx_service_inventory)
# service_no_ip_address_list = {}
# for service in service_list:
# if service[bdy_core]['is_address'] == 1: continue
# service_no_ip_address_list[service[bdy_core]['sequence']] = service[bdy_core]['name'].replace(',','#')
# # relation
# for index_relation in [index_relation_0,index_relation_2,index_relation_3,index_relation_4,index_relation_5]:
# print(print_mark); relation_list = es.search(index_relation, body_matchall, 10000)
# for relation in relation_list:
# rel = relation[bdy_core]
# source_idx = rel['source_service_id']
# target_idx = rel['dest_service_id']
# try: print('%s,%s' % (service_no_ip_address_list[source_idx],service_no_ip_address_list[target_idx]))
# except: print('%s,%s' % (source_idx, target_idx))
if __name__ == '__main__':
    # Demo entry point: scroll through one day's index and print its hits.
    client = SkywalkingES()
    client._scroll(idx_service_prefix + '20210106')
    # Other one-off inspection helpers (enable as needed):
    # client.get_data_binary()
    # client.get_service_by_name('xxx')
    # print(client.get_service_list())
    # print(client.get_service_count())
    # print(client.get_interface_list())
#
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。