EDT部署指南
应用服务
部署列表
应用 | 服务用途 | 依赖组件 |
---|---|---|
edt-event-data-merge | flink实时写后定时合并数据 | flink hdfs |
edt-event-process-data | 接收kafka数据处理到hive,clickhouse,hbase(维度表) | flink clickhouse hdfs kafka |
edt-event-process-pipeline | 处理前端上报接口校验,正确和错误的上报数据分别分发到kafka供edt-event-process-data消费,上报接口数据完整存入clickhouse | flink clickhouse zookeeper kafka |
edt-report-api | 获取token, 事件上报 | kafka |
edt-management-service | EDT后台关联系统 | zookeeper,pgsql,clickhouse |
应用部署脚本
edt-event-process-pipeline服务
依赖jar:通过maven打包后上传到指定的目录
启动脚本:
bash /data/software/flink-1.17.1/bin/flink run -c com.chinapex.edt.pipeline.FlinkEventProcessPipelineApp /home/hadoop/project/edt-event-process-pipeline.jar
application.properties
###############################flink配置###############################
#并行数
stream.parallelism=1
#是否开启checkpoint
stream.checkpoint.enable=true
#开启检查点并设置检查点间隔时间,以毫秒为单位
stream.checkpoint.interval=5000
#重启次数
stream.restart.attempts=4
#重启间隔时间,单位ms
stream.delay.between.attempts=60000
#两个Checkpoint 之间最少等待时间,单位ms
stream.min.pause.between.checkpoints=500
#checkpoint的超时时间,单位ms
stream.checkpoint.timeout=600000
#设置同一时间有多少个checkpoint可以同时执行
stream.max.concurrent.checkpoints=1
#设置允许的检查点失败次数
stream.tolerable.checkpoint.failure.number=0
#设置时间窗口间隔,单位秒
stream.window.time=10
###############################flink配置###############################
##Source kafka config #########################################################
nexus.pipeline.kafka.source.bootstrap-servers=10.25.19.207:9092,10.25.19.208:9092
nexus.pipeline.kafka.source.topic=data_buffer_nexus_release_dc_sdk_push_json
nexus.pipeline.kafka.source.consumer.group-id=default
nexus.pipeline.kafka.source.max-poll-records=10000
nexus.pipeline.kafka.source.session.timeout.ms=120000
nexus.pipeline.kafka.source.request.timeout.ms=180000
# latest, earliest, none
nexus.pipeline.kafka.source.auto-offset-reset=latest
##Sink kafka config #########################################################
nexus.pipeline.kafka.sink.bootstrap-servers=10.25.19.207:9092,10.25.19.208:9092
nexus.pipeline.kafka.dc.valid.topic=data_pipeline_valid_release_json
nexus.pipeline.kafka.dc.invalid.topic=data_pipeline_invalid_release_json
nexus.pipeline.kafka.dc.event.distinct.topic=data_pipeline_event_distinct_release_json
#zookeeper
nexus.edt.zookeeper.connect.string=10.25.19.204:2181,10.25.19.205:2181,10.25.19.206:2181
nexus.edt.zookeeper.namespace=edt_event
nexus.edt.zookeeper.session.timeout.ms=60000
nexus.edt.zookeeper.max.close.wait.ms=15000
nexus.edt.zookeeper.base.sleep.time.ms=3000
nexus.edt.zookeeper.max.retries=10
nexus.edt.zookeeper.event.attribute.mapping.path=/event/attribute/mapping
nexus.edt.zookeeper.event.path=/event
#clickhouse
clickhouse.host=10.25.19.208:8123
clickhouse.database=default
clickhouse.username=default
clickhouse.password=123
clickhouse.stream.parallelism=1
sql_clickhouse.sql
CREATE DATABASE IF NOT EXISTS ods_dc;
-- WARNING: destructive — drops any existing data before recreating the table.
-- Remove this line if the table must be preserved across redeployments.
DROP TABLE IF EXISTS ods_dc.ods_event_pipeline_realtime;
-- Realtime query table for the event center; fed by edt-event-process-pipeline.
CREATE TABLE IF NOT EXISTS ods_dc.ods_event_pipeline_realtime(
    apexId Nullable(String),
    appId Nullable(String),
    appVersion Nullable(String),
    content Nullable(String),
    createTime DateTime,
    ts DateTime,
    error Nullable(String),
    eventCode Nullable(String),
    eventType Nullable(String),
    projectId Nullable(String),
    sendType Nullable(String),
    wechatOpenId Nullable(String),
    wechatUnionId Nullable(String),
    eventName Nullable(String)
) ENGINE = MergeTree
-- Partition by calendar day, not by the raw DateTime value: partitioning on
-- `ts` directly creates one partition per distinct second, which quickly
-- explodes the part count and degrades MergeTree performance.
PARTITION BY toYYYYMMDD(ts)
ORDER BY (ts, createTime)
COMMENT '事件中心实时查询表';
edt-event-process-data服务
1、edt-event-process-data 启动以及properties脚本
依赖jar:通过maven打包后上传到指定的目录
启动脚本
bash /data/software/flink-1.17.1/bin/flink run -c com.chinapex.edt.data.process.FlinkKafkaConsumerAndProcess /home/hadoop/project/edt-event-process-data.jar
application.properties
##Source kafka config #########################################################
nexus.pipeline.kafka.bootstrap-servers=10.25.19.207:9092,10.25.19.208:9092
nexus.pipeline.kafka.dc.valid.topic=data_pipeline_valid_release_json
nexus.pipeline.kafka.dc.invalid.topic=data_pipeline_invalid_release_json
nexus.pipeline.kafka.consumer.group-id=default
nexus.pipeline.kafka.producer.max-poll-records=10000
nexus.pipeline.kafka.session.timeout.ms=120000
nexus.pipeline.kafka.request.timeout.ms=180000
# latest, earliest, none
nexus.pipeline.kafka.auto-offset-reset=latest
spring.datasource.url=jdbc:postgresql://10.25.19.2:5432/postgres
spring.datasource.username=postgres
spring.datasource.password=123456
spring.datasource.driver-class-name=org.postgresql.Driver
spring.datasource.type=com.zaxxer.hikari.HikariDataSource
spring.datasource.hikari.schema=nexus_edt
mybatis-plus.configuration.map-underscore-to-camel-case=true
mybatis-plus.mapper-locations=classpath:mapper/*.xml
###############################flink配置###############################
#并行数
stream.parallelism=1
#是否开启checkpoint
stream.checkpoint.enable=true
#开启检查点并设置检查点间隔时间,以毫秒为单位
stream.checkpoint.interval=10000
#重启次数
stream.restart.attempts=4
#重启间隔时间,单位ms
stream.delay.between.attempts=60000
#两个Checkpoint 之间最少等待时间,单位ms
stream.min.pause.between.checkpoints=500
#checkpoint的超时时间,单位ms
stream.checkpoint.timeout=600000
#设置同一时间有多少个checkpoint可以同时执行
stream.max.concurrent.checkpoints=3
#设置允许的检查点失败次数
stream.tolerable.checkpoint.failure.number=0
#设置时间窗口间隔,单位秒
stream.window.time=30
#实时指标计算的时间窗口间隔,单位秒
stream.events.metrics.window.time=10
###############################flink配置###############################
#hdfs
hdfs.root.path=hdfs://nn1:8020
raw.events.hdfs.path=/raw/dc/events
raw.events.error.hdfs.path=/raw/dc/events/error
dwd.events.hdfs.path=/user/hive/warehouse/dwd_events
hadoop.user.name=hadoop
#EventsDwd
struct.evetns.dwd=distinct_id:string,ts:string,apex_id:string,event_code:string,tenant_id:string,project_id:string,app_id:string,platform:string,message_id:string,user_id:string,event_name:string,event_type:string,send_type:string,event_code_md5:string,event_duration:string,browse_time:string,device_id:string,device_model:string,device_brand:string,device_type:string,device_uuid:string,carriers:string,network_type:string,android_id:string,imei:string,mac:string,user_agent:string,os:string,idfa:string,idfv:string,oaid:string,latitude:string,longitude:string,wechat_open_id:string,wechat_union_id:string,screen_width:string,screen_height:string,page_height:string,page_width:string,browser_version:string,session_id:string,sdk_version:string,sdk_type:string,app_version:string,app_package_id:string,mini_program_scene:string,market_name:string,referrer_url:string,referrer_host:string,utm_matching_type:string,utm_source:string,utm_medium:string,utm_term:string,utm_content:string,utm_campaign:string,ip:string,province:string,city:string,county:string,country:string,page_type:string,current_page_id:string,current_page_name:string,current_page_url:string,time_since_last_operation:string,current_page_host:string,current_page_query:string,current_page_path:string,source_page_id:string,source_page_name:string,source_page_url:string,source_page_host:string,source_page_query:string,source_page_path:string,page_X:string,page_Y:string,client_X:string,client_Y:string,element_id:string,element_name:string,element_target_url:string,element_type:string,element_content:string,element_selector:string,element_position:string,element_index:string,element_tag_name:string,element_class:string,element_path:string,upload_time:string,write_log_time:string,insert_db_time:string,track_signup_original_id:string,last_visit_time:string,is_first_time:string,forward_user_id:string,extensions_1:string,extensions_2:string,extensions_3:string,extensions_4:string,extensions_5:string,extensions_6:string,exten
sions_7:string,extensions_8:string,extensions_9:string,extensions_10:string,extensions_11:string,extensions_12:string,extensions_13:string,extensions_14:string,extensions_15:string,extensions_16:string,extensions_17:string,extensions_18:string,extensions_19:string,extensions_20:string,extensions_21:string,extensions_22:string,extensions_23:string,extensions_24:string,extensions_25:string,extensions_26:string,extensions_27:string,extensions_28:string,extensions_29:string,extensions_30:string,extensions_31:string,extensions_32:string,extensions_33:string,extensions_34:string,extensions_35:string,extensions_36:string,extensions_37:string,extensions_38:string,extensions_39:string,extensions_40:string,extensions_41:string,extensions_42:string,extensions_43:string,extensions_44:string,extensions_45:string,extensions_46:string,extensions_47:string,extensions_48:string,extensions_49:string,extensions_50:string,extensions_51:string,extensions_52:string,extensions_53:string,extensions_54:string,extensions_55:string,extensions_56:string,extensions_57:string,extensions_58:string,extensions_59:string,extensions_60:string,extensions_61:string,extensions_62:string,extensions_63:string,extensions_64:string,extensions_65:string,extensions_66:string,extensions_67:string,extensions_68:string,extensions_69:string,extensions_70:string,extensions_71:string,extensions_72:string,extensions_73:string,extensions_74:string,extensions_75:string,extensions_76:string,extensions_77:string,extensions_78:string,extensions_79:string,extensions_80:string,extensions_81:string,extensions_82:string,extensions_83:string,extensions_84:string,extensions_85:string,extensions_86:string,extensions_87:string,extensions_88:string,extensions_89:string,extensions_90:string,extensions_91:string,extensions_92:string,extensions_93:string,extensions_94:string,extensions_95:string,extensions_96:string,extensions_97:string,extensions_98:string,extensions_99:string,extensions_100:string,extensions_101:string,extensions_
102:string,extensions_103:string,extensions_104:string,extensions_105:string,extensions_106:string,extensions_107:string,extensions_108:string,extensions_109:string,extensions_110:string,extensions_111:string,extensions_112:string,extensions_113:string,extensions_114:string,extensions_115:string,extensions_116:string,extensions_117:string,extensions_118:string,extensions_119:string,extensions_120:string,extensions_121:string,extensions_122:string,extensions_123:string,extensions_124:string,extensions_125:string,extensions_126:string,extensions_127:string,extensions_128:string,extensions_129:string,extensions_130:string,extensions_131:string,extensions_132:string,extensions_133:string,extensions_134:string,extensions_135:string,extensions_136:string,extensions_137:string,extensions_138:string,extensions_139:string,extensions_140:string,extensions_141:string,extensions_142:string,extensions_143:string,extensions_144:string,extensions_145:string,extensions_146:string,extensions_147:string,extensions_148:string,extensions_149:string,extensions_150:string,extensions_151:string,extensions_152:string,extensions_153:string,extensions_154:string,extensions_155:string,extensions_156:string,extensions_157:string,extensions_158:string,extensions_159:string,extensions_160:string,extensions_161:string,extensions_162:string,extensions_163:string,extensions_164:string,extensions_165:string,extensions_166:string,extensions_167:string,extensions_168:string,extensions_169:string,extensions_170:string,extensions_171:string,extensions_172:string,extensions_173:string,extensions_174:string,extensions_175:string,extensions_176:string,extensions_177:string,extensions_178:string,extensions_179:string,extensions_180:string,extensions_181:string,extensions_182:string,extensions_183:string,extensions_184:string,extensions_185:string,extensions_186:string,extensions_187:string,extensions_188:string,extensions_189:string,extensions_190:string,extensions_191:string,extensions_192:string,extension
s_193:string,extensions_194:string,extensions_195:string,extensions_196:string,extensions_197:string,extensions_198:string,extensions_199:string,extensions_200:string,extensions_201:string,extensions_202:string,extensions_203:string,extensions_204:string,extensions_205:string,extensions_206:string,extensions_207:string,extensions_208:string,extensions_209:string,extensions_210:string,extensions_211:string,extensions_212:string,extensions_213:string,extensions_214:string,extensions_215:string,extensions_216:string,extensions_217:string,extensions_218:string,extensions_219:string,extensions_220:string,extensions_221:string,extensions_222:string,extensions_223:string,extensions_224:string,extensions_225:string,extensions_226:string,extensions_227:string,extensions_228:string,extensions_229:string,extensions_230:string,extensions_231:string,extensions_232:string,extensions_233:string,extensions_234:string,extensions_235:string,extensions_236:string,extensions_237:string,extensions_238:string,extensions_239:string,extensions_240:string,extensions_241:string,extensions_242:string,extensions_243:string,extensions_244:string,extensions_245:string,extensions_246:string,extensions_247:string,extensions_248:string,extensions_249:string,extensions_250:string,extensions_251:string,extensions_252:string,extensions_253:string,extensions_254:string,extensions_255:string,extensions_256:string,extensions_257:string,extensions_258:string,extensions_259:string,extensions_260:string,extensions_261:string,extensions_262:string,extensions_263:string,extensions_264:string,extensions_265:string,extensions_266:string,extensions_267:string,extensions_268:string,extensions_269:string,extensions_270:string,extensions_271:string,extensions_272:string,extensions_273:string,extensions_274:string,extensions_275:string,extensions_276:string,extensions_277:string,extensions_278:string,extensions_279:string,extensions_280:string,extensions_281:string,extensions_282:string,extensions_283:string,extensi
ons_284:string,extensions_285:string,extensions_286:string,extensions_287:string,extensions_288:string,extensions_289:string,extensions_290:string,extensions_291:string,extensions_292:string,extensions_293:string,extensions_294:string,extensions_295:string,extensions_296:string,extensions_297:string,extensions_298:string,extensions_299:string,extensions_300:string,extensions_301:string,extensions_302:string,extensions_303:string,extensions_304:string,extensions_305:string,extensions_306:string,extensions_307:string,extensions_308:string,extensions_309:string,extensions_310:string,extensions_311:string,extensions_312:string,extensions_313:string,extensions_314:string,extensions_315:string,extensions_316:string,extensions_317:string,extensions_318:string,extensions_319:string,extensions_320:string,extensions_321:string,extensions_322:string,extensions_323:string,extensions_324:string,extensions_325:string,extensions_326:string,extensions_327:string,extensions_328:string,extensions_329:string,extensions_330:string,extensions_331:string,extensions_332:string,extensions_333:string,extensions_334:string,extensions_335:string,extensions_336:string,extensions_337:string,extensions_338:string,extensions_339:string,extensions_340:string,extensions_341:string,extensions_342:string,extensions_343:string,extensions_344:string,extensions_345:string,extensions_346:string,extensions_347:string,extensions_348:string,extensions_349:string,extensions_350:string,extensions_351:string,extensions_352:string,extensions_353:string,extensions_354:string,extensions_355:string,extensions_356:string,extensions_357:string,extensions_358:string,extensions_359:string,extensions_360:string,extensions_361:string,extensions_362:string,extensions_363:string,extensions_364:string,extensions_365:string,extensions_366:string,extensions_367:string,extensions_368:string,extensions_369:string,extensions_370:string,extensions_371:string,extensions_372:string,extensions_373:string,extensions_374:string,exten
sions_375:string,extensions_376:string,extensions_377:string,extensions_378:string,extensions_379:string,extensions_380:string,extensions_381:string,extensions_382:string,extensions_383:string,extensions_384:string,extensions_385:string,extensions_386:string,extensions_387:string,extensions_388:string,extensions_389:string,extensions_390:string,extensions_391:string,extensions_392:string,extensions_393:string,extensions_394:string,extensions_395:string,extensions_396:string,extensions_397:string,extensions_398:string,extensions_399:string,extensions_400:string,extensions_401:string,extensions_402:string,extensions_403:string,extensions_404:string,extensions_405:string,extensions_406:string,extensions_407:string,extensions_408:string,extensions_409:string,extensions_410:string,extensions_411:string,extensions_412:string,extensions_413:string,extensions_414:string,extensions_415:string,extensions_416:string,extensions_417:string,extensions_418:string,extensions_419:string,extensions_420:string,extensions_421:string,extensions_422:string,extensions_423:string,extensions_424:string,extensions_425:string,extensions_426:string,extensions_427:string,extensions_428:string,extensions_429:string,extensions_430:string,extensions_431:string,extensions_432:string,extensions_433:string,extensions_434:string,extensions_435:string,extensions_436:string,extensions_437:string,extensions_438:string,extensions_439:string,extensions_440:string,extensions_441:string,extensions_442:string,extensions_443:string,extensions_444:string,extensions_445:string,extensions_446:string,extensions_447:string,extensions_448:string,extensions_449:string,extensions_450:string,extensions_451:string,extensions_452:string,extensions_453:string,extensions_454:string,extensions_455:string,extensions_456:string,extensions_457:string,extensions_458:string,extensions_459:string,extensions_460:string,extensions_461:string,extensions_462:string,extensions_463:string,extensions_464:string,extensions_465:string,ext
ensions_466:string,extensions_467:string,extensions_468:string,extensions_469:string,extensions_470:string,extensions_471:string,extensions_472:string,extensions_473:string,extensions_474:string,extensions_475:string,extensions_476:string,extensions_477:string,extensions_478:string,extensions_479:string,extensions_480:string,extensions_481:string,extensions_482:string,extensions_483:string,extensions_484:string,extensions_485:string,extensions_486:string,extensions_487:string,extensions_488:string,extensions_489:string,extensions_490:string,extensions_491:string,extensions_492:string,extensions_493:string,extensions_494:string,extensions_495:string,extensions_496:string,extensions_497:string,extensions_498:string,extensions_499:string,extensions_500:string,extensions_501:string,extensions_502:string,extensions_503:string,extensions_504:string,extensions_505:string,extensions_506:string,extensions_507:string,extensions_508:string,extensions_509:string,extensions_510:string,extensions_511:string,extensions_512:string,extensions_513:string,extensions_514:string,extensions_515:string,extensions_516:string,extensions_517:string,extensions_518:string,extensions_519:string,extensions_520:string,extensions_521:string,extensions_522:string,extensions_523:string,extensions_524:string,extensions_525:string,extensions_526:string,extensions_527:string,extensions_528:string,extensions_529:string,extensions_530:string,extensions_531:string,extensions_532:string,extensions_533:string,extensions_534:string,extensions_535:string,extensions_536:string,extensions_537:string,extensions_538:string,extensions_539:string,extensions_540:string,extensions_541:string,extensions_542:string,extensions_543:string,extensions_544:string,extensions_545:string,extensions_546:string,extensions_547:string,extensions_548:string,extensions_549:string,extensions_550:string,extensions_551:string,extensions_552:string,extensions_553:string,extensions_554:string,extensions_555:string,extensions_556:string,e
xtensions_557:string,extensions_558:string,extensions_559:string,extensions_560:string,extensions_561:string,extensions_562:string,extensions_563:string,extensions_564:string,extensions_565:string,extensions_566:string,extensions_567:string,extensions_568:string,extensions_569:string,extensions_570:string,extensions_571:string,extensions_572:string,extensions_573:string,extensions_574:string,extensions_575:string,extensions_576:string,extensions_577:string,extensions_578:string,extensions_579:string,extensions_580:string,extensions_581:string,extensions_582:string,extensions_583:string,extensions_584:string,extensions_585:string,extensions_586:string,extensions_587:string,extensions_588:string,extensions_589:string,extensions_590:string,extensions_591:string,extensions_592:string,extensions_593:string,extensions_594:string,extensions_595:string,extensions_596:string,extensions_597:string,extensions_598:string,extensions_599:string,extensions_600:string,extensions_601:string,extensions_602:string,extensions_603:string,extensions_604:string,extensions_605:string,extensions_606:string,extensions_607:string,extensions_608:string,extensions_609:string,extensions_610:string,extensions_611:string,extensions_612:string,extensions_613:string,extensions_614:string,extensions_615:string,extensions_616:string,extensions_617:string,extensions_618:string,extensions_619:string,extensions_620:string,extensions_621:string,extensions_622:string,extensions_623:string,extensions_624:string,extensions_625:string,extensions_626:string,extensions_627:string,extensions_628:string,extensions_629:string,extensions_630:string,extensions_631:string,extensions_632:string,extensions_633:string,extensions_634:string,extensions_635:string,extensions_636:string,extensions_637:string,extensions_638:string,extensions_639:string,extensions_640:string,extensions_641:string,extensions_642:string,extensions_643:string,extensions_644:string,extensions_645:string,extensions_646:string,extensions_647:string
,extensions_648:string,extensions_649:string,extensions_650:string,extensions_651:string,extensions_652:string,extensions_653:string,extensions_654:string,extensions_655:string,extensions_656:string,extensions_657:string,extensions_658:string,extensions_659:string,extensions_660:string,extensions_661:string,extensions_662:string,extensions_663:string,extensions_664:string,extensions_665:string,extensions_666:string,extensions_667:string,extensions_668:string,extensions_669:string,extensions_670:string,extensions_671:string,extensions_672:string,extensions_673:string,extensions_674:string,extensions_675:string,extensions_676:string,extensions_677:string,extensions_678:string,extensions_679:string,extensions_680:string,extensions_681:string,extensions_682:string,extensions_683:string,extensions_684:string,extensions_685:string,extensions_686:string,extensions_687:string,extensions_688:string,extensions_689:string,extensions_690:string,extensions_691:string,extensions_692:string,extensions_693:string,extensions_694:string,extensions_695:string,extensions_696:string,extensions_697:string,extensions_698:string,extensions_699:string,extensions_700:string,extensions_701:string,extensions_702:string,extensions_703:string,extensions_704:string,extensions_705:string,extensions_706:string,extensions_707:string,extensions_708:string,extensions_709:string,extensions_710:string,extensions_711:string,extensions_712:string,extensions_713:string,extensions_714:string,extensions_715:string,extensions_716:string,extensions_717:string,extensions_718:string,extensions_719:string,extensions_720:string,extensions_721:string,extensions_722:string,extensions_723:string,extensions_724:string,extensions_725:string,extensions_726:string,extensions_727:string,extensions_728:string,extensions_729:string,extensions_730:string,extensions_731:string,extensions_732:string,extensions_733:string,extensions_734:string,extensions_735:string,extensions_736:string,extensions_737:string,extensions_738:stri
ng,extensions_739:string,extensions_740:string,extensions_741:string,extensions_742:string,extensions_743:string,extensions_744:string,extensions_745:string,extensions_746:string,extensions_747:string,extensions_748:string,extensions_749:string,extensions_750:string,extensions_751:string,extensions_752:string,extensions_753:string,extensions_754:string,extensions_755:string,extensions_756:string,extensions_757:string,extensions_758:string,extensions_759:string,extensions_760:string,extensions_761:string,extensions_762:string,extensions_763:string,extensions_764:string,extensions_765:string,extensions_766:string,extensions_767:string,extensions_768:string,extensions_769:string,extensions_770:string,extensions_771:string,extensions_772:string,extensions_773:string,extensions_774:string,extensions_775:string,extensions_776:string,extensions_777:string,extensions_778:string,extensions_779:string,extensions_780:string,extensions_781:string,extensions_782:string,extensions_783:string,extensions_784:string,extensions_785:string,extensions_786:string,extensions_787:string,extensions_788:string,extensions_789:string,extensions_790:string,extensions_791:string,extensions_792:string,extensions_793:string,extensions_794:string,extensions_795:string,extensions_796:string,extensions_797:string,extensions_798:string,extensions_799:string,extensions_800:string,extensions_801:string,extensions_802:string,extensions_803:string,extensions_804:string,extensions_805:string,extensions_806:string,extensions_807:string,extensions_808:string,extensions_809:string,extensions_810:string,extensions_811:string,extensions_812:string,extensions_813:string,extensions_814:string,extensions_815:string,extensions_816:string,extensions_817:string,extensions_818:string,extensions_819:string,extensions_820:string,extensions_821:string,extensions_822:string,extensions_823:string,extensions_824:string,extensions_825:string,extensions_826:string,extensions_827:string,extensions_828:string,extensions_829:st
ring,extensions_830:string,extensions_831:string,extensions_832:string,extensions_833:string,extensions_834:string,extensions_835:string,extensions_836:string,extensions_837:string,extensions_838:string,extensions_839:string,extensions_840:string,extensions_841:string,extensions_842:string,extensions_843:string,extensions_844:string,extensions_845:string,extensions_846:string,extensions_847:string,extensions_848:string,extensions_849:string,extensions_850:string,extensions_851:string,extensions_852:string,extensions_853:string,extensions_854:string,extensions_855:string,extensions_856:string,extensions_857:string,extensions_858:string,extensions_859:string,extensions_860:string,extensions_861:string,extensions_862:string,extensions_863:string,extensions_864:string,extensions_865:string,extensions_866:string,extensions_867:string,extensions_868:string,extensions_869:string,extensions_870:string,extensions_871:string,extensions_872:string,extensions_873:string,extensions_874:string,extensions_875:string,extensions_876:string,extensions_877:string,extensions_878:string,extensions_879:string,extensions_880:string,extensions_881:string,extensions_882:string,extensions_883:string,extensions_884:string,extensions_885:string,extensions_886:string,extensions_887:string,extensions_888:string,extensions_889:string,extensions_890:string,extensions_891:string,extensions_892:string,extensions_893:string,extensions_894:string,extensions_895:string,extensions_896:string,extensions_897:string,extensions_898:string,extensions_899:string,extensions_900:string,extensions_901:string,extensions_902:string,extensions_903:string,extensions_904:string,extensions_905:string,extensions_906:string,extensions_907:string,extensions_908:string,extensions_909:string,extensions_910:string,extensions_911:string,extensions_912:string,extensions_913:string,extensions_914:string,extensions_915:string,extensions_916:string,extensions_917:string,extensions_918:string,extensions_919:string,extensions_920:
string,extensions_921:string,extensions_922:string,extensions_923:string,extensions_924:string,extensions_925:string,extensions_926:string,extensions_927:string,extensions_928:string,extensions_929:string,extensions_930:string,extensions_931:string,extensions_932:string,extensions_933:string,extensions_934:string,extensions_935:string,extensions_936:string,extensions_937:string,extensions_938:string,extensions_939:string,extensions_940:string,extensions_941:string,extensions_942:string,extensions_943:string,extensions_944:string,extensions_945:string,extensions_946:string,extensions_947:string,extensions_948:string,extensions_949:string,extensions_950:string,extensions_951:string,extensions_952:string,extensions_953:string,extensions_954:string,extensions_955:string,extensions_956:string,extensions_957:string,extensions_958:string,extensions_959:string,extensions_960:string,extensions_961:string,extensions_962:string,extensions_963:string,extensions_964:string,extensions_965:string,extensions_966:string,extensions_967:string,extensions_968:string,extensions_969:string,extensions_970:string,extensions_971:string,extensions_972:string,extensions_973:string,extensions_974:string,extensions_975:string,extensions_976:string,extensions_977:string,extensions_978:string,extensions_979:string,extensions_980:string,extensions_981:string,extensions_982:string,extensions_983:string,extensions_984:string,extensions_985:string,extensions_986:string,extensions_987:string,extensions_988:string,extensions_989:string,extensions_990:string,extensions_991:string,extensions_992:string,extensions_993:string,extensions_994:string,extensions_995:string,extensions_996:string,extensions_997:string,extensions_998:string,extensions_999:string,extensions_1000:string
#zookeeper
nexus.edt.zookeeper.connect.string=10.25.19.204:2181,10.25.19.205:2181,10.25.19.206:2181
nexus.edt.zookeeper.namespace=edt_event
nexus.edt.zookeeper.session.timeout.ms=60000
nexus.edt.zookeeper.max.close.wait.ms=15000
nexus.edt.zookeeper.base.sleep.time.ms=3000
nexus.edt.zookeeper.max.retries=10
nexus.edt.zookeeper.event.attribute.mapping.path=/event/attribute/mapping
nexus.edt.zookeeper.event.path=/event
# HBase
hbase.zookeeper.quorum=10.25.19.204:2181,10.25.19.205:2181,10.25.19.206:2181
#设置 HBase 客户端在遇到可重试错误时的重试次数
hbase.client.retries.number=3
#指定 HBase Master 的信息端口号。Flink 客户端通过这个端口与 HBase Master 进行通信,获取集群的元数据信息。
hbase.master.info.port=-1
hbase.zookeeper.property.clientPort=2181
#增加 RPC 超时时间可以避免因操作超时导致的写入失败。但需要注意,过长的超时时间可能会导致系统响应变慢。
hbase.rpc.timeout=300000
#用于设置 HBase 客户端操作的超时时间。如果操作在给定的时间内没有完成,客户端将抛出超时异常
hbase.client.operation.timeout=300000
#设置 HBase Scanner 的超时时间。Scanner 用于从 HBase 表中读取数据
hbase.client.scanner.timeout.period=300000
#增加处理客户端请求的线程数,可以提高并发处理能力
#hbase.regionserver.handler.count=5
#调整 RegionServer 的线程池大小,以匹配集群的负载和硬件资源
#hbase.regionserver.thread.pool.size=
#合理设置 MemStore 的大小,避免频繁的 Flush 操作。过大的 MemStore 可能会导致内存溢出,而过小则可能增加磁盘 I/O 负担
#hbase.regionserver.global.memstore.size=
hbase.root.dir=hdfs://nn1:8020/hbase
#用于设置写入缓冲区的数据大小,单位是字节。HBase默认值为2MB;此处设置为 64*1024*1024=67108864 (64MB)
hbase.client.write.buffer=67108864
hbase.flush.size=1000
#单位ms
hbase.flush.interval.millis=5000
#任务并行数,和stream.parallelism的并行数量一起看
valid.kafka.sink.parallelism=1
invalid.kafka.sink.parallelism=1
events.dwd.sink.parallelism=1
events.enum.stream.parallelism=1
events.user.stream.parallelism=1
events.device.stream.parallelism=1
events.location.stream.parallelism=1
events.utm.stream.parallelism=1
events.page.stream.parallelism=1
events.metrics.stream.parallelism=1
#clickhouse
clickhouse.host=10.25.19.208:8123
clickhouse.database=default
clickhouse.username=default
clickhouse.password=123
clickhouse.stream.parallelism=1
#枚举表-hbase
hbase.family.name=cf
platform.table=platform
tenantId.table=tenant_id
projectId.table=project_id
appId.table=app_id
deviceModel.table=device_model
deviceBrand.table=device_brand
deviceType.table=device_type
eventName.table=event_name
eventType.table=event_type
sdkVersion.table=sdk_version
appVersion.table=app_version
browserVersion.table=browser_version
sendType.table=send_type
marketName.table=market_name
utmMatchingType.table=utm_matching_type
utmSource.table=utm_source
eventCode.table=event_code
eventsUser.table=events_user
eventsDevice.table=events_device
eventsLocation.table=events_location
eventsUtm.table=events_utm
eventsPage.table=events_page
eventsMetrics.table=events_metrics
2、edt-event-process-data hdfs类脚本
说明:
raw_events_loop_add_partition.sh 正确埋点数据ODS层分区目录创建
events_loop_add_partition.sh 正确ODS层埋点数据按小时加载
raw_events_loop_add_partition_error.sh 错误埋点数据ODS层分区目录创建
events_loop_add_partition_error.sh 错误ODS层埋点数据按小时加载
dwd_events_loop_add_partition.sh 正确的埋点数据DWD层分区目录
dwd_events_loop_add_partition_sql.sh 正确的埋点数据DWD层分区按小时加载
创建目录示例:
nohup bash /home/hadoop/chenglei_shell/dwd_events_loop_add_partition.sh >> /home/hadoop/chenglei_shell/dwd_events_loop_add_partition.log &
dwd_events_loop_add_partition_sql.sh 正确DWD层埋点数据按小时创建生成创建表SQL文件,然后去hive执行生成的文件,例如:
生成创建分区的SQL语句
nohup bash /home/hadoop/chenglei_shell/dwd_events_loop_add_partition_sql.sh >> /home/hadoop/chenglei_shell/dwd_events_loop_add_partition_sql.sql &
执行hive命令:
hive -f dwd_events_loop_add_partition_sql.sql
yr变量指当前年份
pth:hive表数据的hdfs存储地址
raw_events_loop_add_partition.sh
#!/bin/bash
# Create the ODS-layer HDFS partition directories for valid raw event data,
# one directory per hour of each real calendar day: dt=YYYYMMDD/hour=HH.
#
# Usage: raw_events_loop_add_partition.sh [year]
#   year defaults to 2024 (previous hard-coded value) for backward compatibility.
yr=${1:-2024}
for m in $(seq -w 3 12); do
    for d in $(seq -w 1 31); do
        # Skip day numbers that do not exist in this month (e.g. 0431, 0631,
        # 0931, 1131, 0230) — the original created bogus partitions for them.
        date -d "${yr}-${m}-${d}" >/dev/null 2>&1 || continue
        for h in $(seq -w 0 23); do
            pth="/raw/dc/events/dt=${yr}${m}${d}/hour=${h}"
            echo "$pth"
            hdfs dfs -mkdir -p "$pth"
        done
    done
done
events_loop_add_partition.sh
#!/bin/bash
# Emit Hive ALTER TABLE statements that register the hourly ODS event
# partitions (ods_dc.ods_events) against their HDFS locations. The output is
# redirected to a .sql file and executed with `hive -f`.
#
# Usage: events_loop_add_partition.sh [year]
#   year defaults to 2024 (previous hard-coded value) for backward compatibility.
yr=${1:-2024}
for m in $(seq -w 3 12); do
    for d in $(seq -w 1 31); do
        # Skip day numbers that do not exist in this month — the original
        # emitted ALTER TABLE statements for bogus dates like 20240431.
        date -d "${yr}-${m}-${d}" >/dev/null 2>&1 || continue
        for h in $(seq -w 0 23); do
            pth="/raw/dc/events/dt=${yr}${m}${d}/hour=${h}"
            echo "alter table ods_dc.ods_events add partition(dt='${yr}${m}${d}',hour='${h}') location '$pth';"
        done
    done
done
raw_events_loop_add_partition_error.sh
#!/bin/bash
# Create the ODS-layer HDFS partition directories for INVALID (rejected) raw
# event data, one directory per hour of each real calendar day.
#
# Usage: raw_events_loop_add_partition_error.sh [year]
#   year defaults to 2024 (previous hard-coded value) for backward compatibility.
yr=${1:-2024}
for m in $(seq -w 3 12); do
    for d in $(seq -w 1 31); do
        # Skip day numbers that do not exist in this month (e.g. 0431, 0631,
        # 0931, 1131, 0230) — the original created bogus partitions for them.
        date -d "${yr}-${m}-${d}" >/dev/null 2>&1 || continue
        for h in $(seq -w 0 23); do
            pth="/raw/dc/events/error/dt=${yr}${m}${d}/hour=${h}"
            echo "$pth"
            hdfs dfs -mkdir -p "$pth"
        done
    done
done
events_loop_add_partition_error.sh
#!/bin/bash
# Emit Hive ALTER TABLE statements that register the hourly ODS error-event
# partitions (ods_dc.ods_events_error) against their HDFS locations. The
# output is redirected to a .sql file and executed with `hive -f`.
#
# Usage: events_loop_add_partition_error.sh [year]
#   year defaults to 2024 (previous hard-coded value) for backward compatibility.
yr=${1:-2024}
for m in $(seq -w 3 12); do
    for d in $(seq -w 1 31); do
        # Skip day numbers that do not exist in this month — the original
        # emitted ALTER TABLE statements for bogus dates like 20240431.
        date -d "${yr}-${m}-${d}" >/dev/null 2>&1 || continue
        for h in $(seq -w 0 23); do
            pth="/raw/dc/events/error/dt=${yr}${m}${d}/hour=${h}"
            echo "alter table ods_dc.ods_events_error add partition(dt='${yr}${m}${d}',hour='${h}') location '$pth';"
        done
    done
done
dwd_events_loop_add_partition.sh
#!/bin/bash
# Create the DWD-layer HDFS partition directories for processed event data,
# one directory per hour of each real calendar day.
#
# Usage: dwd_events_loop_add_partition.sh [year]
#   year defaults to 2024 (previous hard-coded value) for backward compatibility.
yr=${1:-2024}
for m in $(seq -w 3 12); do
    for d in $(seq -w 1 31); do
        # Skip day numbers that do not exist in this month (e.g. 0431, 0631,
        # 0931, 1131, 0230) — the original created bogus partitions for them.
        date -d "${yr}-${m}-${d}" >/dev/null 2>&1 || continue
        for h in $(seq -w 0 23); do
            pth="/user/hive/warehouse/dwd_events/dt=${yr}${m}${d}/hour=${h}"
            echo "$pth"
            hdfs dfs -mkdir -p "$pth"
        done
    done
done
dwd_events_loop_add_partition_sql.sh
#!/bin/bash
# Emit Hive ALTER TABLE statements that register the hourly DWD event
# partitions (dwd_dc.dwd_events) against their HDFS locations. The output is
# redirected to a .sql file and executed with `hive -f`.
#
# Usage: dwd_events_loop_add_partition_sql.sh [year]
#   year defaults to 2024 (previous hard-coded value) for backward compatibility.
yr=${1:-2024}
for m in $(seq -w 3 12); do
    for d in $(seq -w 1 31); do
        # Skip day numbers that do not exist in this month — the original
        # emitted ALTER TABLE statements for bogus dates like 20240431.
        date -d "${yr}-${m}-${d}" >/dev/null 2>&1 || continue
        for h in $(seq -w 0 23); do
            pth="/user/hive/warehouse/dwd_events/dt=${yr}${m}${d}/hour=${h}"
            echo "alter table dwd_dc.dwd_events add partition(dt='${yr}${m}${d}',hour='${h}') location '$pth';"
        done
    done
done
3、edt-event-process-data clickhouse相关脚本
说明:
clickhouse任务的定时脚本.txt,放到crontab表达式中
dwd开头的shell脚本放到clickhouse任务表达式下配置的目录下
conf.sh是配置Clickhouse的账户脚本
conf.sh
#!/bin/bash
# Shared ClickHouse connection settings, sourced by every dwd_*.sh job script.
source /home/hadoop/.bash_profile
# clickhouse-client invocation line, including credentials.
# NOTE(review): the password is stored in plain text here and is later echoed
# by the job scripts into their log files — consider a restricted-permission
# client config file (clickhouse-client --config-file) instead.
ck_login='clickhouse-client --user default --password 123'
clickhouse任务的定时脚本.txt
# Every 5 minutes: refresh event attribute info table dwd_event_attribute_info
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_event_attribute_info.sh >> /home/hadoop/shell_code/logs/dwd_event_attribute_info.log 2>&1
# Every 5 minutes: refresh daily traffic overview table dwd_flow_summary_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_flow_summary_bydate.sh >> /home/hadoop/shell_code/logs/dwd_flow_summary_bydate.log 2>&1
# Every 5 minutes: refresh hourly traffic overview table dwd_flow_summary_byhour
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_flow_summary_byhour.sh >> /home/hadoop/shell_code/logs/dwd_flow_summary_byhour.log 2>&1
# Every 5 minutes: refresh new/returning visitor stats table dwd_newolduser_summary_date
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_newolduser_summary_date.sh >> /home/hadoop/shell_code/logs/dwd_newolduser_summary_date.log 2>&1
# Every 5 minutes: refresh visited-page stats table dwd_visituri_summary_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_visituri_summary_bydate.sh >> /home/hadoop/shell_code/logs/dwd_visituri_summary_bydate.log 2>&1
# Every 5 minutes: refresh referrer-site stats table dwd_source_site_summary_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_source_site_summary_bydate.sh >> /home/hadoop/shell_code/logs/dwd_source_site_summary_bydate.log 2>&1
# Every 5 minutes: refresh region stats table dwd_area_summary_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_area_summary_bydate.sh >> /home/hadoop/shell_code/logs/dwd_area_summary_bydate.log 2>&1
# Every 5 minutes: refresh device stats table dwd_device_summary_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_device_summary_bydate.sh >> /home/hadoop/shell_code/logs/dwd_device_summary_bydate.log 2>&1
# Every 5 minutes: refresh user visit-count distribution table dwd_user_visit_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_user_visit_bydate.sh >> /home/hadoop/shell_code/logs/dwd_user_visit_bydate.log 2>&1
# Every 5 minutes: refresh user pages-per-visit distribution table dwd_user_visitpage_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_user_visitpage_bydate.sh >> /home/hadoop/shell_code/logs/dwd_user_visitpage_bydate.log 2>&1
# Every 5 minutes: refresh user visit-duration distribution table dwd_user_visittime_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_user_visittime_bydate.sh >> /home/hadoop/shell_code/logs/dwd_user_visittime_bydate.log 2>&1
# Every 5 minutes: refresh visitor lifecycle stats table dwd_visitor_life_bydate
*/5 * * * * /usr/bin/sh /home/hadoop/shell_code/dwd_visitor_life_bydate.sh >> /home/hadoop/shell_code/logs/dwd_visitor_life_bydate.log 2>&1
dwd_area_summary_bydate.sh
#!/bin/bash
# Region (country/province) daily statistics -> dwd_dc.dwd_area_summary_bydate
# Run from cron every 5 minutes; recomputes rows for dt >= $dt.
# Shared ClickHouse client settings (defines $ck_login, credentials included).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): this echoes the clickhouse-client command line, password
# included, into the cron log — consider removing or masking it.
echo $ck_login
echo $curdir $cf $dt
# Aggregates per dt/tenant/project/app/platform/country/province:
#   t1: pv/uv/ip/user counts; t2: visit (session) counts, sessions split on a
#   30-minute inactivity gap; t3: total visit time; t4: bounce counts;
#   t5: new/returning visitor flag. NULL/''/'N/A' regions are mapped to '空'.
# NOTE(review): the frame "ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING"
# with any(real_time) yields the CURRENT row's time on a visitor's first event
# (no preceding row exists), so prev_real_time is never NULL and the first
# visit of each distinct_id is never flagged as a new visit — confirm intended.
# NOTE(review): INSERT without an explicit column list relies on the SELECT
# column order matching the table DDL exactly.
sql="
insert into dwd_dc.dwd_area_summary_bydate
select t1.dt,
t1.tenant_id,
t1.project_id,
t1.app_id,
t1.platform,
t1.country,
t1.province,
t1.pv,
t2.visit_num,
t1.uv,
t1.ip_num,
t3.visit_time,
t4.bounce_num,
multiIf(t5.is_first_time='true','是','否') as is_new_user,
t1.users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
multiIf(country is null, '空',country = '', '空',country = 'N/A', '空',country) as country,
multiIf(province is null, '空',province = '', '空',province = 'N/A', '空',province) as province,
count(distinct_id) as pv,
COUNTDistinct(distinct_id) as uv,
COUNTDistinct(ip) as ip_num,
COUNTDistinct(user_id) as users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,country,province
) t1
left join (
select
dt,
tenant_id,
project_id,
app_id,
platform,
country,
province,
sum(is_new_visit) as visit_num
from (
select dt,tenant_id,project_id,app_id,platform,country,province,
distinct_id,
real_time,
if(
prev_real_time IS NULL OR
toUInt64(real_time) - toUInt64(prev_real_time) > 30 * 60,
1,
0
) as is_new_visit
from (
select dt,tenant_id,project_id,app_id,platform,country,province,
distinct_id,
real_time,
any(real_time) OVER (PARTITION BY dt,distinct_id ORDER BY real_time Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_time
from (
select dt,tenant_id,project_id,app_id,platform,
multiIf(country is null, '空',country = '', '空',country = 'N/A', '空',country) as country,
multiIf(province is null, '空',province = '', '空',province = 'N/A', '空',province) as province,
distinct_id,
toDateTime(toInt64(upload_time) / 1000) AS real_time
from dwd_dc.dwd_ck_events where dt>='$dt'
)
)
where is_new_visit = 1
)
group by dt,tenant_id,project_id,app_id,platform,country,province
) t2 on t1.dt=t2.dt and t1.tenant_id=t2.tenant_id and t1.project_id=t2.project_id and t1.app_id=t2.app_id
and t1.platform=t2.platform and t1.country=t2.country and t1.province=t2.province
left join (
select
a1.dt,
a1.tenant_id,
a1.project_id,
a1.app_id,
a1.platform,
a1.country,
a1.province,
sum(a1.visit_time) as visit_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
session_id,
multiIf(country is null, '空',country = '', '空',country = 'N/A', '空',country) as country,
multiIf(province is null, '空',province = '', '空',province = 'N/A', '空',province) as province,
max(toInt64(upload_time))-min(toInt64(upload_time)) as visit_time
from dwd_dc.dwd_ck_events where dt>='$dt' and session_id<>''
group by dt,tenant_id,project_id,app_id,platform,session_id,country,province
) a1 group by a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform,a1.country,a1.province
) t3 on t1.dt=t3.dt and t1.tenant_id=t3.tenant_id and t1.project_id=t3.project_id and t1.app_id=t3.app_id
and t1.platform=t3.platform and t1.country=t3.country and t1.province=t3.province
left join (
select dt,tenant_id,project_id,app_id,platform,country,province,sum(multiIf(pv=1 ,1,0)) as bounce_num from (
select dt,tenant_id,project_id,app_id,platform,
multiIf(country is null, '空',country = '', '空',country = 'N/A', '空',country) as country,
multiIf(province is null, '空',province = '', '空',province = 'N/A', '空',province) as province,
count(distinct_id) pv from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,country,province
) group by dt,tenant_id,project_id,app_id,platform,country,province
) t4 on t1.dt=t4.dt and t1.tenant_id=t4.tenant_id and t1.project_id=t4.project_id and t1.app_id=t4.app_id and t1.platform=t4.platform
and t1.country=t4.country and t1.province=t4.province
left join (
select distinct dt,tenant_id,project_id,app_id,platform,
multiIf(country is null, '空',country = '', '空',country = 'N/A', '空',country) as country,
multiIf(province is null, '空',province = '', '空',province = 'N/A', '空',province) as province,
is_first_time from dwd_dc.dwd_ck_events where dt>='$dt' and distinct_id is not null
) t5 on t1.dt=t5.dt and t1.tenant_id=t5.tenant_id and t1.project_id=t5.project_id and t1.app_id=t5.app_id and t1.platform=t5.platform
and t1.country=t5.country and t1.province=t5.province;
"
echo "${sql}"
$ck_login --query="${sql}"
# Force a merge so the target table (presumably a ReplacingMergeTree — confirm
# engine) deduplicates the rows just inserted.
sql1="OPTIMIZE TABLE dwd_dc.dwd_area_summary_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_device_summary_bydate.sh
#!/bin/bash
# Device (model/brand) daily statistics -> dwd_dc.dwd_device_summary_bydate
# Run from cron every 5 minutes; recomputes rows for dt >= $dt.
# Shared ClickHouse client settings (defines $ck_login, credentials included).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the clickhouse-client command line, password included,
# into the cron log.
echo $ck_login
echo $curdir $cf $dt
# Aggregates per dt/tenant/project/app/platform/device_model/device_brand:
#   t1 pv/uv/ip/user counts, t2 session counts (30-minute inactivity split),
#   t3 total visit time, t4 bounce counts, t5 new/returning flag.
# NOTE(review): as in the sibling scripts, the "1 PRECEDING" window frame with
# any(real_time) makes prev_real_time equal the current row's time on a
# visitor's first event, so that first visit is never counted — confirm.
sql="
insert into dwd_dc.dwd_device_summary_bydate
select t1.dt,
t1.tenant_id,
t1.project_id,
t1.app_id,
t1.platform,
t1.device_model,
t1.device_brand,
t1.pv,
t2.visit_num,
t1.uv,
t1.ip_num,
t3.visit_time,
t4.bounce_num,
multiIf(t5.is_first_time='true','是','否') as is_new_user,
t1.users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
multiIf(device_model is null, '空',device_model = '', '空',device_model = 'N/A', '空',device_model) as device_model,
multiIf(device_brand is null, '空',device_brand = '', '空',device_brand = 'N/A', '空',device_brand) as device_brand,
count(distinct_id) as pv,
COUNTDistinct(distinct_id) as uv,
COUNTDistinct(ip) as ip_num,
COUNTDistinct(user_id) as users
from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,device_model,device_brand
) t1
left join (
select
dt,
tenant_id,
project_id,
app_id,
platform,
device_model,
device_brand,
sum(is_new_visit) as visit_num
from (
select dt,tenant_id,project_id,app_id,platform,device_model,device_brand,
distinct_id,
real_time,
if(
prev_real_time IS NULL OR
toUInt64(real_time) - toUInt64(prev_real_time) > 30 * 60,
1,
0
) as is_new_visit
from (
select dt,tenant_id,project_id,app_id,platform,device_model,device_brand,
distinct_id,
real_time,
any(real_time) OVER (PARTITION BY dt,distinct_id ORDER BY real_time Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_time
from (
select dt,tenant_id,project_id,app_id,platform,
multiIf(device_model is null, '空',device_model = '', '空',device_model = 'N/A', '空',device_model) as device_model,
multiIf(device_brand is null, '空',device_brand = '', '空',device_brand = 'N/A', '空',device_brand) as device_brand,
distinct_id,
toDateTime(toInt64(upload_time) / 1000) AS real_time
from dwd_dc.dwd_ck_events where dt>='$dt'
)
)
where is_new_visit = 1
)
group by dt,tenant_id,project_id,app_id,platform,device_model,device_brand
) t2 on t1.dt=t2.dt and t1.tenant_id=t2.tenant_id and t1.project_id=t2.project_id and t1.app_id=t2.app_id
and t1.platform=t2.platform and t1.device_model=t2.device_model and t1.device_brand=t2.device_brand
left join (
select
a1.dt,
a1.tenant_id,
a1.project_id,
a1.app_id,
a1.platform,
a1.device_model,
a1.device_brand,
sum(a1.visit_time) as visit_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
session_id,
multiIf(device_model is null, '空',device_model = '', '空',device_model = 'N/A', '空',device_model) as device_model,
multiIf(device_brand is null, '空',device_brand = '', '空',device_brand = 'N/A', '空',device_brand) as device_brand,
max(toInt64(upload_time))-min(toInt64(upload_time)) as visit_time
from dwd_dc.dwd_ck_events where dt>='$dt' and session_id<>''
group by dt,tenant_id,project_id,app_id,platform,session_id,device_model,device_brand
) a1 group by a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform,a1.device_model,a1.device_brand
) t3 on t1.dt=t3.dt and t1.tenant_id=t3.tenant_id and t1.project_id=t3.project_id and t1.app_id=t3.app_id
and t1.platform=t3.platform and t1.device_model=t3.device_model and t1.device_brand=t3.device_brand
left join (
select dt,tenant_id,project_id,app_id,platform,device_model,device_brand,sum(multiIf(pv=1 ,1,0)) as bounce_num from (
select dt,tenant_id,project_id,app_id,platform,
multiIf(device_model is null, '空',device_model = '', '空',device_model = 'N/A', '空',device_model) as device_model,
multiIf(device_brand is null, '空',device_brand = '', '空',device_brand = 'N/A', '空',device_brand) as device_brand,
count(distinct_id) pv from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,device_model,device_brand
) group by dt,tenant_id,project_id,app_id,platform,device_model,device_brand
) t4 on t1.dt=t4.dt and t1.tenant_id=t4.tenant_id and t1.project_id=t4.project_id and t1.app_id=t4.app_id and t1.platform=t4.platform
and t1.device_model=t4.device_model and t1.device_brand=t4.device_brand
left join (
select distinct dt,tenant_id,project_id,app_id,platform,
multiIf(device_model is null, '空',device_model = '', '空',device_model = 'N/A', '空',device_model) as device_model,
multiIf(device_brand is null, '空',device_brand = '', '空',device_brand = 'N/A', '空',device_brand) as device_brand,
is_first_time from dwd_dc.dwd_ck_events where dt>='$dt' and distinct_id is not null
) t5 on t1.dt=t5.dt and t1.tenant_id=t5.tenant_id and t1.project_id=t5.project_id and t1.app_id=t5.app_id and t1.platform=t5.platform
and t1.device_model=t5.device_model and t1.device_brand=t5.device_brand;
"
echo "${sql}"
$ck_login --query="${sql}"
# Force a merge so the target table (presumably a ReplacingMergeTree — confirm
# engine) deduplicates the rows just inserted.
sql1="OPTIMIZE TABLE dwd_dc.dwd_device_summary_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_event_attribute_info.sh
#!/bin/bash
# Event attribute value inventory -> dwd_dc.dwd_event_attribute_info
# Collects the distinct values observed for each tracked attribute (platform,
# app_package_id, app_version, ...) since $dt, one (option_name, option_value)
# row per value. Run from cron every 5 minutes.
# Shared ClickHouse client settings (defines $ck_login, credentials included).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the clickhouse-client command line, password included,
# into the cron log.
echo $ck_login
echo $curdir $cf $dt
# NOTE(review): the 'os' branch groups by the alias option_value while every
# other branch groups by the source column — equivalent result, but
# inconsistent style.
sql="
insert into dwd_dc.dwd_event_attribute_info
select 'platform' as option_name,platform as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by platform
union all
select 'app_package_id' as option_name,app_package_id as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by app_package_id
union all
select 'app_version' as option_name,app_version as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by app_version
union all
select 'market_name' as option_name,market_name as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by market_name
union all
select 'device_type' as option_name,device_type as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by device_type
union all
select 'device_brand' as option_name,device_brand as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by device_brand
union all
select 'device_model' as option_name,device_model as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by device_model
union all
select 'os' as option_name,os as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by option_value
union all
select 'carriers' as option_name,carriers as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by carriers
union all
select 'user_agent' as option_name,user_agent as option_value,toTimeZone(now(), 'Asia/Shanghai') as update_time from dwd_dc.dwd_ck_events where dt>='$dt' group by user_agent
;
"
echo "${sql}"
$ck_login --query="${sql}"
# Force a merge so the target table (presumably a ReplacingMergeTree — confirm
# engine) deduplicates the rows just inserted.
sql1="OPTIMIZE TABLE dwd_dc.dwd_event_attribute_info FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_flow_summary_bydate.sh
#!/bin/bash
# Daily traffic overview -> dwd_dc.dwd_flow_summary_bydate
# Run from cron every 5 minutes; recomputes rows for dt >= $dt.
# Shared ClickHouse client settings (defines $ck_login, credentials included).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the clickhouse-client command line, password included,
# into the cron log.
echo $ck_login
echo $curdir $cf $dt
# Aggregates per dt/tenant/project/app/platform (plus new-user / logged-in
# flags in t1): t1 pv/uv/ip/user counts, t2 session counts (30-minute
# inactivity split), t3 total visit time, t4 bounce counts.
# NOTE(review): as in the sibling scripts, the "1 PRECEDING" window frame with
# any(real_time) makes prev_real_time equal the current row's time on a
# visitor's first event, so that first visit is never counted — confirm.
sql="
insert into dwd_dc.dwd_flow_summary_bydate
select t1.dt,
t1.tenant_id,
t1.project_id,
t1.app_id,
t1.platform,
t1.is_new_user,
t1.is_user_login,
t1.pv,
t2.visit_num,
t1.uv,
t1.ip_num,
t3.visit_time,
t4.bounce_num,
t1.users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
multiIf(is_first_time='true','是','否') as is_new_user,
multiIf(user_id is not null,'是','否') as is_user_login,
count(distinct_id) as pv,
COUNTDistinct(distinct_id) as uv,
COUNTDistinct(ip) as ip_num,
COUNTDistinct(user_id) as users
from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,
multiIf(is_first_time='true','是','否'),
multiIf(user_id is not null,'是','否')
) t1
left join (
select
dt,
tenant_id,
project_id,
app_id,
platform,
sum(is_new_visit) as visit_num
from (
select dt,tenant_id,project_id,app_id,platform,
distinct_id,
real_time,
if(
prev_real_time IS NULL OR
toUInt64(real_time) - toUInt64(prev_real_time) > 30 * 60,
1,
0
) as is_new_visit
from (
select dt,tenant_id,project_id,app_id,platform,
distinct_id,
real_time,
any(real_time) OVER (PARTITION BY dt,distinct_id ORDER BY real_time Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_time
from (
select dt,tenant_id,project_id,app_id,platform,
distinct_id,
toDateTime(toInt64(upload_time) / 1000) AS real_time
from dwd_dc.dwd_ck_events where dt>='$dt'
)
)
where is_new_visit = 1
)
group by dt,tenant_id,project_id,app_id,platform
) t2 on t1.dt=t2.dt and t1.tenant_id=t2.tenant_id and t1.project_id=t2.project_id and t1.app_id=t2.app_id and t1.platform=t2.platform
left join (
select
a1.dt,
a1.tenant_id,
a1.project_id,
a1.app_id,
a1.platform,
sum(a1.visit_time) as visit_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
session_id,
max(toInt64(upload_time))-min(toInt64(upload_time)) as visit_time
from dwd_dc.dwd_ck_events where dt>='$dt' and session_id<>''
group by dt,tenant_id,project_id,app_id,platform,session_id
) a1 group by a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform
) t3 on t1.dt=t3.dt and t1.tenant_id=t3.tenant_id and t1.project_id=t3.project_id and t1.app_id=t3.app_id and t1.platform=t3.platform
left join (
select dt,tenant_id,project_id,app_id,platform,sum(multiIf(pv=1 ,1,0)) as bounce_num from (
select dt,tenant_id,project_id,app_id,platform,count(distinct_id) pv from dwd_dc.dwd_ck_events where dt>='$dt' group by dt,tenant_id,project_id,app_id,platform
) group by dt,tenant_id,project_id,app_id,platform
) t4 on t1.dt=t4.dt and t1.tenant_id=t4.tenant_id and t1.project_id=t4.project_id and t1.app_id=t4.app_id and t1.platform=t4.platform
;
"
echo "${sql}"
$ck_login --query="${sql}"
# Force a merge so the target table (presumably a ReplacingMergeTree — confirm
# engine) deduplicates the rows just inserted.
sql1="OPTIMIZE TABLE dwd_dc.dwd_flow_summary_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_flow_summary_byhour.sh
#!/bin/bash
# Hourly traffic trend -> dwd_dc.dwd_flow_summary_byhour
# Same logic as dwd_flow_summary_bydate.sh with stat_hour added to every
# grouping and join key. Run from cron every 5 minutes for dt >= $dt.
# Shared ClickHouse client settings (defines $ck_login, credentials included).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the clickhouse-client command line, password included,
# into the cron log.
echo $ck_login
echo $curdir $cf $dt
# NOTE(review): as in the sibling scripts, the "1 PRECEDING" window frame with
# any(real_time) makes prev_real_time equal the current row's time on a
# visitor's first event, so that first visit is never counted — confirm.
sql="
insert into dwd_dc.dwd_flow_summary_byhour
select t1.dt,
t1.stat_hour,
t1.tenant_id,
t1.project_id,
t1.app_id,
t1.platform,
t1.is_new_user,
t1.is_user_login,
t1.pv,
t2.visit_num,
t1.uv,
t1.ip_num,
t3.visit_time,
t4.bounce_num,
t1.users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
stat_hour,
tenant_id,
project_id,
app_id,
platform,
multiIf(is_first_time='true','是','否') as is_new_user,
multiIf(user_id is not null,'是','否') as is_user_login,
count(distinct_id) as pv,
COUNTDistinct(distinct_id) as uv,
COUNTDistinct(ip) as ip_num,
COUNTDistinct(user_id) as users
from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,stat_hour,tenant_id,project_id,app_id,platform,
multiIf(is_first_time='true','是','否'),
multiIf(user_id is not null,'是','否')
) t1
left join (
select
dt,
stat_hour,
tenant_id,
project_id,
app_id,
platform,
sum(is_new_visit) as visit_num
from (
select dt,stat_hour,tenant_id,project_id,app_id,platform,
distinct_id,
real_time,
if(
prev_real_time IS NULL OR
toUInt64(real_time) - toUInt64(prev_real_time) > 30 * 60,
1,
0
) as is_new_visit
from (
select dt,stat_hour,tenant_id,project_id,app_id,platform,
distinct_id,
real_time,
any(real_time) OVER (PARTITION BY dt,distinct_id ORDER BY real_time Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_time
from (
select dt,stat_hour,tenant_id,project_id,app_id,platform,
distinct_id,
toDateTime(toInt64(upload_time) / 1000) AS real_time
from dwd_dc.dwd_ck_events where dt>='$dt'
)
)
where is_new_visit = 1
)
group by dt,stat_hour,tenant_id,project_id,app_id,platform
) t2 on t1.dt=t2.dt and t1.stat_hour=t2.stat_hour and t1.tenant_id=t2.tenant_id and t1.project_id=t2.project_id and t1.app_id=t2.app_id and t1.platform=t2.platform
left join (
select
a1.dt,
a1.stat_hour,
a1.tenant_id,
a1.project_id,
a1.app_id,
a1.platform,
sum(a1.visit_time) as visit_time
from (
select
dt,
stat_hour,
tenant_id,
project_id,
app_id,
platform,
session_id,
max(toInt64(upload_time))-min(toInt64(upload_time)) as visit_time
from dwd_dc.dwd_ck_events where dt>='$dt' and session_id<>''
group by dt,stat_hour,tenant_id,project_id,app_id,platform,session_id
) a1 group by a1.dt,a1.stat_hour,a1.tenant_id,a1.project_id,a1.app_id,a1.platform
) t3 on t1.dt=t3.dt and t1.stat_hour=t3.stat_hour and t1.tenant_id=t3.tenant_id and t1.project_id=t3.project_id and t1.app_id=t3.app_id and t1.platform=t3.platform
left join (
select dt,stat_hour,tenant_id,project_id,app_id,platform,sum(multiIf(pv=1 ,1,0)) as bounce_num from (
select dt,stat_hour,tenant_id,project_id,app_id,platform,count(distinct_id) pv from dwd_dc.dwd_ck_events where dt>='$dt' group by dt,stat_hour,tenant_id,project_id,app_id,platform
) group by dt,stat_hour,tenant_id,project_id,app_id,platform
) t4 on t1.dt=t4.dt and t1.stat_hour=t4.stat_hour and t1.tenant_id=t4.tenant_id and t1.project_id=t4.project_id and t1.app_id=t4.app_id and t1.platform=t4.platform
;
"
echo "${sql}"
$ck_login --query="${sql}"
# Force a merge so the target table (presumably a ReplacingMergeTree — confirm
# engine) deduplicates the rows just inserted.
sql1="OPTIMIZE TABLE dwd_dc.dwd_flow_summary_byhour FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_newolduser_summary_date.sh
#!/bin/bash
# New / returning visitor statistics -> dwd_dc.dwd_newolduser_summary_date
# Run from cron every 5 minutes; recomputes rows for dt >= $dt.
# Shared ClickHouse client settings (defines $ck_login, credentials included).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the clickhouse-client command line, password included,
# into the cron log.
echo $ck_login
echo $curdir $cf $dt
# Aggregates per dt/tenant/project/app/platform: t1 pv/uv/ip/user counts,
# t2 session counts (30-minute inactivity split), t3 total visit time,
# t4 bounce counts, t5 new/returning visitor flag.
# NOTE(review): as in the sibling scripts, the "1 PRECEDING" window frame with
# any(real_time) makes prev_real_time equal the current row's time on a
# visitor's first event, so that first visit is never counted — confirm.
# NOTE(review): the t5 LEFT JOIN has no is_first_time in its join key, so a
# group with both new and returning visitors fans out to multiple rows —
# confirm this is the intended way to split rows by is_new_user.
sql="
insert into dwd_dc.dwd_newolduser_summary_date
select t1.dt,
t1.tenant_id,
t1.project_id,
t1.app_id,
t1.platform,
t1.pv,
t2.visit_num,
t1.uv,
t1.ip_num,
t3.visit_time,
t4.bounce_num,
multiIf(t5.is_first_time='true','是','否') as is_new_user,
t1.users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
count(distinct_id) as pv,
COUNTDistinct(distinct_id) as uv,
COUNTDistinct(ip) as ip_num,
COUNTDistinct(user_id) as users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform
) t1
left join (
select
dt,
tenant_id,
project_id,
app_id,
platform,
sum(is_new_visit) as visit_num
from (
select dt,tenant_id,project_id,app_id,platform,
distinct_id,
real_time,
if(
prev_real_time IS NULL OR
toUInt64(real_time) - toUInt64(prev_real_time) > 30 * 60,
1,
0
) as is_new_visit
from (
select dt,tenant_id,project_id,app_id,platform,
distinct_id,
real_time,
any(real_time) OVER (PARTITION BY dt,distinct_id ORDER BY real_time Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_time
from (
select dt,tenant_id,project_id,app_id,platform,
distinct_id,
toDateTime(toInt64(upload_time) / 1000) AS real_time
from dwd_dc.dwd_ck_events where dt>='$dt'
)
)
where is_new_visit = 1
)
group by dt,tenant_id,project_id,app_id,platform
) t2 on t1.dt=t2.dt and t1.tenant_id=t2.tenant_id and t1.project_id=t2.project_id and t1.app_id=t2.app_id
and t1.platform=t2.platform
left join (
select
a1.dt,
a1.tenant_id,
a1.project_id,
a1.app_id,
a1.platform,
sum(a1.visit_time) as visit_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
session_id,
max(toInt64(upload_time))-min(toInt64(upload_time)) as visit_time
from dwd_dc.dwd_ck_events where dt>='$dt' and session_id<>''
group by dt,tenant_id,project_id,app_id,platform,session_id
) a1 group by a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform
) t3 on t1.dt=t3.dt and t1.tenant_id=t3.tenant_id and t1.project_id=t3.project_id and t1.app_id=t3.app_id
and t1.platform=t3.platform
left join (
select dt,tenant_id,project_id,app_id,platform,sum(multiIf(pv=1 ,1,0)) as bounce_num from (
select dt,tenant_id,project_id,app_id,platform,
count(distinct_id) pv from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform
) group by dt,tenant_id,project_id,app_id,platform
) t4 on t1.dt=t4.dt and t1.tenant_id=t4.tenant_id and t1.project_id=t4.project_id and t1.app_id=t4.app_id and t1.platform=t4.platform
left join (
select distinct dt,tenant_id,project_id,app_id,platform,
is_first_time from dwd_dc.dwd_ck_events where dt>='$dt' and distinct_id is not null
) t5 on t1.dt=t5.dt and t1.tenant_id=t5.tenant_id and t1.project_id=t5.project_id and t1.app_id=t5.app_id and t1.platform=t5.platform
;
"
echo "${sql}"
$ck_login --query="${sql}"
# Force a merge so the target table (presumably a ReplacingMergeTree — confirm
# engine) deduplicates the rows just inserted.
sql1="OPTIMIZE TABLE dwd_dc.dwd_newolduser_summary_date FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_source_site_summary_bydate.sh
#!/bin/bash
# Referrer-site (utm_source) daily statistics -> dwd_dc.dwd_source_site_summary_bydate
# Run from cron every 5 minutes; recomputes rows for dt >= $dt.
# Shared ClickHouse client settings (defines $ck_login, credentials included).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the clickhouse-client command line, password included,
# into the cron log.
echo $ck_login
echo $curdir $cf $dt
# Aggregates per dt/tenant/project/app/platform/utm_source (NULL/''/'N/A'
# mapped to '空'): t1 pv/uv/ip/user counts, t2 session counts (30-minute
# inactivity split), t3 total visit time, t4 bounce counts, t5 new/returning flag.
# NOTE(review): as in the sibling scripts, the "1 PRECEDING" window frame with
# any(real_time) makes prev_real_time equal the current row's time on a
# visitor's first event, so that first visit is never counted — confirm.
sql="
insert into dwd_dc.dwd_source_site_summary_bydate
select t1.dt,
t1.tenant_id,
t1.project_id,
t1.app_id,
t1.platform,
t1.utm_source as source_site,
t1.pv,
t2.visit_num,
t1.uv,
t1.ip_num,
t3.visit_time,
t4.bounce_num,
multiIf(t5.is_first_time='true','是','否') as is_new_user,
t1.users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
multiIf(utm_source is null, '空',utm_source = '', '空',utm_source = 'N/A', '空',utm_source) as utm_source,
count(distinct_id) as pv,
COUNTDistinct(distinct_id) as uv,
COUNTDistinct(ip) as ip_num,
COUNTDistinct(user_id) as users
from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,utm_source
) t1
left join (
select
dt,
tenant_id,
project_id,
app_id,
platform,
utm_source,
sum(is_new_visit) as visit_num
from (
select dt,tenant_id,project_id,app_id,platform,utm_source,
distinct_id,
real_time,
if(
prev_real_time IS NULL OR
toUInt64(real_time) - toUInt64(prev_real_time) > 30 * 60,
1,
0
) as is_new_visit
from (
select dt,tenant_id,project_id,app_id,platform,utm_source,
distinct_id,
real_time,
any(real_time) OVER (PARTITION BY dt,distinct_id ORDER BY real_time Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_time
from (
select dt,tenant_id,project_id,app_id,platform,
multiIf(utm_source is null, '空',utm_source = '', '空',utm_source = 'N/A', '空',utm_source) as utm_source,
distinct_id,
toDateTime(toInt64(upload_time) / 1000) AS real_time
from dwd_dc.dwd_ck_events where dt>='$dt'
)
)
where is_new_visit = 1
)
group by dt,tenant_id,project_id,app_id,platform,utm_source
) t2 on t1.dt=t2.dt and t1.tenant_id=t2.tenant_id and t1.project_id=t2.project_id and t1.app_id=t2.app_id
and t1.platform=t2.platform and t1.utm_source=t2.utm_source
left join (
select
a1.dt,
a1.tenant_id,
a1.project_id,
a1.app_id,
a1.platform,
a1.utm_source,
sum(a1.visit_time) as visit_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
session_id,
multiIf(utm_source is null, '空',utm_source = '', '空',utm_source = 'N/A', '空',utm_source) as utm_source,
max(toInt64(upload_time))-min(toInt64(upload_time)) as visit_time
from dwd_dc.dwd_ck_events where dt>='$dt' and session_id<>''
group by dt,tenant_id,project_id,app_id,platform,session_id,utm_source
) a1 group by a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform,a1.utm_source
) t3 on t1.dt=t3.dt and t1.tenant_id=t3.tenant_id and t1.project_id=t3.project_id and t1.app_id=t3.app_id
and t1.platform=t3.platform and t1.utm_source=t3.utm_source
left join (
select dt,tenant_id,project_id,app_id,platform,utm_source,sum(multiIf(pv=1 ,1,0)) as bounce_num from (
select dt,tenant_id,project_id,app_id,platform,
multiIf(utm_source is null, '空',utm_source = '', '空',utm_source = 'N/A', '空',utm_source) as utm_source,
count(distinct_id) pv from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,utm_source
) group by dt,tenant_id,project_id,app_id,platform,utm_source
) t4 on t1.dt=t4.dt and t1.tenant_id=t4.tenant_id and t1.project_id=t4.project_id and t1.app_id=t4.app_id and t1.platform=t4.platform
and t1.utm_source=t4.utm_source
left join (
select distinct dt,tenant_id,project_id,app_id,platform,
multiIf(utm_source is null, '空',utm_source = '', '空',utm_source = 'N/A', '空',utm_source) as utm_source,
is_first_time from dwd_dc.dwd_ck_events where dt>='$dt' and distinct_id is not null
) t5 on t1.dt=t5.dt and t1.tenant_id=t5.tenant_id and t1.project_id=t5.project_id and t1.app_id=t5.app_id and t1.platform=t5.platform
and t1.utm_source=t5.utm_source;
"
echo "${sql}"
$ck_login --query="${sql}"
# Force a merge so the target table (presumably a ReplacingMergeTree — confirm
# engine) deduplicates the rows just inserted.
sql1="OPTIMIZE TABLE dwd_dc.dwd_source_site_summary_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_user_visit_bydate.sh
#!/bin/bash
# User visit-count distribution -> dwd_dc.dwd_user_visit_bydate
# Buckets each distinct_id by its number of visits (sessions split on a
# 30-minute inactivity gap) and counts users per bucket, per
# dt/tenant/project/app/platform/is_new_user. Run from cron every 5 minutes.
# Shared ClickHouse client settings (defines $ck_login, credentials included).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the clickhouse-client command line, password included,
# into the cron log.
echo $ck_login
echo $curdir $cf $dt
# NOTE(review): as in the sibling scripts, the "1 PRECEDING" window frame with
# any(real_time) makes prev_real_time equal the current row's time on a
# visitor's first event, so that first visit is never counted — confirm.
sql="
insert into dwd_dc.dwd_user_visit_bydate
select
dt,
tenant_id,
project_id,
app_id,
platform,
is_new_user,
countDistinct(if(visit = 1, distinct_id, null)) as v1_uv,
countDistinct(if(visit = 2, distinct_id, null)) as v2_uv,
countDistinct(if(visit = 3, distinct_id, null)) as v3_uv,
countDistinct(if(visit = 4, distinct_id, null)) as v4_uv,
countDistinct(if(visit = 5, distinct_id, null)) as v5_uv,
countDistinct(if(visit = 6, distinct_id, null)) as v6_uv,
countDistinct(if(visit = 7, distinct_id, null)) as v7_uv,
countDistinct(if(visit = 8, distinct_id, null)) as v8_uv,
countDistinct(if(visit = 9, distinct_id, null)) as v9_uv,
countDistinct(if(visit = 10, distinct_id, null)) as v10_uv,
countDistinct(if(visit >= 11
AND visit <= 15, distinct_id, null)) as v11_15_uv,
countDistinct(if(visit >= 16
AND visit <= 50, distinct_id, null)) as v16_50_uv,
countDistinct(if(visit >= 51
AND visit <= 100, distinct_id, null)) as v51_100_uv,
countDistinct(if(visit >= 101
AND visit <= 200, distinct_id, null)) as v101_200_uv,
countDistinct(if(visit >= 201
AND visit <= 300, distinct_id, null)) as v201_300_uv,
countDistinct(if(visit >= 301, distinct_id, null)) as v300_uv,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
distinct_id,
is_new_user,
sum(is_new_visit) as visit
from (
select dt,tenant_id,project_id,app_id,platform,is_new_user,
distinct_id,
real_time,
if(
prev_real_time IS NULL OR
toUInt64(real_time) - toUInt64(prev_real_time) > 30 * 60,
1,
0
) as is_new_visit
from (
select dt,tenant_id,project_id,app_id,platform,is_new_user,
distinct_id,
real_time,
any(real_time) OVER (PARTITION BY dt,distinct_id ORDER BY real_time Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_time
from (
select dt,tenant_id,project_id,app_id,platform,
distinct_id,multiIf(is_first_time='true','是','否') as is_new_user,
toDateTime(toInt64(upload_time) / 1000) AS real_time
from dwd_dc.dwd_ck_events where dt>='$dt'
)
)
where is_new_visit = 1
)
group by dt,tenant_id,project_id,app_id,platform,distinct_id,is_new_user
) group by dt,tenant_id,project_id,app_id,platform,is_new_user;
"
echo "${sql}"
$ck_login --query="${sql}"
# Force a merge so the target table (presumably a ReplacingMergeTree — confirm
# engine) deduplicates the rows just inserted.
sql1="OPTIMIZE TABLE dwd_dc.dwd_user_visit_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_user_visitpage_bydate.sh
#!/bin/bash
# Per-user page-view count distribution (用户访问页数分布表 dwd_user_visitpage_bydate).
#
# Daily batch job: bucket each visitor's page-view count for the stat date and
# load per-bucket distinct-visitor counts into dwd_dc.dwd_user_visitpage_bydate,
# then OPTIMIZE ... FINAL so ReplacingMergeTree duplicates are collapsed.
#
# conf.sh is expected to define $ck_login (the clickhouse-client command line).
source /home/hadoop/shell_code/conf.sh
# Quote the substitutions so paths containing spaces do not word-split
# (shellcheck SC2046/SC2086); && ensures pwd only runs if cd succeeded.
curdir=$(cd "$(dirname "$0")" && pwd)
cf=$curdir/$0
# Stat date: defaults to yesterday; a single optional argument overrides it.
dt=$(date -d "-1 days" +%Y-%m-%d)
[ $# -eq 1 ] && dt="$1"
# NOTE(review): echoes the client command line (possibly credentials) to logs.
echo "$ck_login"
echo "$curdir" "$cf" "$dt"
# Inner query: per-visitor daily event count (pv) over page/interaction event
# types; outer query: bucket pv and count distinct visitors per bucket.
sql="
insert into dwd_dc.dwd_user_visitpage_bydate
select
dt,
tenant_id,
project_id,
app_id,
platform,
is_new_user,
countDistinct(if(pv = 1, distinct_id, null)) as pv1_uv,
countDistinct(if(pv >= 2 and pv <= 5, distinct_id, null)) as pv2_5_uv,
countDistinct(if(pv >= 6 and pv <= 10, distinct_id, null)) as pv6_10_uv,
countDistinct(if(pv >= 11 and pv <= 20, distinct_id, null)) as pv11_20_uv,
countDistinct(if(pv >= 21 and pv <= 30, distinct_id, null)) as pv21_30_uv,
countDistinct(if(pv >= 31 and pv <= 40, distinct_id, null)) as pv31_40_uv,
countDistinct(if(pv >= 41 and pv <= 50, distinct_id, null)) as pv41_50_uv,
countDistinct(if(pv >= 51 and pv <= 100, distinct_id, null)) as pv51_100_uv,
countDistinct(if(pv >= 101, distinct_id, null)) as pv100_uv,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
distinct_id,
multiIf(is_first_time='true','是','否') as is_new_user,
count(distinct_id) as pv
from dwd_dc.dwd_ck_events where dt>='$dt' and event_type in ('view_page','click','custom','business_event','wx_event')
group by dt,tenant_id,project_id,app_id,platform,distinct_id,multiIf(is_first_time='true','是','否')
) group by dt,tenant_id,project_id,app_id,platform,is_new_user;
"
echo "${sql}"
# $ck_login is intentionally unquoted: it expands to the client binary plus args.
$ck_login --query="${sql}"
# Force-merge the target so ReplacingMergeTree dedup is visible to readers now.
sql1="OPTIMIZE TABLE dwd_dc.dwd_user_visitpage_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_user_visittime_bydate.sh
#!/bin/bash
# Per-user visit-duration distribution (用户访问时长分布表 dwd_user_visittime_bydate).
#
# Daily batch job: compute each visitor's time span for the stat date
# (max - min upload_time, converted from ms to seconds), bucket the durations,
# and load per-bucket distinct-visitor counts into
# dwd_dc.dwd_user_visittime_bydate; then OPTIMIZE ... FINAL to collapse
# ReplacingMergeTree duplicates.
#
# conf.sh is expected to define $ck_login (the clickhouse-client command line).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date: defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the client command line (possibly credentials) to logs.
echo $ck_login
echo $curdir $cf $dt
# NOTE(review): session_id is Nullable in dwd_ck_events; session_id<>'' also
# drops NULL session_ids (NULL comparison is not true) — presumably intended.
# visit_time here spans the whole day per visitor, not per session — confirm.
sql="
insert into dwd_dc.dwd_user_visittime_bydate
select
dt,
tenant_id,
project_id,
app_id,
platform,
is_new_user,
countDistinct(if(visit_time < 10, distinct_id, NULL)) as vt0_10_uv,
countDistinct(if(visit_time >= 10 AND visit_time < 30, distinct_id, NULL)) as vt10_30_uv,
countDistinct(if(visit_time >= 30 AND visit_time < 60, distinct_id, NULL)) as vt30_60_uv,
countDistinct(if(visit_time >= 60 AND visit_time < 120, distinct_id, NULL)) as vt60_120_uv,
countDistinct(if(visit_time >= 120 AND visit_time < 180, distinct_id, NULL)) as vt120_180_uv,
countDistinct(if(visit_time >= 180 AND visit_time < 240, distinct_id, NULL)) as vt180_240_uv,
countDistinct(if(visit_time >= 240 AND visit_time < 300, distinct_id, NULL)) as vt240_300_uv,
countDistinct(if(visit_time >= 300 AND visit_time < 600, distinct_id, NULL)) as vt300_600_uv,
countDistinct(if(visit_time >= 600 AND visit_time < 1800, distinct_id, NULL)) as vt600_1800_uv,
countDistinct(if(visit_time >= 1800 AND visit_time < 3600, distinct_id, NULL)) as vt1800_3600_uv,
countDistinct(if(visit_time >= 3600, distinct_id, NULL)) as vt3600_uv,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
distinct_id,
multiIf(is_first_time='true','是','否') as is_new_user,
(max(toInt64(upload_time))-min(toInt64(upload_time)))/1000 as visit_time
from dwd_dc.dwd_ck_events where dt>='$dt' and session_id<>''
group by dt,tenant_id,project_id,app_id,platform,distinct_id,multiIf(is_first_time='true','是','否')
) group by dt,tenant_id,project_id,app_id,platform,is_new_user
"
echo "${sql}"
# $ck_login is intentionally unquoted: it expands to the client binary plus args.
$ck_login --query="${sql}"
# Force-merge the target so ReplacingMergeTree dedup is visible to readers now.
sql1="OPTIMIZE TABLE dwd_dc.dwd_user_visittime_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_visitor_life_bydate.sh
#!/bin/bash
# New/returning visitor lifecycle stats (新老访客生命周期统计 dwd_visitor_life_bydate).
#
# Daily batch job loading dwd_dc.dwd_visitor_life_bydate:
#   t1: overall UV per (dt, tenant, project, app, platform, new/old flag);
#   t2: "reflux" users — old visitors active today but not the previous day;
#   t3: "silent" users — old visitors active today but not the following day;
#   t4: "lost" users — visitors whose previous visit is more than one day back.
# Finishes with OPTIMIZE ... FINAL to collapse ReplacingMergeTree duplicates.
#
# conf.sh is expected to define $ck_login (the clickhouse-client command line).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date: defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the client command line (possibly credentials) to logs.
echo $ck_login
echo $curdir $cf $dt
# NOTE(review): t2/t3 use an anti-join emulated via LEFT JOIN + ifnull(...)='' —
# they rely on the scanned window (dt>='$dt') containing the neighbouring day.
# t4's any(dt) OVER (... ROWS BETWEEN 1 PRECEDING ...) emulates lag(dt): for a
# visitor's first row in the window prev_real_dt equals dt itself, so such rows
# never qualify as "lost" — verify this matches the intended definition.
sql="
insert into dwd_dc.dwd_visitor_life_bydate
select
t1.dt,
t1.tenant_id,
t1.project_id,
t1.app_id,
t1.platform,
t1.is_new_user,
t1.uv,
t2.uv as reflux_uv, --回流用户
t3.uv as silent_uv, --沉默用户
t4.uv as lost_uv, --流失用户
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
multiIf(is_first_time='true','是','否') as is_new_user,
countdistinct(distinct_id) as uv
from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,multiIf(is_first_time='true','是','否')
) t1
left join (
--回流用户,老访客中,当天访问但前一天未访问的独立访客数。
select a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform,countdistinct(a1.distinct_id) as uv
from (select distinct dt,tenant_id,project_id,app_id,platform,distinct_id from dwd_dc.dwd_ck_events where dt>='$dt' and is_first_time<>'true') a1
left join (
select distinct dt,tenant_id,project_id,app_id,platform,distinct_id from dwd_dc.dwd_ck_events where dt>='$dt'
) a2 on toDate(a1.dt)-1=toDate(a2.dt) and a1.tenant_id=a2.tenant_id and a1.project_id=a2.project_id and a1.app_id=a2.app_id
and a1.platform=a2.platform and a1.distinct_id=a2.distinct_id
where ifnull(a2.distinct_id,'')=''
group by a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform
) t2 on t1.dt=t2.dt and t1.tenant_id=t2.tenant_id and t1.project_id=t2.project_id and t1.app_id=t2.app_id
and t1.platform=t2.platform and t1.is_new_user='否'
left join (
--沉默用户 老访客中,当天未访问但前一天有访问的独立访客数。
select a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform,countdistinct(a1.distinct_id) as uv
from (select distinct dt,tenant_id,project_id,app_id,platform,distinct_id from dwd_dc.dwd_ck_events where dt>='$dt' and is_first_time<>'true') a1
left join (
select distinct dt,tenant_id,project_id,app_id,platform,distinct_id from dwd_dc.dwd_ck_events where dt>='$dt' and is_first_time<>'true'
) a2 on toDate(a1.dt)+1=toDate(a2.dt) and a1.tenant_id=a2.tenant_id and a1.project_id=a2.project_id and a1.app_id=a2.app_id
and a1.platform=a2.platform and a1.distinct_id=a2.distinct_id
where ifnull(a2.distinct_id,'')=''
group by a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform
) t3 on t1.dt=t3.dt and t1.tenant_id=t3.tenant_id and t1.project_id=t3.project_id and t1.app_id=t3.app_id
and t1.platform=t3.platform and t1.is_new_user='否'
left join (
--流失用户 访客中距离上次访问超过一天的。
select dt,tenant_id,project_id,app_id,platform,is_new_user,countdistinct(distinct_id) as uv from (
select dt,tenant_id,project_id,app_id,platform,distinct_id,multiIf(is_first_time='true','是','否') as is_new_user,
any(dt) OVER (PARTITION BY distinct_id ORDER BY dt Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_dt
from dwd_dc.dwd_ck_events where dt>='$dt'
) where toDate(dt)-toDate(prev_real_dt)>1 group by dt,tenant_id,project_id,app_id,platform,is_new_user
) t4 on t1.dt=t4.dt and t1.tenant_id=t4.tenant_id and t1.project_id=t4.project_id and t1.app_id=t4.app_id
and t1.platform=t4.platform and t1.is_new_user=t4.is_new_user
;
"
echo "${sql}"
# $ck_login is intentionally unquoted: it expands to the client binary plus args.
$ck_login --query="${sql}"
# Force-merge the target so ReplacingMergeTree dedup is visible to readers now.
sql1="OPTIMIZE TABLE dwd_dc.dwd_visitor_life_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
dwd_visituri_summary_bydate.sh
#!/bin/bash
# Visited-page statistics (受访页面统计表 dwd_visituri_summary_bydate).
#
# Daily batch job loading dwd_dc.dwd_visituri_summary_bydate, joining six
# per-page aggregates on (dt, tenant, project, app, platform, title, url):
#   t1: pv / uv / ip count / member count per page;
#   t2: visit (session) count per page, sessions split on >30-minute gaps;
#   t3: total dwell time per page, summed over session_id spans;
#   t4: bounce count (pages whose per-group pv equals 1);
#   t5: is_first_time flag rows used to derive is_new_user;
#   t6: downstream-PV contribution — events whose referrer_url equals this
#       page's url.
# Finishes with OPTIMIZE ... FINAL to collapse ReplacingMergeTree duplicates.
#
# conf.sh is expected to define $ck_login (the clickhouse-client command line).
source /home/hadoop/shell_code/conf.sh
curdir=$(cd $(dirname $0);pwd)
cf=$curdir/$0
# Stat date: defaults to yesterday; a single optional argument overrides it.
dt=`date -d "-1 days" +%Y-%m-%d `
[ $# -eq 1 ] && dt=$1
# NOTE(review): echoes the client command line (possibly credentials) to logs.
echo $ck_login
echo $curdir $cf $dt
# NOTE(review): the inner t1 subquery selects an extra update_time column that
# the outer query never reads (it computes its own) — looks like dead code.
# t5 is not deduplicated per page beyond DISTINCT on is_first_time, so a page
# seen by both new and old visitors can multiply rows in the join — verify.
sql="
insert into dwd_dc.dwd_visituri_summary_bydate
select t1.dt,
t1.tenant_id,
t1.project_id,
t1.app_id,
t1.platform,
t1.visit_title,
t1.visit_url,
t1.pv,
t2.visit_num,
t1.uv,
t1.ip_num,
t3.visit_time,
t4.bounce_num,
t6.c_d_pv,
multiIf(t5.is_first_time='true','是','否') as is_new_user,
t1.users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
multiIf(current_page_name is null, '空',current_page_name = '', '空',current_page_name = 'N/A', '空',current_page_name) as visit_title,
multiIf(current_page_url is null, '空',current_page_url = '', '空',current_page_url = 'N/A', '空',current_page_url) as visit_url,
count(distinct_id) as pv,
COUNTDistinct(distinct_id) as uv,
COUNTDistinct(ip) as ip_num,
COUNTDistinct(user_id) as users,
toTimeZone(now(), 'Asia/Shanghai') as update_time
from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,visit_title,visit_url
) t1
left join (
select
dt,
tenant_id,
project_id,
app_id,
platform,
visit_title,
visit_url,
sum(is_new_visit) as visit_num
from (
select dt,tenant_id,project_id,app_id,platform,visit_title,visit_url,
distinct_id,
real_time,
if(
prev_real_time IS NULL OR
toUInt64(real_time) - toUInt64(prev_real_time) > 30 * 60,
1,
0
) as is_new_visit
from (
select dt,tenant_id,project_id,app_id,platform,visit_title,visit_url,
distinct_id,
real_time,
any(real_time) OVER (PARTITION BY dt,distinct_id ORDER BY real_time Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS prev_real_time
from (
select dt,tenant_id,project_id,app_id,platform,
multiIf(current_page_name is null, '空',current_page_name = '', '空',current_page_name = 'N/A', '空',current_page_name) as visit_title,
multiIf(current_page_url is null, '空',current_page_url = '', '空',current_page_url = 'N/A', '空',current_page_url) as visit_url,
distinct_id,
toDateTime(toInt64(upload_time) / 1000) AS real_time
from dwd_dc.dwd_ck_events where dt>='$dt'
)
)
where is_new_visit = 1
)
group by dt,tenant_id,project_id,app_id,platform,visit_title,visit_url
) t2 on t1.dt=t2.dt and t1.tenant_id=t2.tenant_id and t1.project_id=t2.project_id and t1.app_id=t2.app_id
and t1.platform=t2.platform and t1.visit_title=t2.visit_title and t1.visit_url=t2.visit_url
left join (
select
a1.dt,
a1.tenant_id,
a1.project_id,
a1.app_id,
a1.platform,
a1.visit_title,
a1.visit_url,
sum(a1.visit_time) as visit_time
from (
select
dt,
tenant_id,
project_id,
app_id,
platform,
session_id,
multiIf(current_page_name is null, '空',current_page_name = '', '空',current_page_name = 'N/A', '空',current_page_name) as visit_title,
multiIf(current_page_url is null, '空',current_page_url = '', '空',current_page_url = 'N/A', '空',current_page_url) as visit_url,
max(toInt64(upload_time))-min(toInt64(upload_time)) as visit_time
from dwd_dc.dwd_ck_events where dt>='$dt' and session_id<>''
group by dt,tenant_id,project_id,app_id,platform,session_id,visit_title,visit_url
) a1 group by a1.dt,a1.tenant_id,a1.project_id,a1.app_id,a1.platform,a1.visit_title,a1.visit_url
) t3 on t1.dt=t3.dt and t1.tenant_id=t3.tenant_id and t1.project_id=t3.project_id and t1.app_id=t3.app_id
and t1.platform=t3.platform and t1.visit_title=t3.visit_title and t1.visit_url=t3.visit_url
left join (
select dt,tenant_id,project_id,app_id,platform,visit_title,visit_url,sum(multiIf(pv=1 ,1,0)) as bounce_num from (
select dt,tenant_id,project_id,app_id,platform,
multiIf(current_page_name is null, '空',current_page_name = '', '空',current_page_name = 'N/A', '空',current_page_name) as visit_title,
multiIf(current_page_url is null, '空',current_page_url = '', '空',current_page_url = 'N/A', '空',current_page_url) as visit_url,
count(distinct_id) pv from dwd_dc.dwd_ck_events where dt>='$dt'
group by dt,tenant_id,project_id,app_id,platform,visit_title,visit_url
) group by dt,tenant_id,project_id,app_id,platform,visit_title,visit_url
) t4 on t1.dt=t4.dt and t1.tenant_id=t4.tenant_id and t1.project_id=t4.project_id and t1.app_id=t4.app_id and t1.platform=t4.platform
and t1.visit_title=t4.visit_title and t1.visit_url=t4.visit_url
left join (
select distinct dt,tenant_id,project_id,app_id,platform,
multiIf(current_page_name is null, '空',current_page_name = '', '空',current_page_name = 'N/A', '空',current_page_name) as visit_title,
multiIf(current_page_url is null, '空',current_page_url = '', '空',current_page_url = 'N/A', '空',current_page_url) as visit_url,
is_first_time from dwd_dc.dwd_ck_events where dt>='$dt' and distinct_id is not null
) t5 on t1.dt=t5.dt and t1.tenant_id=t5.tenant_id and t1.project_id=t5.project_id and t1.app_id=t5.app_id and t1.platform=t5.platform
and t1.visit_title=t5.visit_title and t1.visit_url=t5.visit_url
left join (
select dt,tenant_id,project_id,app_id,platform,
multiIf(referrer_url is null, '空',referrer_url = '', '空',referrer_url = 'N/A', '空',referrer_url) as referrer_url,
count(distinct_id) as c_d_pv from dwd_dc.dwd_ck_events where dt>='$dt' group by dt,tenant_id,project_id,app_id,platform,referrer_url
) t6 on t1.dt=t6.dt and t1.tenant_id=t6.tenant_id and t1.project_id=t6.project_id and t1.app_id=t6.app_id and t1.platform=t6.platform
and t1.visit_url=t6.referrer_url;
"
echo "${sql}"
# $ck_login is intentionally unquoted: it expands to the client binary plus args.
$ck_login --query="${sql}"
# Force-merge the target so ReplacingMergeTree dedup is visible to readers now.
sql1="OPTIMIZE TABLE dwd_dc.dwd_visituri_summary_bydate FINAL SETTINGS optimize_skip_merged_partitions=1;"
echo "${sql1}"
$ck_login --query="${sql1}"
4、edt-event-process-data SQL相关脚本
说明:
创建clickhouse表脚本
创建hive表脚本
sql脚本_clickhouse.sql
-- Databases for the raw (ods) and detail (dwd) layers.
CREATE DATABASE IF NOT EXISTS ods_dc;
CREATE DATABASE IF NOT EXISTS dwd_dc;
-- Realtime query table for the event pipeline (one row per pipeline record).
-- IF NOT EXISTS added so rerunning this script is idempotent, matching every
-- other CREATE TABLE in this file.
CREATE TABLE IF NOT EXISTS ods_dc.ods_event_pipeline_realtime
(
    `apexId` Nullable(String),
    `appId` Nullable(String),
    `appVersion` Nullable(String),
    `content` Nullable(String),
    `createTime` DateTime,
    `ts` DateTime,
    `error` Nullable(String),
    `eventCode` Nullable(String),
    `eventType` Nullable(String),
    `projectId` Nullable(String),
    `sendType` Nullable(String),
    `wechatOpenId` Nullable(String),
    `wechatUnionId` Nullable(String),
    `eventName` Nullable(String)
) ENGINE = MergeTree
-- Partition by calendar day, not the raw DateTime: PARTITION BY ts would make
-- one partition per distinct second, which quickly hits ClickHouse's
-- parts/partitions limits ("too many parts"). Daily partitions also match the
-- other tables in this file.
PARTITION BY toDate(ts)
ORDER BY (ts,createTime)
COMMENT '事件中心实时查询表';
drop table IF EXISTS dwd_dc.dwd_ck_events;
-- Full event detail table: one row per reported SDK event; every downstream
-- summary job in this file reads from it. Partitioned per day (dt is a string
-- date) and ordered by visitor id.
-- NOTE(review): IF NOT EXISTS is redundant right after the unconditional DROP;
-- most analytics columns are Nullable(String) even where numeric/date types
-- would fit — presumably to tolerate arbitrary SDK payloads, confirm.
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_ck_events(
-- Partition/sort keys and event identity.
dt String,
stat_hour String,
distinct_id String,
ts Nullable(String),
apex_id Nullable(String),
event_code Nullable(String),
tenant_id Nullable(String),
project_id Nullable(String),
app_id Nullable(String),
platform Nullable(String),
message_id Nullable(String),
user_id Nullable(String),
event_name Nullable(String),
event_type Nullable(String),
send_type Nullable(String),
event_code_md5 Nullable(String),
event_duration Nullable(String),
browse_time Nullable(String),
-- Device / OS / network attributes.
device_id Nullable(String),
device_model Nullable(String),
device_brand Nullable(String),
device_type Nullable(String),
device_uuid Nullable(String),
carriers Nullable(String),
network_type Nullable(String),
android_id Nullable(String),
imei Nullable(String),
mac Nullable(String),
user_agent Nullable(String),
os Nullable(String),
idfa Nullable(String),
idfv Nullable(String),
oaid Nullable(String),
latitude Nullable(String),
longitude Nullable(String),
wechat_open_id Nullable(String),
wechat_union_id Nullable(String),
-- Screen / page dimensions and client session info.
screen_width Nullable(String),
screen_height Nullable(String),
page_height Nullable(String),
page_width Nullable(String),
browser_version Nullable(String),
session_id Nullable(String),
sdk_version Nullable(String),
sdk_type Nullable(String),
app_version Nullable(String),
app_package_id Nullable(String),
mini_program_scene Nullable(String),
market_name Nullable(String),
-- Traffic source / UTM attribution.
referrer_url Nullable(String),
referrer_host Nullable(String),
utm_matching_type Nullable(String),
utm_source Nullable(String),
utm_medium Nullable(String),
utm_term Nullable(String),
utm_content Nullable(String),
utm_campaign Nullable(String),
-- Geo attributes resolved from IP.
ip Nullable(String),
province Nullable(String),
city Nullable(String),
county Nullable(String),
country Nullable(String),
-- Current / source page context.
page_type Nullable(String),
current_page_id Nullable(String),
current_page_name Nullable(String),
current_page_url Nullable(String),
time_since_last_operation Nullable(String),
current_page_host Nullable(String),
current_page_query Nullable(String),
current_page_path Nullable(String),
source_page_id Nullable(String),
source_page_name Nullable(String),
source_page_url Nullable(String),
source_page_host Nullable(String),
source_page_query Nullable(String),
source_page_path Nullable(String),
-- Click coordinates and clicked-element details.
page_X Nullable(String),
page_Y Nullable(String),
client_X Nullable(String),
client_Y Nullable(String),
element_id Nullable(String),
element_name Nullable(String),
element_target_url Nullable(String),
element_type Nullable(String),
element_content Nullable(String),
element_selector Nullable(String),
element_position Nullable(String),
element_index Nullable(String),
element_tag_name Nullable(String),
element_class Nullable(String),
element_path Nullable(String),
-- Processing timestamps and first-visit flags.
upload_time Nullable(String),
write_log_time Nullable(String),
insert_db_time Nullable(String),
track_signup_original_id Nullable(String),
last_visit_time Nullable(String),
is_first_time Nullable(String),
forward_user_id Nullable(String),
common Nullable(String),
extensions Nullable(String)
) ENGINE = MergeTree
PARTITION BY dt
ORDER BY distinct_id
COMMENT '事件数据全表';
-- Event attribute dictionary (事件属性信息): distinct attribute key/value
-- pairs observed on events, deduplicated by the ORDER BY key.
DROP TABLE IF EXISTS dwd_dc.dwd_event_attribute_info;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_event_attribute_info
(
    option_name  String   COMMENT '属性key',
    option_value String   COMMENT '属性value',
    update_time  DateTime COMMENT '更新时间'
)
ENGINE = ReplacingMergeTree()
ORDER BY (option_name, option_value);
-- Daily traffic overview (流量概览表按天): one row per
-- (dt, tenant, project, app, platform, new-user flag, login flag).
DROP TABLE IF EXISTS dwd_dc.dwd_flow_summary_bydate;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_flow_summary_bydate
(
    dt            String          COMMENT '日期',
    tenant_id     String          COMMENT '租户id',
    project_id    String          COMMENT '项目id',
    app_id        String          COMMENT '产品id',
    platform      String          COMMENT '上报平台',
    is_new_user   String          COMMENT '是否新客',
    is_user_login String          COMMENT '会员是否登录',
    pv            Int32 DEFAULT 0 COMMENT '浏览量 PV',
    visit_num     Int32 DEFAULT 0 COMMENT '访问次数',
    uv            Int32 DEFAULT 0 COMMENT '访客数 UV',
    ip_num        Int32 DEFAULT 0 COMMENT 'IP数',
    visit_time    Int64 DEFAULT 0 COMMENT '访问时长 ms',
    bounce_num    Int32 DEFAULT 0 COMMENT '跳出数',
    users         Int32 DEFAULT 0 COMMENT '会员数',
    update_time   DateTime        COMMENT '更新时间'
)
ENGINE = ReplacingMergeTree()
PARTITION BY dt
ORDER BY (dt, tenant_id, project_id, app_id, platform, is_new_user, is_user_login);
-- Hourly traffic overview (流量概览表按小时): same grain as
-- dwd_flow_summary_bydate plus stat_hour.
-- NOTE(review): ReplacingMergeTree() has no version column, so which duplicate
-- survives a background merge is unspecified; rows dedup on the ORDER BY key.
drop table IF EXISTS dwd_dc.dwd_flow_summary_byhour;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_flow_summary_byhour (
dt String COMMENT '日期',
stat_hour String COMMENT '小时',
tenant_id String COMMENT '租户id',
project_id String COMMENT '项目id',
app_id String COMMENT '产品id',
platform String COMMENT '上报平台',
is_new_user String COMMENT '是否新客',
is_user_login String COMMENT '会员是否登录',
pv Int32 DEFAULT 0 COMMENT '浏览量 PV',
visit_num Int32 DEFAULT 0 COMMENT '访问次数',
uv Int32 DEFAULT 0 COMMENT '访客数 UV',
ip_num Int32 DEFAULT 0 COMMENT 'IP数',
visit_time Int64 DEFAULT 0 COMMENT '访问时长 ms',
bounce_num Int32 DEFAULT 0 COMMENT '跳出数',
users Int32 DEFAULT 0 COMMENT '会员数',
update_time DateTime COMMENT '更新时间'
) engine = ReplacingMergeTree()
PARTITION BY dt
order by (dt,stat_hour,tenant_id,project_id,app_id,platform,is_new_user,is_user_login);
-- New/returning visitor statistics (新老访客统计表): daily traffic metrics
-- split by the is_new_user flag.
-- NOTE(review): the name ends in `_summary_date` while sibling tables use
-- `_summary_bydate` — inconsistent but referenced elsewhere; do not rename
-- without updating the loader scripts.
drop table IF EXISTS dwd_dc.dwd_newolduser_summary_date;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_newolduser_summary_date (
dt String COMMENT '日期',
tenant_id String COMMENT '租户id',
project_id String COMMENT '项目id',
app_id String COMMENT '产品id',
platform String COMMENT '上报平台',
pv Int32 DEFAULT 0 COMMENT '浏览量 PV',
visit_num Int32 DEFAULT 0 COMMENT '访问次数',
uv Int32 DEFAULT 0 COMMENT '访客数 UV',
ip_num Int32 DEFAULT 0 COMMENT 'IP数',
visit_time Int64 DEFAULT 0 COMMENT '访问时长 ms',
bounce_num Int32 DEFAULT 0 COMMENT '跳出数',
is_new_user String COMMENT '是否新客',
users Int32 DEFAULT 0 COMMENT '会员数',
update_time DateTime COMMENT '更新时间'
) engine = ReplacingMergeTree()
PARTITION BY dt
order by (dt,tenant_id,project_id,app_id,platform,is_new_user);
-- Visited-page statistics (受访页面统计表): per-page daily metrics, loaded by
-- dwd_visituri_summary_bydate.sh.
drop table IF EXISTS dwd_dc.dwd_visituri_summary_bydate;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_visituri_summary_bydate (
dt String COMMENT '日期',
tenant_id String COMMENT '租户id',
project_id String COMMENT '项目id',
app_id String COMMENT '产品id',
platform String COMMENT '上报平台',
visit_title String COMMENT '受访页面标题',
visit_url String COMMENT '受访页面url',
pv Int32 DEFAULT 0 COMMENT 'PV',
visit_num Int32 DEFAULT 0 COMMENT '访问次数',
uv Int32 DEFAULT 0 COMMENT '访客数 UV',
ip_num Int32 DEFAULT 0 COMMENT 'IP数',
visit_time Int64 DEFAULT 0 COMMENT '访问时长 ms',
bounce_num Int32 DEFAULT 0 COMMENT '跳出数',
c_d_pv Int32 DEFAULT 0 COMMENT '贡献下游浏览量',
is_new_user String COMMENT '是否新客',
users Int32 DEFAULT 0 COMMENT '会员数',
update_time DateTime COMMENT '更新时间'
) engine = ReplacingMergeTree()
PARTITION BY dt
order by (dt,tenant_id,project_id,app_id,platform,visit_title,visit_url,is_new_user);
-- Traffic-source site statistics (来源网站统计表): daily metrics keyed by
-- source_site (derived from utm_source in the loader script).
drop table IF EXISTS dwd_dc.dwd_source_site_summary_bydate;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_source_site_summary_bydate (
dt String COMMENT '日期',
tenant_id String COMMENT '租户id',
project_id String COMMENT '项目id',
app_id String COMMENT '产品id',
platform String COMMENT '上报平台',
source_site String COMMENT '来源网站',
pv Int32 DEFAULT 0 COMMENT 'PV',
visit_num Int32 DEFAULT 0 COMMENT '访问次数',
uv Int32 DEFAULT 0 COMMENT '访客数 UV',
ip_num Int32 DEFAULT 0 COMMENT 'IP数',
visit_time Int64 DEFAULT 0 COMMENT '访问时长 ms',
bounce_num Int32 DEFAULT 0 COMMENT '跳出数',
is_new_user String COMMENT '是否新客',
users Int32 DEFAULT 0 COMMENT '会员数',
update_time DateTime COMMENT '更新时间'
) engine = ReplacingMergeTree()
PARTITION BY dt
order by (dt,tenant_id,project_id,app_id,platform,source_site,is_new_user);
-- Geographic statistics (地域统计表): daily metrics keyed by country and
-- province (resolved from the event IP).
drop table IF EXISTS dwd_dc.dwd_area_summary_bydate;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_area_summary_bydate (
dt String COMMENT '日期',
tenant_id String COMMENT '租户id',
project_id String COMMENT '项目id',
app_id String COMMENT '产品id',
platform String COMMENT '上报平台',
country String COMMENT '国家',
province String COMMENT '省份',
pv Int32 DEFAULT 0 COMMENT 'PV',
visit_num Int32 DEFAULT 0 COMMENT '访问次数',
uv Int32 DEFAULT 0 COMMENT '访客数 UV',
ip_num Int32 DEFAULT 0 COMMENT 'IP数',
visit_time Int64 DEFAULT 0 COMMENT '访问时长 ms',
bounce_num Int32 DEFAULT 0 COMMENT '跳出数',
is_new_user String COMMENT '是否新客',
users Int32 DEFAULT 0 COMMENT '会员数',
update_time DateTime COMMENT '更新时间'
) engine = ReplacingMergeTree()
PARTITION BY dt
order by (dt,tenant_id,project_id,app_id,platform,country,province,is_new_user);
-- Device statistics (设备统计表): daily metrics keyed by device model and
-- brand.
drop table IF EXISTS dwd_dc.dwd_device_summary_bydate;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_device_summary_bydate (
dt String COMMENT '日期',
tenant_id String COMMENT '租户id',
project_id String COMMENT '项目id',
app_id String COMMENT '产品id',
platform String COMMENT '上报平台',
device_model String COMMENT '设备机型',
device_brand String COMMENT '设备品牌',
pv Int32 DEFAULT 0 COMMENT 'PV',
visit_num Int32 DEFAULT 0 COMMENT '访问次数',
uv Int32 DEFAULT 0 COMMENT '访客数 UV',
ip_num Int32 DEFAULT 0 COMMENT 'IP数',
visit_time Int64 DEFAULT 0 COMMENT '访问时长 ms',
bounce_num Int32 DEFAULT 0 COMMENT '跳出数',
is_new_user String COMMENT '是否新客',
users Int32 DEFAULT 0 COMMENT '会员数',
update_time DateTime COMMENT '更新时间'
) engine = ReplacingMergeTree()
PARTITION BY dt
order by (dt,tenant_id,project_id,app_id,platform,device_model,device_brand,is_new_user);
-- Per-user visit-count distribution (用户访问次数分布表): daily bucketed
-- distinct-visitor counts, loaded by dwd_user_visit_bydate.sh.
drop table IF EXISTS dwd_dc.dwd_user_visit_bydate;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_user_visit_bydate (
dt String COMMENT '日期',
tenant_id String COMMENT '租户id',
project_id String COMMENT '项目id',
app_id String COMMENT '产品id',
platform String COMMENT '上报平台',
is_new_user String COMMENT '是否新客',
v1_uv Int32 DEFAULT 0 COMMENT '访问次数为1的uv',
v2_uv Int32 DEFAULT 0 COMMENT '访问次数为2的uv',
v3_uv Int32 DEFAULT 0 COMMENT '访问次数为3的uv',
v4_uv Int32 DEFAULT 0 COMMENT '访问次数为4的uv',
v5_uv Int32 DEFAULT 0 COMMENT '访问次数为5的uv',
v6_uv Int32 DEFAULT 0 COMMENT '访问次数为6的uv',
v7_uv Int32 DEFAULT 0 COMMENT '访问次数为7的uv',
v8_uv Int32 DEFAULT 0 COMMENT '访问次数为8的uv',
v9_uv Int32 DEFAULT 0 COMMENT '访问次数为9的uv',
v10_uv Int32 DEFAULT 0 COMMENT '访问次数为10的uv',
v11_15_uv Int32 DEFAULT 0 COMMENT '访问次数为11到15的uv',
v16_50_uv Int32 DEFAULT 0 COMMENT '访问次数为16到50的uv',
v51_100_uv Int32 DEFAULT 0 COMMENT '访问次数为51到100的uv', -- fixed typo: was '51搭配100'
v101_200_uv Int32 DEFAULT 0 COMMENT '访问次数为101到200的uv',
v201_300_uv Int32 DEFAULT 0 COMMENT '访问次数为201到300的uv',
v300_uv Int32 DEFAULT 0 COMMENT '访问次数为300以上的uv',
update_time DateTime COMMENT '更新时间'
) engine = ReplacingMergeTree()
PARTITION BY dt
order by (dt,tenant_id,project_id,app_id,platform,is_new_user);
-- 用户访问页数分布表
drop table IF EXISTS dwd_dc.dwd_user_visitpage_bydate;
CREATE TABLE IF NOT EXISTS dwd_dc.dwd_user_visitpage_bydate (
dt String COMMENT '日期',
tenant_id String COMMENT '租户id',
project_id String COMMENT '项目id',
app_id String COMMENT '产品id',
platform String COMMENT '上报平台',
is_new_user String COMMENT '是否新客',
pv1_uv Int32 DEFAULT 0 COMMENT '访问页数为1到1',
pv2_5_uv Int32 DEFAULT 0 COMMENT '访问页数为2到5',
pv6_10_uv Int32 DEFAULT 0 COMMENT '访问页数为6到10',
pv11_20_uv Int32 DEFAULT 0 COMMENT '访问页数为11到20',
pv21_30_uv Int32 DEFAULT 0 COMMENT '访问页数为21到30',
pv31_40_uv Int32 DEFAULT 0 COMMENT '访问页数为31到40',
pv41_50_uv Int32 DEFAULT 0 COMMENT '访问页数为41到50',
pv51_100_uv Int32 DEFAULT 0 COMMENT '访问页数为51到100',
pv100_uv Int32 DEFAULT 0 COMMENT '访问页数为100以上',
update_time DateTime COMMENT '