From 5b1e03f8097f08a7ad4a0bd650a6936b253649a8 Mon Sep 17 00:00:00 2001 From: yueliuli <1628111725@qq.com> Date: Wed, 13 May 2026 09:43:29 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=9E=84=EF=BC=9A=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E9=A1=B9=E7=9B=AE=E7=BB=93=E6=9E=84=EF=BC=8C=E4=BB=A5=E9=80=82?= =?UTF-8?q?=E9=85=8D=E6=9B=B4=E5=A4=9A=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .coverage | Bin 0 -> 69632 bytes asr_service.py => app/asr/asr_service.py | 0 main.py => app/asr/core.py | 22 +- .../asr/diarization_service.py | 0 map_speaker.py => app/asr/map_speaker.py | 0 app/asr/routes.py | 70 ++++ app/settings.py | 10 + lib/convert.py => app/transcode/core.py | 23 +- app/transcode/routes.py | 40 +++ app/utils.py | 30 ++ lib/{ => caddy}/caddy_windows_amd64.exe | Bin lib/caddy/run.py | 8 + server.py | 331 ++---------------- 13 files changed, 210 insertions(+), 324 deletions(-) create mode 100644 .coverage rename asr_service.py => app/asr/asr_service.py (100%) rename main.py => app/asr/core.py (97%) rename diarization_service.py => app/asr/diarization_service.py (100%) rename map_speaker.py => app/asr/map_speaker.py (100%) create mode 100644 app/asr/routes.py create mode 100644 app/settings.py rename lib/convert.py => app/transcode/core.py (60%) create mode 100644 app/transcode/routes.py create mode 100644 app/utils.py rename lib/{ => caddy}/caddy_windows_amd64.exe (100%) create mode 100644 lib/caddy/run.py diff --git a/.coverage b/.coverage new file mode 100644 index 0000000000000000000000000000000000000000..dce66a015d9225e243a47ce625635e4d721e28e8 GIT binary patch literal 69632 zcmeI537izg+5hY5>FKVj>7Hg;7Fd>r;aFhhTu?v+<-QS+!wZFFcYp+LLbK1p2b3}&+u6?QMy2RB52kp&_PL>7oF@SU+hX1Zgy z$>;llq^g2;3@p;-8(fRRa3n(wJcqe zT2QexovNrRuP7_6tEgI-s#}!Sd#|WXmy0nA%W%{ZJNC&QFKBK>IUZG)UWipJtEs3g zty!H~l3v{_yMy|)y7a0#S%8(KD;8FXJyJc?;l-(%^n!Fvx~eQ)E6<`wMR_qV4$k3r zo7`O3kj}QLw5AOINgUQ1EI8DZ2@9~Ivg(?0arX0TN~_8irE7boO2ZbGRpWWCs*|m) zsjlo>SDji~QI&0GRc%FGMRipwy((R{qAp$jtqtrYjHdt4cuiY_bBu7;TUV_b z`!8RcZ@HF-+I!gL)Fu~KR35dWs$%(yba?T5r4Ak0E8C2!>Ru_)2)qM|5>}gIbAjMp znO;@9d?|i)np?V}t~#sd;>Dj^g8y;qbEw^>V@KGqJbR-E^UR%JQCAxRJM5+ji;r;Y zi0wzdZ^Tn__)Q|7qNPhy)eEwZj5s6+4o={{=yo~ZzQ~RN|@DV zOvfYLI^iuaw}{m3minK4*e#^5}?nwSoC#jD( z)Jy7TL~X(5^tP=wBMOex9JSxv5(PA4n|7PLJjlo=Li9`G-}qsT5G98mfo}{1|N7}K zE3I3U`ulSi#dzY~<5rvF@{V-k;uPkVi2qvkEsX3bpC8LwzY;rr@o(&~Qx+wMPI*## zb?>uEmtt?SthAyAA6e{!K?&ZYCKSJY+iUm>%%rx<$YhC(J-CQ&ekMM2`uRertSqf6SL2l!hGnhT_lt*9e2(ly@W51ed?;)2 z5&cG;x>B`O^jNz7KFiQ1qSngNa{0K6i_)aVm5b7USDhSnbYKDX~lTWU@Eb z%F^0YSxp)@s&3N3R`%u}M;_k73bnl2uJhw#rS9r-(e3i>|1iMYo zo{(9go?zKI&8@|60+pq@tx6h)-C5t*c<3gZioG;`MQVC;34=GWp^)+uM2{#h`+R9S zy_foe#EV@#OYpuV_)qi~Ss=1NWP!*6kp&_PL>7oF5LqCyKxBc)0+9tG3w-x15Ca2j z;r|=nTg3Ya{}cU17KkhmSs=1NWP!*6kp&_PL>7oF5LqCyKxBc)0{@W~aEw@?r_(}8 zT0pEIU*!M{>Qi!bpMD%$Wq9us?|tv{|HuYLCmmTJvOr{k$O4fCA`3(oh%69UAhJMY zfye@p1tJTC7I0#PpnnE1c&vb_Ujan?Kbg3Wcn^9NUVieq_c`}$x5#rOL)vEEOYf5X%Csfrg!wn-Q%o$Z(o}N>>qP(KI{~SCFzv$!3)Kgrn zsbe#0sw`bL7eAqNNw&JN4px`btm-PN@U&(nu%5onsHeQ5w5DPWP7qUFHCHsYqAV?1 z%bVHS(%PCP)y-sBUGHX`WnH?qZf?4NdM?gCQ&n4r7YI*sD#cp6G^5s~74zqmm6n&U zo>Q|z7;~oG%+oAgwroy#hI4S{8gZJlEv&Vq8K-%8tu;9PLAn;}ofgM>i<((4UZ*)> zjgw5Qv7nhXuBfY6Do*dj7*^J)nU&S1>%`PWqP{5x*4LZ|P(I3Y%c_^H#_GlbR@a;d zP~4JOkBI*#6W5dE*Ix$atLaN>i+_1-En3nr+L+7{eNe+nd#e{ zdcNKNPc*l+hx`A0mYM0@oU{B^|DR1WGhLcf>o@&>+T7DT+*4QnzkRZqDQV7W9$stc zty|O0Oi^>|J7oF5LqCyKxBc)0+9tG3q%%(EbyJSKn!BU74d)Yz9RTf^cPtmvOr{k z$O4fCA`3(oh%69UAhJMYfye@p1tJT42Q3hbxlF|WNsI4b7oF5LqCyKxBc)0-*&&{6F&lLvutBSs=1NWP!*6kp&_PL>7oF5LqCyKxBc) z0+9v2lNN|$tH3)b+Lq%T^#0)O@}Bl~c=vl-y_>zw-eulK?_6)Cx6~{3PVuIAqrGFj zeqML4gO}$y9wfg=?n}O#d@cE6@|onL$@`LbCT~n$le{RoKDjnon_QeMO`e>bk{p>F zoa~+KoNSYHlHh*fe(1jC{?7fCyTkpNdxv|Sd%1hQyUwj~7rJxY>F!wfIJci$Dp8r3pEx-&IWZzJFwrxSO5`OtZ{(lx5BMAWxBMyo zFu#Z2#INQT@(e$VFX8j}Y(9w(=g07)ct_rfGuFTku=m;P>_zqzdxYJ~Ze^RU<1JC#jkquCJFmvv?NEXiW@uXG=MhrU9eqmR-1>0R^&dL_Mpoikdf9r``h|6$b*FW`b%nLj`kqy5EwawAPO`>XL#-04n^j=7u;TH9@%{04 z<9~?%CcZQNQ2d_w&GAj~i{e3iRlG7jKYnt2a(qO5V7zBM70-)vv(fy_{J?y}e93&q zeAN7zd8c`Ud8K)Qd5&3UR+wj+GtBYkFtfke-E42>m{#myY=7+C*sHN$$DW8i7~2~A zQS9p2_hXsZS+OOtd9hisiLn!6N5_g|9b&mLYJ6>cYP@IcHg*|L8V?&kF>Wz_Xk4rw zF6s}D0|Q`FXHfqHiIXN$|9FX$w^M(Z#EFxrKUCpl>K`X@JMKF~;<%~QKUU(H$EZJ8 z;^;BdA0%aNBta$ZqoK!ATnUC?R%n-u}0fZX3MBSeK)%!H5;k#NHk2_ zPh|I`Sje-5cFC92XA*@)sl;Nms4cNO+Rl>L6`L5B*agjDO6=T)`Z0-}I#b_}nCe7* zkk~OreIl_#N18cDVqtfh`JTjr4m7h)V%q|mSt~LBOPV=bmFCmT8i}pj(#&d!d97(? zl|-~{W~Ib@m|SP zODyU}GgS)l#49CsEuxvF5<7RLnI#g@3Yo+6%tbiX=af`G)QKl#QXy^vp`}2 zVp?K5v_iSWw%GJCh3#l&zQi_Yg;I&RZD?km#8z0tTvghNX3mt@B9~^)ka(a4&CHRQ z#4B;SL^nw@r%6n>G;^v%o}igiBp&26bFxIM(ay|9WMH95XURg-qM4ZzV7g@&GEqTVjq#2DzVK_nmJKoYi!OGiTm*SPL`O9HB6G2gUy*J5ucLG z1c~^RWX4OxrzA5@qG;Jzi4L}GjKqXPGovL^Y~(13Hl>-75-po%Mo5fXc1Co(4C2^c z+3&{9IL*j@H!j*M``x%`Z?@lM;ue(Mu35OBW@NW(wlAa^+3lL`&d6G-L9E~rz5*v z)BB2MWVdU&XeQb1nhCsqvfDMqRhQka$?&SnZr7xYW@NW(3Ny)W*R;^2vfDMkLW{|6 z*EDnNjOcb5m}oKC>zdg0WU{?3!~Q5EJKY$=?XuI2i4Vwjx)2i|Ap6{yxDK+_PGXj)fw668p8at&ox@3 z`DLGL2t&(0*Jz1`mVK__wWJx*=K>fG8ZX=Fg7F33O`_MuQr?kfWVdTjG@k5s4GSBT z?RLQsca!LMad5%5%Wl^Y?UCKCA=)FmT|?YW*>0B^jVu$7GiDgD&zQn8rsPiy!ZI<* zAHydsV@Uo8K4uwE`@=^pL$chE3;WnPviP-zGuZbez6h_ebrPR}7ui~gKZj@7*%BXt zpR+X*x4|Q9wZwa18(SsuCaia*#9QGWc9z7O;a0Xn?{G7#%VNVBa1*PQrJEYgV>J>l zgH3F?#EoznTP87pjjUSY8a%8@;yMUerNpyg9b2k*IGZiWV#9f`hAoz*3lS?MRyAy5 zizF_FDz;GKQdrCuNL&I-Sz7O~gq2HNggcZ;TnLNU{46$Xf`zP9md-?+C-L-#yV+cc zC&TINOo^w$$?Ob?r@*Ofj^5!EcDlq_xWj1@XTmIYY8D&rhMDXXSvmsoWQpS&cCgtJ zN5gnFOX64<&1Om*17q0?y~7xGlEjg?!*q!wU?iKC#fBX)f=!jBeGpHScud2KY>LDZ zIEGD@H~>o6B#HfD0Gp_H=+7od?29{$m)HmTvT<2#coF)rv9dJP*oBRe*cwu7v_uD5 zvr!TO95ynGBTpRnU4zCqq%!_*Gs4K_53ja|s=>^NDv z6LE;dryGxB$4Y#XJk18{9iC)^ve==pHk$c(E60arKu>le{lWST3 zEH%t0chY_Wfn2dB_sGJumuPnc1B@w+cDHQNglTqB7KThmMhD0jCOX*Y+1`ZmLtntBZsw+$PCx^vYWk@ zb}|b|&vvtb-*}xYIIf+@Lc+0m7I?yDSzz3zS)k0evtU!(%7TRjS%_Oq_Mm7@X2q}! z$979}p1>Lr8AFe&g+JWD|AF_BaKAk7qr|=FM{jgKcUQajxt}CwCUw&dl>b?Et@nVgvHnH-)R zm`o*GC7HX|{n~xiecpWx{r&as7WY!O(w*Rzy0hG2ZVCGO`L64N^MP}v^SZOk+3M_c zwxg%N&M9`5J85TxbE-4NImXFJ{M?Br{+9S%VqfCz#50Lo64TMY|3RW6kx8sboRK&d zz53pXP6@`p=AZDr{CR!{e~jP9xA05(dcK-h@=`vFAIELphj-$wywAN?yfeJ%-f>=0 z@;&?&V@q;FauK_feZ`(-1@+J-P&dCw6p%zOmL= zW|WItO8zCR?Dqy%(jSU7_bY)OntS(y?wWh`hHjdRdqI)r9>vgAbN3$5Mf2|N&{=cQ z3D8M%*CI$c;&Y9xg9{z?h7Mh!gXTSjP^h`E1GLxNt`G_|w`~XQH0QO2e9f)$psnWI zR?tRs%Uo!!Ij1G$X>O4Ntu%WrAXjtJgO-}zB;;^BY2lT-&_WMZQp$vJLXNE;i!iGi4pSksMIP=na!kHTQU%e4@E~5Aw0*qHbh=$lb|D zn!6T}eVRLWC4bi3sWbUd^V^-spERcqk`FX@Op*6B=N};NX)ZWG{;0WK0okh=E%I*2 z?Z_U@ZQ7D|H0QP zs^){7{9d!wNM2DEw+!;KE;cRlJI%O`&P$q&82PQb5_sf`su|GG`DI`eySPok$W}o&m}+6oYP3| z(Tvjr{aCZ>k*%5&7P(t9-XnKuW}MuqnKE*RX0+h#nk|>yrui$I+^X5kA-5=Nng+R9 z7vuU#Hz})`jpRqF7~N}fqh|c18#Lo5U9TCh)peTjT5Zvc7wB5ec!4%+HX6wfH8;k{ zCdKGylWQ~=enGC*+^&#Zr5Uf#m6}`UlPfgimAG7U%RF+KX0Ii=RI}reOEiDskc&0* zj^rZEl#?H5wkY|&W^C7m%4&u|F3`o;#EqJG9moi0ajyjC;1<29PmA78FHfQ`#Eqeot? z8C~)!&FGU?YF-0pFI6nO@+G<)-SWkn(J!yijE?yt&FGmg)QqnA0?mtINm?^H=jEEw zJ1nCYOe|@@Q;jmBB<>;|b)r>CtiJH-8 zpQ0I^_Q{&jYoDaK1o}_ZjDGtB&FHv~S1dgDak?B`_pzGMcORn}o%hk2(R&}ISh(*a zHKYGNLNhw>!!@G^e}ZOo;g8q+I(cK5V&TLO)#d2LAEz1J_#v9nk3UxPPV(en#ln*x zq|4EjAE+69`C~MrGk>&Z^yUXB7VdoiL*+Y5xOiqkIQ0E=F?#fUMKKtFE`1+`cwlcP zN`+J3OVNTwJr$)F9HnS;x>(V)mwKq`(X;Qa3MWkKrfA57BK48Nx$mk<2M+0?OVPdW ztV(;OJ1OcrFr}z>-;R1~?+%Jy>Q$(ZMkl|$X7ut4)E))B+9~R=H(ycv4s8|fEoh_n zYu{QQjlO=KD$OfsrAyJ<&sC)@^IGas^!IZ#qr=}q?cufb6eYc+A~)$Oa$HAI!bvFN z39g7?5Ly}@e@Iq@p1&f?wiLy!xc+2xBut9O;MI!9sD=Jm(#0?G_`3yb>`h*UbEEhF zwU-+GKe9k%fye@p1tJSX7KkhmSs=1NWP!*6kp&_Pe0ME?@Bc%aSTW(_`KyUBQT+e^ zKK@U)(i>HV|KG*_shy4f@A+o@-(HEa|5AGv<^UWi`oC4j|E=X#+By~E{-ZGR-!qK; z@6Sg6pT+q9HjMpm&c^;%W~2WzRQ$gSW&nuj{{izojQ#JzEPzAt|8pEJ7U`~ z2Vhg|!q~c4HAent#Ky!1VJ827Ui_aBYdj_|vTw%!vAmxs|7Y?4@MklJ9E3}?YB&fj z;D@1l8jgqiLUkw1ft{h+2rJ>0P}RdZ@LH%=z(w$WsH)*|_;aYvfIHwT6}I`-pg;aT zEAj$VH0M7h{(o4^zez4YbK?Ix(2o`o@qcBJCNY0oWC$o*T1SfcH_ZzuY7+BrnhWr+ z#Q%>J^FLCYziBQ&+a_`Trt$x#G5^Ej{7vHjM~eCXSL6I5wy$mV&jR{raS{Jl7H<;U zx0+@PG|f6_k{$71j{pDv67yHq!A$vo9Oy^uh`a!0ohC8=c#}+q|MvL*kz)S;&N#n_ z?JKKcw!94Jqjf}_Usc=WnLC0-AD0%74?EB{x7mQ0HXN6 zv_lmC&)R`R@&Etj@qf$%V9;n}lQ9g;oHKyO{!IR6bTHzl#9y|V{Uv6qFQoV28}N(y zTs{+j6&T9<@vgircQ|36;rs8q*$eEK>;ZNcyPjRnHn6kt?e{WvGMmIsU;|ipR=_-b z_x&&QL;5EDEq#hUOmD)B_zN*Jz816LPseQdQJ4>(iyrZQ``i5CCH7o zv0GbTTKlXwF%$j?e9QedYm;@Mwa%)xO05~z81#dS@%?r;-WdNB-)?^z-)z4-eqH>M zxF4^JFNjZ!4~_T1H`-gqP4f%Pe18t#W&er!Bl8OLJad(~*qmdYXb#7>*t?poO^WZZ zzY%*O_IT`Pv0G!;#4d=f#W&Yaij9s9jP;1Mi#bMv@v-ra@e;nX{-ANEajkKYajsEg zlpC{+@x~DEQ}12xWt=(TZtptp60gqly#+X5!bESV*T+j?&WDLJC47*4KDi@#Z}O(( zmH3{)>g1B-8Of>15y=6`BAgk4xnH{b++FSy_~ykX_d=Wxp&H+hnBk6b2f4*=f$KVr zI1j?_oTr?-oGs2pMlYNLp~fk9W;^4ZAx5}zf0pZHbck;IP^HzY1g)F=8S z&PpswoQCrr9G~c%z?2pKSNI_A2Tc|ogXSg*~Dd+^ZgsKCi;O0;jLI=1hRPCVn6KRX((X8$#6@^5ObW zE>x``54MCV7h1u!p~94Z*eu>i5HIAwvyDbh__G06Fc>z4dzy_7TobAom~eHd z0Ag@es0ij@U8xmm00LKp>MPWjhw32t3NADMK$6+apTmB_AfMsqUlQ*5X=87=I8+~# zPvN3a?I$0@4?^`3*$>|j)raIGxG+?IA|Jv9p?aVE2{wl6J@P)BAF93NJ=hScJ!CJO z7piy29#|i$H_1CtAF4OVn-GL*H+cj6P`yfaLx%gRwKC61-RtDH*xqx(J$E&p0N)GM z3uG6p3)Qd53$Qj+&y!!n*`a!tJP&I^^$d9yR)^{-@(irfs__K!6s**WJVt&CXN77< z;|y34sz=EVs0-C2U)sf6~NMP)9vIgSQ4t+$nCH=R5z2`pdwT^k(*&rsBR=T!NO48KyHKuq1r-jfOM#? zC0n38RQOc{%0jh?Y=-%vx`u3m(okJRu7P=>x}01Eb3=6*xg5?6)g|OII3rXSkxO7s zsD3~$g408F0r>%(7OD;80ytH_EEJFpaEcb|$X37%*X)}wY~@zK3|Gl3Rsv?YO0HW0 znBgjUc^zPetK{nCfElimtEvGrTqQ3l17Uq-#Z;J!&UOs8GsqCk|#|B%y5-Fc{^Z+tK^B305e<_PX^3zmAri- zV1}#YaZ>>^TqTcr3^2o0^5`*u8LpB?jRwqcl{|73V1}#Y$3_BXxJn*A4lu)2@(IHM zGh8JP+YOlEDtYKIDA7D*C}4`KEI)P#^wm80Sm>j9&|v7TdEg-ErFp8Kcgvwy+y2>4*y3mI|G8J#^yxQ!=`Q>B-XFPdLY2(w^eCp>Am;kGAWr{RR^ad^T=glY#oVDL~q0FUEKdUzX& z5fvVR9R>~6!|+IxPk#s=HnQKQvU_fahm3f*=Yz1_FhlhKJZQv1bw7Td5vrfU{RV{U z9{8z2LUlLX1C61&3+{%7P~8D{!Pm}qITqsE5Ex7dx82mR5xxx7jSbasFjP0djqtZn zT?bddUqiJ8u7kgX>RQ+WUxaEiTnnFv>W8oyJ`2?*_#qq!)itmQJ`L5?a1DGCsw?3t z*ng%-zkunsPSKZJX(Mg3~1R>4~MeeyMR++oot>@=Q*m%}|* z!YcS(sOlQ-gqK2919kA*P%VcVcrjGfupD-UstT&%g-}&O75pYti=h&J9jXdg49|yZ zAymL~p-RI-cs5k!kcMA{suarMnNZDxQg}L4b73Al6{fWQ)nu3gkA`XzOopF_Y647xM?y6g zCcwj?8VNJup-_#2v9LWlTW)Fnwco+`%V=tHeHjgc65_CjQVmACN+;boX zD7S^`Xc!1T4b=cR8t%pXe;AI_ui?A!`@Fr;{D0B>f6@GZ(foh-x)6)z{{uUk{}1m3 zE1LgLIEz*^|DSk^Fq;2Q+ziqDf7#zpqWS-#`TsTUCG_qO2N{~Nru-g1m&pYBcb#&|=#zFt=^ z-%EN%Hk!RV`J3e9IOqTE$;~+9eeSR<(v5T`8m9Xr}-&3qyO=|1oIi%aL&HQIsW#rm)SEooBuuRN9;;= zK3j{~4D)f8zwzui)`xXst(Z*@;ynIuVJ^dybUVG9UQaKh>**@G1n2OdPDf)t!%;Yc ze+wG3zpy{R`TKukKW^V=-;VS9U1VqMI(wmg8qVE6+#X zy4Skdy4tz`Ga8mzW!7wK0?ygr*XnHLSrljW`zZc){H6F)nANZ~egj^j=r6KBWP$$$ z7GU*aDsBV6VL#i*>&0B$BE*C#IJ{oW#w`R!w0OOkk6Q=~Xz_Y6BexJ3&*JrBPHrJE zoW<+KtlUCiG>g}ZdAWtaU>2_zQ*#S}u`FIMCg&CcLs`7uRsthgyk1PuEw*4Fi`U1M zz&IAK7i%MkEf~h)^>2lJJ{h!qdS8Vh0z z2CR56PYH}y@j$GzAhuw*iU(q)1tBn6#RIX{f)E(2;=vpxFjmC_vEBl1!JyeWJP<1` zh}|$!#RIYCf)E&};(=IoK?saf@j$G*AdbN>6%WM93t|gKsdyk3Uyx!I55)2dLST%F z2VwyRAuvS61F-~y5E!B2!E_}sK*fV;N??472UC^6@DvYDR05+@JeZ;c2B&y1SqY3y z@nDh?7@Fe2L?tjX#e)e-U|@;|&HkQ5I_DuEFx9*9L6#1;%l@nE6c5D04RXsU9*CtIgupNq55(dP zLSPh%2V(gK6e0-41F?XE*n%-A9*89zguoCK55yu4LSO`n2PH~i0E!3wl)x-P9`scL zlL&dxM+wX!_HxMR|1m< zdC*M>%pK%GkrJ3X$OEy!gE(u<9OQvm<3R{a9OOY~B`|N02V$WIu?5oxd5}^9vj%w} z7JCp|FlmqnVz~z)FlUelV!;O?FlCSj?UlfcK^}-jAH)_+803Li_5p=>&5#FT;Rmq= z(*=1TmVOWdvjuq|7Jm=|lLdJomVXcea|L-I7Jv`}Qw4b-mViJZG6i`c7J(1~69su7 zmVpog^8|U29er?uFiaEVff{~9mLLz*@Ix?3kOyk`A($h`12y~*OcCUP8h!|72=YJ; zKLis5d7y?Lg86|wVCq9LJ&*@#_#v1b$OAR}5KIo_ff|0u93+-?qYtJ-VYVOh#o&XC$$r!q zV-E(uRw9a#2XO_ijwQw&#JUyK7o!g1@;d5^F$b}FIrYVegIHBfeKFo3E-9tH7;O+2 zFQL8|YY;0IQ(ufUh>I$yFUA?fg^Q>!Mj6Bf3#l*07{v4f>WdKuu{=$EF}@&{l~Z4g zE{OBXs4vDA#L_D2i;)HK%=y$8;|k)|GpR3z6~r0qs4oT;#Oe1^UkoXT_fG#m-YvDz literal 0 HcmV?d00001 diff --git a/asr_service.py b/app/asr/asr_service.py similarity index 100% rename from asr_service.py rename to app/asr/asr_service.py diff --git a/main.py b/app/asr/core.py similarity index 97% rename from main.py rename to app/asr/core.py index b8c567e..5842404 100644 --- a/main.py +++ b/app/asr/core.py @@ -20,9 +20,9 @@ from pathlib import Path from typing import Dict, List, Optional # 路径配置 -BASE_DIR = Path(__file__).parent.absolute() -TEMP_DIR = BASE_DIR / "temp" -OUTPUT_DIR = BASE_DIR / "output" +BASE_DIR = os.getcwd() +TEMP_DIR = Path(BASE_DIR) / "temp" +OUTPUT_DIR = Path(BASE_DIR) / "output/SpeechRecognition" @@ -155,7 +155,7 @@ def process_batch_diarization(video_paths, max_workers=1): # 加载说话人分离模型(只加载一次) print("加载说话人分离模型...") - from diarization_service import DiarizationService + from app.asr.diarization_service import DiarizationService diar_service = DiarizationService( embedding_model="eres2netv2", @@ -206,7 +206,7 @@ def process_batch_diarization(video_paths, max_workers=1): diar_result = { "segments": [seg.to_dict() for seg in diar_segments] } - from map_speaker import save_json + from app.asr.map_speaker import save_json save_json(temp_diar_path, diar_result) video_process_time = time.time() - video_start_time @@ -269,7 +269,7 @@ def process_batch_asr(video_paths, diar_results, max_workers=1): # 加载 ASR 模型(只加载一次) print("加载 ASR 模型...") - from asr_service import ASRService + from app.asr.asr_service import ASRService asr_service = ASRService( model_name="paraformer-zh", @@ -342,7 +342,7 @@ def process_batch_asr(video_paths, diar_results, max_workers=1): continue # 2. 加载说话人分离结果 - from map_speaker import load_json + from app.asr.map_speaker import load_json diar_result = load_json(diar_path) # 3. 执行 ASR 识别(不使用 ASR 自带的说话人) @@ -498,7 +498,7 @@ def main(path: str): """主函数""" import torch # 拼接根目录 - path = os.path.join(os.path.dirname(__file__), "input", path) + path = os.path.join(BASE_DIR, "input", path) print(f"开始处理路径:{path}") print("\n" + "=" * 60) @@ -593,10 +593,10 @@ def main(path: str): print(f"输出目录:{OUTPUT_DIR}") -if __name__ == "__main__": +# if __name__ == "__main__": # 视频文件夹路径(全局变量) # PATH = r"D:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio2\input\宁波北仑区鼎邦杰西雅服饰有限公司" # PATH = r"D:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio2\input\temp" # PATH = r"D:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio2\input\temp\VID_20251104_085655_024.AVI" - PATH = r"VID_20251104_085655_024.AVI" - main(PATH) + # PATH = r"VID_20251104_085655_024.AVI" + # main(PATH) diff --git a/diarization_service.py b/app/asr/diarization_service.py similarity index 100% rename from diarization_service.py rename to app/asr/diarization_service.py diff --git a/map_speaker.py b/app/asr/map_speaker.py similarity index 100% rename from map_speaker.py rename to app/asr/map_speaker.py diff --git a/app/asr/routes.py b/app/asr/routes.py new file mode 100644 index 0000000..66885f1 --- /dev/null +++ b/app/asr/routes.py @@ -0,0 +1,70 @@ +from flask import request, jsonify +from app.utils import make_response +from pathlib import Path +import uuid +import threading +import signal +import os +import json + +# 全局状态(模块内部) +task_running = {} +GLOBAL_ASR_SERVICE = None +GLOBAL_DIAR_SERVICE = None +ASR_MODEL_LOADED = False +DIAR_MODEL_LOADED = False +ASR_MODEL_LOCK = threading.Lock() +DIAR_MODEL_LOCK = threading.Lock() + +def register_asr_routes(app): + """【模块唯一入口】只注册路由,不创建app""" + + @app.route('/api/recognize', methods=['GET']) + def recognize(): + task_id = str(uuid.uuid4()) + task_running[task_id] = True + try: + path = request.args.get('path', '') + if not path: + return jsonify(make_response(status="error", message="缺少path参数")), 400 + + server_dir = Path(__file__).parent.parent.parent.absolute() + os.chdir(server_dir) + + def timeout_handler(signum, frame): + task_running[task_id] = False + raise TimeoutError(f"超时 {app.config['TASK_TIMEOUT']}s") + + use_alarm = False + try: + signal.signal(signal.SIGALRM, timeout_handler) # pyright: ignore + signal.alarm(app.config['TASK_TIMEOUT']) # pyright: ignore + use_alarm = True + except: + pass + + try: + from app.asr.core import main + main(path) + finally: + if use_alarm: + signal.alarm(0) # pyright: ignore + task_running[task_id] = False + + return jsonify(make_response(message="推理完成", data={"task_id": task_id, "path": path})) + + except Exception as e: + task_running[task_id] = False + return jsonify(make_response(status="error", message=str(e))), 500 + + @app.route('/api/result', methods=['GET']) + def result(): + path = request.args.get('path', '') + if not path: + return jsonify(make_response(status="error", message="缺少path")), 400 + result_file = Path(app.config['OUTPUT_DIR']) / "SpeechRecognition" / f"{Path(path).stem}_result.json" + if not result_file.exists(): + return jsonify(make_response(status="error", message="结果不存在")), 404 + with open(result_file, 'r', encoding='utf-8') as f: + data = json.load(f) + return jsonify(make_response(data=data)) \ No newline at end of file diff --git a/app/settings.py b/app/settings.py new file mode 100644 index 0000000..d4d6133 --- /dev/null +++ b/app/settings.py @@ -0,0 +1,10 @@ +# 全局配置 +config = { + 'MAX_CONTENT_LENGTH': 500 * 1024 * 1024, + 'SEND_FILE_MAX_AGE_DEFAULT': 300, + 'INPUT_DIR': 'input', + 'OUTPUT_DIR': 'output', + 'TASK_TIMEOUT': 600, + 'API_PORT': 5000, + 'VIDEO_PORT': 8086, +} \ No newline at end of file diff --git a/lib/convert.py b/app/transcode/core.py similarity index 60% rename from lib/convert.py rename to app/transcode/core.py index e0f7bfd..1d801cf 100644 --- a/lib/convert.py +++ b/app/transcode/core.py @@ -2,7 +2,7 @@ import subprocess from pathlib import Path -def convert_to_h264(input_path: str, output_path: str = None) -> str: +def convert_to_h264(input_root: Path, vid_full_name: str, output_root: Path) -> str: """ 将视频文件转码为 H.264 编码格式 @@ -17,17 +17,14 @@ def convert_to_h264(input_path: str, output_path: str = None) -> str: FileNotFoundError: 输入文件不存在 subprocess.CalledProcessError: FFmpeg 转码失败 """ - input_path_all = Path("input/" + input_path).resolve() - output_path_all = Path("vid_h264/" + input_path).resolve() + input_path_all = Path(input_root,vid_full_name).resolve() + output_vid_full_name = f"{Path(vid_full_name).stem}_h264.mp4" + output_path_all = Path(output_root, output_vid_full_name).resolve() if not input_path_all.exists(): - raise FileNotFoundError(f"输入文件不存在:{input_path}") + raise FileNotFoundError(f"输入文件不存在:{input_path_all}") print(f"开始转码:{input_path_all}") - # if output_path is None: - output_path = str(output_path_all.with_name(f"{input_path_all.stem}_h264.mp4")) - # else: - # output_path = Path(output_path).resolve() cmd = [ "ffmpeg", @@ -36,7 +33,7 @@ def convert_to_h264(input_path: str, output_path: str = None) -> str: "-c:a", "aac", "-b:a", "128k", "-y", - str(output_path) + str(output_path_all) ] result = subprocess.run(cmd, capture_output=True, text=True) @@ -48,9 +45,5 @@ def convert_to_h264(input_path: str, output_path: str = None) -> str: result.stdout, result.stderr ) - print(f"转码完成:{output_path}") - return str(output_path) - -if __name__ == "__main__": - convert_to_h264("VID_20251104_085655_024.AVI", "vid_h264/VID_20251104_085655_024_h264.mp4") - # convert_to_h264("input/short.AVI", "vid_h264/short_h264.mp4") + print(f"转码完成:{output_path_all}") + return str(output_path_all) \ No newline at end of file diff --git a/app/transcode/routes.py b/app/transcode/routes.py new file mode 100644 index 0000000..e837c9d --- /dev/null +++ b/app/transcode/routes.py @@ -0,0 +1,40 @@ +from flask import request, jsonify +from app.utils import make_response +from app.transcode.core import convert_to_h264 +from pathlib import Path + +def register_transcode_routes(app): + """【转码模块入口】只注册路由""" + + # 初始化转码输出目录 + transcode_output_dir = Path(app.config['OUTPUT_DIR'], "vid_h264") + transcode_output_dir.mkdir(parents=True, exist_ok=True) + + @app.route('/api/convert', methods=['GET']) + def convert(): + # 获取参数 + path = request.args.get('path', '') + + # 1. 路径为空时,返回错误响应 + if not path: + return jsonify(make_response(status="error", message="缺少 path")), 400 + + # 2. 路径不为空,转码视频文件 + try: + out = convert_to_h264(app.config['INPUT_DIR'], path, transcode_output_dir) + return jsonify(make_response(message="转码完成", data={"path": out})) + except FileNotFoundError as e: + return jsonify(make_response(status="error", message=str(e))), 404 + except Exception as e: + return jsonify(make_response(status="error", message=f"转码失败:{str(e)}")), 500 + + @app.route('/api/getVidUrl', methods=['GET']) + def getVidUrl(): + path = Path(request.args.get('path', '')) + if not path: + return jsonify(make_response(status="error", message="缺少path")), 400 + vid_path = Path(transcode_output_dir, f"{path.stem}_h264.mp4") + if not vid_path.exists(): + return jsonify(make_response(status="error", message="视频不存在")), 404 + url = f"http://localhost:8086/{path.stem}_h264.mp4" + return jsonify(make_response(data={"url": url})) \ No newline at end of file diff --git a/app/utils.py b/app/utils.py new file mode 100644 index 0000000..258d836 --- /dev/null +++ b/app/utils.py @@ -0,0 +1,30 @@ +from datetime import datetime, timezone +from flask import request, jsonify + +def make_response(status="success", data=None, errors=None, message=None, extra=None): + """统一响应格式""" + response = { + "status": status, + "data": data or {}, + "errors": errors or [], + "message": message or ("操作成功" if status == "success" else "操作失败"), + "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + } + if extra: + response.update(extra) + return response + +def register_cors(app): + """统一注册 CORS""" + @app.after_request + def after_request(response): + origin = request.headers.get('Origin', '*') + response.headers['Access-Control-Allow-Origin'] = origin + response.headers['Access-Control-Allow-Credentials'] = 'true' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type,Authorization' + response.headers['Access-Control-Allow-Methods'] = 'GET,PUT,POST,DELETE,OPTIONS' + return response + + @app.route('/config', methods=['OPTIONS']) + def config_options(): + return '', 200 \ No newline at end of file diff --git a/lib/caddy_windows_amd64.exe b/lib/caddy/caddy_windows_amd64.exe similarity index 100% rename from lib/caddy_windows_amd64.exe rename to lib/caddy/caddy_windows_amd64.exe diff --git a/lib/caddy/run.py b/lib/caddy/run.py new file mode 100644 index 0000000..0f3ede5 --- /dev/null +++ b/lib/caddy/run.py @@ -0,0 +1,8 @@ +import os +import subprocess + +def run_caddy(port: int = 8086): + caddy_dir = os.path.join(os.getcwd(), "output", "vid_h264") + caddy_exe = os.path.join(os.path.dirname(__file__), "caddy_windows_amd64.exe") + + subprocess.Popen([caddy_exe, "file-server", "--listen", f":{port}", "--browse"], cwd=caddy_dir, shell=True) diff --git a/server.py b/server.py index b03ceea..5a9cf62 100644 --- a/server.py +++ b/server.py @@ -1,311 +1,46 @@ -""" -Web API Server for ASR and Speaker Diarization -提供语音识别和说话人分离的 REST API 服务 -""" +from flask import Flask +from app.settings import config +from app.utils import register_cors, make_response +from app.asr.routes import register_asr_routes +from app.transcode.routes import register_transcode_routes +from waitress import serve -import os -import subprocess -import sys -import gc -import json -import shutil -import signal -import threading -from pathlib import Path -from flask import Flask, request, jsonify -from werkzeug.utils import secure_filename -import uuid -from datetime import datetime, timezone +from lib.caddy.run import run_caddy -from lib.convert import convert_to_h264 +def create_app(): + """【统一创建 Flask 服务】唯一创建 app 的地方""" + app = Flask(__name__) + # 加载全局配置 + app.config.update(config) -def make_response(status="success", data=None, errors=None, message=None, extra=None): - """ - 统一 API 响应格式 + # 注册全局工具 + register_cors(app) - Args: - status: 状态 ("success" 或 "error") - data: 返回的数据 - errors: 错误列表 - message: 消息 - extra: 其他额外字段 + # 注册各个业务模块路由(模块只提供入口) + register_asr_routes(app) + register_transcode_routes(app) - Returns: - 统一格式的 JSON 响应 - """ - response = { - "status": status, - "data": data if data is not None else {}, - "errors": errors if errors is not None else [], - "message": message or ("操作成功" if status == "success" else "操作失败"), - "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - } + # 根路由 + @app.route('/') + def index(): + return make_response(message="ASR & Speaker Diarization API 服务运行中") - if extra: - response.update(extra) - - return response - -app = Flask(__name__) -app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024 -app.config['UPLOAD_FOLDER'] = 'uploads' -app.config['RESULT_FOLDER'] = 'results' -# 增加请求超时时间(秒),支持长时间运行的任务 -app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 300 # 5 -OUTPUT_DIR = Path('output') - -# 手动添加 CORS 响应头 -@app.after_request -def after_request(response): - # 获取请求的 Origin - origin = request.headers.get('Origin', '*') - - # 设置允许的来源(不使用通配符) - response.headers.add('Access-Control-Allow-Origin', origin) - response.headers.add('Access-Control-Allow-Credentials', 'true') - response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization') - response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS') - return response - - -# 处理 OPTIONS 预检请求 -@app.route('/config', methods=['OPTIONS']) -def config_options(): - """处理 /config 接口的 OPTIONS 预检请求""" - return '', 200 - -os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True) -os.makedirs(app.config['RESULT_FOLDER'], exist_ok=True) - -GLOBAL_ASR_SERVICE = None -GLOBAL_DIAR_SERVICE = None -ASR_MODEL_LOADED = False -DIAR_MODEL_LOADED = False -ASR_MODEL_LOCK = threading.Lock() -DIAR_MODEL_LOCK = threading.Lock() - -# 全局变量用于控制任务执行 -task_running = {} -task_timeout = 600 # 10 分钟超时(秒) - - -@app.route('/api/recognize', methods=['GET']) -def recognize(): - """文件推理 - 调用 main.py 的 main 函数""" - task_id = str(uuid.uuid4()) - task_running[task_id] = True - - try: - - # 从请求参数获取路径(只接受文件名或 input 目录下的相对路径) - path = request.args.get('path', '') - - if not path: - return jsonify(make_response( - status="error", - message="请提供文件路径", - errors=["缺少必要参数:path"] - )), 400 - - print(f"\n{'='*60}") - print(f"API 收到请求:path={path}, task_id={task_id}") - print(f"{'='*60}") - print(f"开始调用 main 函数...") - - # 切换到 server.py 所在目录 - server_dir = Path(__file__).parent.absolute() - os.chdir(server_dir) - - print(f"当前工作目录:{os.getcwd()}") - - # 设置超时处理 - def timeout_handler(signum, frame): - task_running[task_id] = False - raise TimeoutError(f"任务执行超时 ({task_timeout}秒)") - - # 注册信号处理器(仅在 Unix/Linux/Mac 有效,Windows 下会被忽略) - try: - signal.signal(signal.SIGALRM, timeout_handler) - signal.alarm(task_timeout) - use_alarm = True - except (AttributeError, ValueError): - # Windows 不支持 SIGALRM - use_alarm = False - print("注意:Windows 系统,使用超时检测线程") - - try: - # 调用 main.py 的 main 函数 - from main import main - main(path) - - # 取消超时 - if use_alarm: - signal.alarm(0) - - print(f"main 函数执行完成") - - return jsonify(make_response( - status="success", - message="文件推理完成", - data={"path": path, "task_id": task_id} - )), 200 - - except TimeoutError as e: - print(f"任务超时:{e}") - return jsonify(make_response( - status="error", - message=str(e), - errors=[str(e)] - )), 504 # Gateway Timeout - - finally: - # 清理信号处理器 - if use_alarm: - signal.alarm(0) - task_running[task_id] = False - - except Exception as e: - import traceback - traceback.print_exc() - task_running[task_id] = False - return jsonify(make_response( - status="error", - message=str(e), - errors=[str(e)] - )), 500 - - -@app.route('/api/result', methods=['GET']) -def result(): - """获取文件推理结果""" - try: - # 从请求参数获取路径 - path = request.args.get('path', '') - - if not path: - return jsonify(make_response( - status="error", - message="请提供文件路径", - errors=["缺少必要参数:path"] - )), 400 - - # 读取结果 - print(Path(path).stem) - result_file = OUTPUT_DIR / f"{Path(path).stem}_result.json" - if result_file.exists(): - with open(result_file, 'r', encoding='utf-8') as f: - result_data = json.load(f) - return jsonify(make_response( - status="success", - message="获取成功", - data=result_data - )), 200 - else: - return jsonify(make_response( - status="error", - message="处理完成但未找到结果文件", - errors=["结果文件不存在"] - )), 404 - except Exception as e: - import traceback - traceback.print_exc() - return jsonify(make_response( - status="error", - message=str(e), - errors=[str(e)] - )), 500 - -@app.route('/api/convert', methods=['GET']) -def convert(): - """视频文件转码""" - try: - # 从请求参数获取路径 - path = request.args.get('path', '') - - if not path: - return jsonify(make_response( - status="error", - message="请提供文件路径", - errors=["缺少必要参数:path"] - )), 400 - - # 转码视频文件 - output_path = convert_to_h264(path) - - return jsonify(make_response( - status="success", - message="视频文件转码完成", - data={"path": output_path} - )), 200 - except Exception as e: - import traceback - traceback.print_exc() - return jsonify(make_response( - status="error", - message=str(e), - errors=[str(e)] - )), 500 - -@app.route('/api/getVidUrl', methods=['GET']) -def getVidUrl(): - """获取视频文件URL""" - try: - # 从请求参数获取路径 - path = request.args.get('path', '') - - if not path: - return jsonify(make_response( - status="error", - message="请提供文件路径", - errors=["缺少必要参数:path"] - )), 400 - - - # 检查视频文件是否存在 - if not Path(f"vid_h264/{Path(path).stem}_h264.mp4").exists(): - return jsonify(make_response( - status="error", - message="视频文件不存在", - errors=["视频文件不存在"] - )), 404 - - # 生成视频文件URL - url = f"http://localhost:8086/{Path(path).stem}_h264.mp4" - print(url) - - return jsonify(make_response( - status="success", - message="获取成功", - data={"url": url} - )), 200 - except Exception as e: - import traceback - traceback.print_exc() - return jsonify(make_response( - status="error", - message=str(e), - errors=[str(e)] - )), 500 + return app if __name__ == '__main__': + api_port = config['API_PORT'] + video_port = config['VIDEO_PORT'] + + app = create_app() + print("=" * 60) - print(" ASR & Speaker Diarization API Server") - print("=" * 60) - print("\nAPI 接口:") - print(" GET /api/recognize - 文件推理") - print(" GET /api/result - 获取文件推理结果") - print(" GET /api/convert - 转码视频文件") - print(" GET /api/getVidUrl - 获取视频文件URL") - print("\n" + "=" * 60) - print("启动服务:http://localhost:5000") - print("使用 Waitress WSGI 服务器(无超时限制)") + print(f" API Server (模块化架构) 监听端口:{api_port}") + print(f" 视频文件服务监听端口:{video_port}") print("=" * 60) - # 启动 Caddy 服务(后台运行) - caddy_dir = os.path.join(os.path.dirname(__file__), "vid_h264") - caddy_exe = os.path.join(os.path.dirname(__file__), "lib", "caddy_windows_amd64.exe") - subprocess.Popen([caddy_exe, "file-server", "--listen", ":8086", "--browse"], cwd=caddy_dir, shell=True) + # 启动视频文件服务 + run_caddy(port=video_port) - from waitress import serve - serve(app, host='0.0.0.0', port=5000, threads=4, connection_limit=100) + # 启动服务 + serve(app, host='0.0.0.0', port=api_port, threads=4, connection_limit=100)